This test is failing from time to time. Making sure the parent thread has terminated before continuing triggers the underlying race condition, and makes the test to always fail.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com ---
Notes: v5: Fix some warnings, reword the commit message a bit.
The failure was hard to reproduce. The most reliable way without the sleep added here is to start multiple runs on the testbot, changing locale for each run. The locale change seems to have an impact on the timing and triggers the race condition.
dlls/user32/tests/msg.c | 61 +++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 24 deletions(-)
diff --git a/dlls/user32/tests/msg.c b/dlls/user32/tests/msg.c index b473a128c8d..43a6372aa8d 100644 --- a/dlls/user32/tests/msg.c +++ b/dlls/user32/tests/msg.c @@ -8556,6 +8556,8 @@ static void test_paint_messages(void) struct wnd_event { HWND hwnd; + HWND child_hwnd; + HANDLE child; HANDLE grand_child; HANDLE start_event; HANDLE stop_event; @@ -8588,44 +8590,50 @@ static DWORD CALLBACK create_grand_child_thread( void *param ) struct wnd_event *wnd_event = param; HWND hchild; MSG msg; + DWORD ret;
hchild = CreateWindowExA(0, "TestWindowClass", "Test child", - WS_CHILD | WS_VISIBLE, 0, 0, 10, 10, wnd_event->hwnd, 0, 0, NULL); + WS_CHILD | WS_VISIBLE, 0, 0, 10, 10, wnd_event->child_hwnd, 0, 0, NULL); ok (hchild != 0, "Failed to create child window\n"); flush_events(); flush_sequence(); SetEvent( wnd_event->start_event );
- for (;;) - { - MsgWaitForMultipleObjects(0, NULL, FALSE, 1000, QS_ALLINPUT); - if (!IsWindow( hchild )) break; /* will be destroyed when parent thread exits */ - while (PeekMessageA(&msg, 0, 0, 0, PM_REMOVE)) DispatchMessageA(&msg); - } + ret = WaitForSingleObject( wnd_event->child, 5000 ); + ok( !ret, "WaitForSingleObject returned %x, error: %u\n", ret, GetLastError() ); + + while (PeekMessageA(&msg, 0, 0, 0, PM_REMOVE)) DispatchMessageA(&msg); + todo_wine + ok( !IsWindow( hchild ), "Child window not destroyed\n" ); + return 0; }
static DWORD CALLBACK create_child_thread( void *param ) { struct wnd_event *wnd_event = param; - struct wnd_event child_event; DWORD ret, tid; MSG msg;
- child_event.hwnd = CreateWindowExA(0, "TestWindowClass", "Test child", - WS_CHILD | WS_VISIBLE, 0, 0, 10, 10, wnd_event->hwnd, 0, 0, NULL); - ok (child_event.hwnd != 0, "Failed to create child window\n"); - SetFocus( child_event.hwnd ); + wnd_event->child_hwnd = CreateWindowExA( 0, "TestWindowClass", "Test child", + WS_CHILD | WS_VISIBLE, 0, 0, 10, 10, + wnd_event->hwnd, 0, 0, NULL ); + ok (wnd_event->child_hwnd != 0, "Failed to create child windows\n"); + SetFocus( wnd_event->child_hwnd ); + + wnd_event->grand_child = CreateThread( NULL, 0, create_grand_child_thread, wnd_event, 0, &tid ); + ok( wnd_event->grand_child != 0, "CreateThread failed, error %u\n", GetLastError() ); + flush_events(); flush_sequence(); - child_event.start_event = wnd_event->start_event; - wnd_event->grand_child = CreateThread(NULL, 0, create_grand_child_thread, &child_event, 0, &tid); for (;;) { - DWORD ret = MsgWaitForMultipleObjects(1, &child_event.start_event, FALSE, 1000, QS_SENDMESSAGE); + ret = MsgWaitForMultipleObjects(1, &wnd_event->start_event, FALSE, 5000, QS_SENDMESSAGE); if (ret != 1) break; while (PeekMessageA(&msg, 0, 0, 0, PM_REMOVE)) DispatchMessageA(&msg); } + ok( !ret, "MsgWaitForMultipleObjects failed %x, error: %u\n", ret, GetLastError() ); + ret = WaitForSingleObject( wnd_event->stop_event, 5000 ); ok( !ret, "WaitForSingleObject failed %x\n", ret ); return 0; @@ -8759,33 +8767,38 @@ static void test_interthread_messages(void) wnd_event.hwnd = CreateWindowExA(0, "TestParentClass", "Test parent", WS_OVERLAPPEDWINDOW | WS_VISIBLE, 100, 100, 200, 200, 0, 0, 0, NULL); ok (wnd_event.hwnd != 0, "Failed to create parent window\n"); + + wnd_event.start_event = CreateEventA( NULL, TRUE, FALSE, NULL ); + wnd_event.stop_event = CreateEventA( NULL, TRUE, FALSE, NULL ); + + wnd_event.child = CreateThread( NULL, 0, create_child_thread, &wnd_event, 0, &tid ); + ok( wnd_event.child != 0, "CreateThread failed, error %u\n", GetLastError() ); + flush_events(); flush_sequence(); log_all_parent_messages++; - wnd_event.start_event = CreateEventA( NULL, TRUE, FALSE, NULL ); - wnd_event.stop_event = CreateEventA( NULL, TRUE, FALSE, NULL ); - hThread = CreateThread( NULL, 0, create_child_thread, &wnd_event, 0, &tid ); for (;;) { - ret = MsgWaitForMultipleObjects(1, &wnd_event.start_event, FALSE, 1000, QS_SENDMESSAGE); + ret = MsgWaitForMultipleObjects(1, &wnd_event.start_event, FALSE, 5000, QS_SENDMESSAGE); if (ret != 1) break; while (PeekMessageA(&msg, 0, 0, 0, PM_REMOVE)) DispatchMessageA(&msg); } ok( !ret, "MsgWaitForMultipleObjects failed %x\n", ret ); /* now wait for the thread without processing messages; this shouldn't deadlock */ SetEvent( wnd_event.stop_event ); - ret = WaitForSingleObject( hThread, 5000 ); + + ret = WaitForSingleObject( wnd_event.child, 5000 ); ok( !ret, "WaitForSingleObject failed %x\n", ret ); - CloseHandle( hThread );
ret = WaitForSingleObject( wnd_event.grand_child, 5000 ); - ok( !ret, "WaitForSingleObject failed %x\n", ret ); - CloseHandle( wnd_event.grand_child ); + ok( !ret, "WaitForSingleObject failed %x, error: %u\n", ret, GetLastError() );
+ CloseHandle( wnd_event.child ); + CloseHandle( wnd_event.grand_child ); CloseHandle( wnd_event.start_event ); CloseHandle( wnd_event.stop_event ); flush_events(); - ok_sequence(WmExitThreadSeq, "destroy child on thread exit", FALSE); + ok_sequence(WmExitThreadSeq, "destroy child on thread exit", TRUE); log_all_parent_messages--; DestroyWindow( wnd_event.hwnd );
On thread destroy, a WM_WINE_DESTROYWINDOW is sent for the child windows living in other threads. However there's then a race condition between these threads peeking for messages and the current thread detaching its child windows from their threads -which clears their message queues.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/user32/tests/msg.c | 3 +-- server/window.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/dlls/user32/tests/msg.c b/dlls/user32/tests/msg.c index 43a6372aa8d..885d477f5e9 100644 --- a/dlls/user32/tests/msg.c +++ b/dlls/user32/tests/msg.c @@ -8603,7 +8603,6 @@ static DWORD CALLBACK create_grand_child_thread( void *param ) ok( !ret, "WaitForSingleObject returned %x, error: %u\n", ret, GetLastError() );
while (PeekMessageA(&msg, 0, 0, 0, PM_REMOVE)) DispatchMessageA(&msg); - todo_wine ok( !IsWindow( hchild ), "Child window not destroyed\n" );
return 0; @@ -8798,7 +8797,7 @@ static void test_interthread_messages(void) CloseHandle( wnd_event.start_event ); CloseHandle( wnd_event.stop_event ); flush_events(); - ok_sequence(WmExitThreadSeq, "destroy child on thread exit", TRUE); + ok_sequence(WmExitThreadSeq, "destroy child on thread exit", FALSE); log_all_parent_messages--; DestroyWindow( wnd_event.hwnd );
diff --git a/server/window.c b/server/window.c index c9b131cba5d..3985ea4eb20 100644 --- a/server/window.c +++ b/server/window.c @@ -1914,7 +1914,7 @@ void destroy_window( struct window *win ) post_message( win->parent->handle, WM_PARENTNOTIFY, WM_DESTROY, win->handle ); }
- detach_window_thread( win ); + if (win->thread == current) detach_window_thread( win ); if (win->win_region) free_region( win->win_region ); if (win->update_region) free_region( win->update_region ); if (win->class) release_class( win->class );
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=59970
Your paranoid android.
=== wvistau64 (32 bit report) ===
user32: msg.c:16288: Test failed: 0: WaitForSingleObject failed
=== w1064v1809 (32 bit report) ===
user32: msg.c:5128: Test failed: ShowWindow(SW_HIDE):overlapped: 13: the msg sequence is not complete: expected 0000 - actual 0024
=== w1064v1809_he (32 bit report) ===
user32: msg.c:17779: Test failed: Restore minimized window: 41: the msg sequence is not complete: expected 0000 - actual 0007
On 11/13/19 11:31 AM, Rémi Bernon wrote:
- detach_window_thread( win );
- if (win->thread == current) detach_window_thread( win ); if (win->win_region) free_region( win->win_region ); if (win->update_region) free_region( win->update_region ); if (win->class) release_class( win->class );
Actually this causes some leaks that are reported when server is shutting down, sorry for missing it. This seems to be the case when the thread the child window lives in has terminated already, in which case the parent window should do the release.
On Wed, 13 Nov 2019, Rémi Bernon wrote: [...]
v5: Fix some warnings, reword the commit message a bit. The failure was hard to reproduce. The most reliable way without the sleep added here is to start multiple runs on the testbot, changing locale for each run. The locale change seems to have an impact on the timing and triggers the race condition.
I don't know if this is going to help but I'll try to shed some light on this.
The TestBot prepares the wineprefixes for all its missions in advance, at the end of the 'Update Wine VM' task (that takes 10 to 20 seconds per wineprefix). For the debian10 VM that's 6 wineprefixes: win32, English win32, French win32, Japanese win32, Chinese wow32, English wow64, English
The 'submit job' page on the public TestBot does not allow specifying any other locale so the timing difference does not come from using a pre-initialized wineprefix versus using a fresh one.
But when QEmu runs a VM, the parts of the qcow2 file needed by the guest end up in the host's disk cache and that will include the wineprefix accessed by the test. So maybe not all wineprefixes can fit in the disk cache, causing the guest to have to hit the disk after a locale switch.
It's strange that this results in an actual timing difference for the test: I'd expect most of the difference to happen when starting up Wine and loading the registry, so mostly before the test gets into main(). But all the dlls are duplicated (not hard/soft linked) in each wineprefix nowadays so maybe that's where the timing difference comes from for the test.
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=59969
Your paranoid android.
=== wvistau64 (32 bit report) ===
user32: msg.c:16289: Test failed: 0: WaitForSingleObject failed
=== w1064v1809 (32 bit report) ===
user32: msg.c:10290: Test failed: did not get expected count for minimum timeout (53 != ~100). msg.c:10360: Test failed: did not get expected count for minimum timeout (54 != ~100).
=== w1064v1809_ar (32 bit report) ===
user32: msg.c:15570: Test failed: DefWindowProcA(SC_RESTORE):overlapped: 33: the msg 0x000f was expected, but got msg 0x0024 instead