When a task fails due to a transient error it is re-run and the fact that a transient error happened cannot be recovered from the final Task object. Similarly the final Task object does not clearly identify timeouts. Furthermore all one can recover from the VM status history is that a Task was run more than once but not the reason why. So save abnormal task termination information in the history record.
Signed-off-by: Francois Gouget fgouget@codeweavers.com --- testbot/bin/WineRunBuild.pl | 20 ++++++++++++++++---- testbot/bin/WineRunReconfig.pl | 20 ++++++++++++++++---- testbot/bin/WineRunTask.pl | 19 ++++++++++++++++--- testbot/lib/WineTestBot/VMs.pm | 29 +++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 11 deletions(-)
diff --git a/testbot/bin/WineRunBuild.pl b/testbot/bin/WineRunBuild.pl index b91d594b..e4295c8e 100755 --- a/testbot/bin/WineRunBuild.pl +++ b/testbot/bin/WineRunBuild.pl @@ -45,6 +45,7 @@ use WineTestBot::Config; use WineTestBot::Jobs; use WineTestBot::VMs; use WineTestBot::Log; +use WineTestBot::RecordGroups; use WineTestBot::Engine::Notify;
@@ -192,10 +193,13 @@ sub LogTaskError($) umask($OldUMask); }
-sub WrapUpAndExit($;$) +sub WrapUpAndExit($;$$) { - my ($Status, $Retry) = @_; + my ($Status, $Retry, $Timeout) = @_; my $NewVMStatus = $Status eq 'queued' ? 'offline' : 'dirty'; + my $VMResult = $Status eq "boterror" ? "boterror" : + $Status eq "queued" ? "error" : + $Timeout ? "timeout" : "";
my $TestFailures; my $Tries = $Task->TestFailures || 0; @@ -219,6 +223,13 @@ sub WrapUpAndExit($;$) LogTaskError("The previous $Tries run(s) terminated abnormally\n"); }
+ # Record result details that may be lost or overwritten by a later run + if ($VMResult) + { + $VMResult .= " $Tries $MaxTaskTries" if ($Retry); + $VM->RecordResult(undef, $VMResult); + } + # Update the Task and Job $Task->Status($Status); $Task->TestFailures($TestFailures); @@ -373,7 +384,7 @@ if (!$Pid) # log before giving up #
-my ($NewStatus, $ErrMessage, $TAError); +my ($NewStatus, $ErrMessage, $TAError, $TaskTimedOut); Debug(Elapsed($Start), " Waiting for the script (", $Task->Timeout, "s timeout)\n"); if (!defined $TA->Wait($Pid, $Task->Timeout, 60)) { @@ -382,6 +393,7 @@ if (!defined $TA->Wait($Pid, $Task->Timeout, 60)) { $ErrMessage = "The build timed out\n"; $NewStatus = "badbuild"; + $TaskTimedOut = 1; } else { @@ -480,4 +492,4 @@ $TA->Disconnect(); # Wrap up #
-WrapUpAndExit($NewStatus); +WrapUpAndExit($NewStatus, undef, $TaskTimedOut); diff --git a/testbot/bin/WineRunReconfig.pl b/testbot/bin/WineRunReconfig.pl index ff99fb7c..4eefa171 100755 --- a/testbot/bin/WineRunReconfig.pl +++ b/testbot/bin/WineRunReconfig.pl @@ -45,6 +45,7 @@ use WineTestBot::Config; use WineTestBot::Jobs; use WineTestBot::VMs; use WineTestBot::Log; +use WineTestBot::RecordGroups; use WineTestBot::Engine::Notify;
@@ -192,11 +193,14 @@ sub LogTaskError($) umask($OldUMask); }
-sub WrapUpAndExit($;$) +sub WrapUpAndExit($;$$) { - my ($Status, $Retry) = @_; + my ($Status, $Retry, $Timeout) = @_; my $NewVMStatus = $Status eq 'queued' ? 'offline' : $Status eq 'completed' ? 'idle' : 'dirty'; + my $VMResult = $Status eq "boterror" ? "boterror" : + $Status eq "queued" ? "error" : + $Timeout ? "timeout" : "";
my $TestFailures; my $Tries = $Task->TestFailures || 0; @@ -220,6 +224,13 @@ sub WrapUpAndExit($;$) LogTaskError("The previous $Tries run(s) terminated abnormally\n"); }
+ # Record result details that may be lost or overwritten by a later run + if ($VMResult) + { + $VMResult .= " $Tries $MaxTaskTries" if ($Retry); + $VM->RecordResult(undef, $VMResult); + } + # Update the Task and Job $Task->Status($Status); $Task->TestFailures($TestFailures); @@ -338,7 +349,7 @@ if (!$Pid) # log before giving up #
-my ($NewStatus, $ErrMessage, $TAError); +my ($NewStatus, $ErrMessage, $TAError, $TaskTimedOut); Debug(Elapsed($Start), " Waiting for the script (", $Task->Timeout, "s timeout)\n"); if (!defined $TA->Wait($Pid, $Task->Timeout, 60)) { @@ -347,6 +358,7 @@ if (!defined $TA->Wait($Pid, $Task->Timeout, 60)) { $ErrMessage = "The build timed out\n"; $NewStatus = "badbuild"; + $TaskTimedOut = 1; } else { @@ -470,4 +482,4 @@ if ($NewStatus eq 'completed') # Wrap up #
-WrapUpAndExit($NewStatus); +WrapUpAndExit($NewStatus, undef, $TaskTimedOut); diff --git a/testbot/bin/WineRunTask.pl b/testbot/bin/WineRunTask.pl index 3807a182..65faf725 100755 --- a/testbot/bin/WineRunTask.pl +++ b/testbot/bin/WineRunTask.pl @@ -44,6 +44,7 @@ use WineTestBot::Config; use WineTestBot::Jobs; use WineTestBot::VMs; use WineTestBot::Log; +use WineTestBot::RecordGroups; use WineTestBot::Engine::Notify;
@@ -219,10 +220,13 @@ sub LogTaskError($) umask($OldUMask); }
-sub WrapUpAndExit($;$$) +sub WrapUpAndExit($;$$$) { - my ($Status, $TestFailures, $Retry) = @_; + my ($Status, $TestFailures, $Retry, $Timeout) = @_; my $NewVMStatus = $Status eq 'queued' ? 'offline' : 'dirty'; + my $VMResult = $Status eq "boterror" ? "boterror" : + $Status eq "queued" ? "error" : + $Timeout ? "timeout" : "";
Debug(Elapsed($Start), " Taking a screenshot\n"); TakeScreenshot($VM, $FullScreenshotFileName); @@ -248,6 +252,13 @@ sub WrapUpAndExit($;$$) LogTaskError("The previous $Tries run(s) terminated abnormally\n"); }
+ # Record result details that may be lost or overwritten by a later run + if ($VMResult) + { + $VMResult .= " $Tries $MaxTaskTries" if ($Retry); + $VM->RecordResult(undef, $VMResult); + } + # Update the Task and Job $Task->Status($Status); $Task->TestFailures($TestFailures); @@ -489,6 +500,7 @@ if (!defined $TA->Wait($Pid, $Timeout, $Keepalive)) } }
+my $TimedOut; Debug(Elapsed($Start), " Retrieving the report file to '$FullLogFileName'\n"); if ($TA->GetFile($RptFileName, $FullLogFileName)) { @@ -712,6 +724,7 @@ if ($TA->GetFile($RptFileName, $FullLogFileName)) # so record the failure but don't add an error message. $LogFailures++; $CurrentIsBroken = 1; + $TimedOut = ($Step->Type ne "suite"); } elsif ((!$Pid and !%CurrentPids) or ($Pid and !$CurrentPids{$Pid} and !$CurrentPids{0})) @@ -767,4 +780,4 @@ FatalTAError(undef, $TAError, $PossibleCrash) if (defined $TAError); # Wrap up #
-WrapUpAndExit($NewStatus, $TaskFailures); +WrapUpAndExit($NewStatus, $TaskFailures, undef, $TaskTimedOut || $TimedOut); diff --git a/testbot/lib/WineTestBot/VMs.pm b/testbot/lib/WineTestBot/VMs.pm index 961be52f..2ba6b4b5 100644 --- a/testbot/lib/WineTestBot/VMs.pm +++ b/testbot/lib/WineTestBot/VMs.pm @@ -566,6 +566,35 @@ sub RecordStatus($$;$) } }
+=pod +=over 12 + +=item C<RecordStatus()> + +Adds a Record of the specified VM process result. + +Also resets the last known VM status so a new one will be recorded even if +it matches the current one, for instance if a new revert is started after the +first one failed. + +=back +=cut + +sub RecordResult($$$) +{ + my ($self, $Records, $Result) = @_; + + if ($Records) + { + $Records->AddRecord("vmresult", $self->GetRecordName(), $Result); + } + else + { + SaveRecord("vmresult", $self->GetRecordName(), $Result); + } + delete $_VMStatuses{$self->Name}; +} +
package WineTestBot::VMs;
This helps identify transient errors, TestBot errors, or why a task is being re-run.
Signed-off-by: Francois Gouget fgouget@codeweavers.com --- testbot/lib/WineTestBot/Activity.pm | 57 ++++++++++++++++++++++++++++++++++++- testbot/web/Activity.pl | 41 ++++++++++++++++++++++---- testbot/web/WineTestBot.css | 4 +++ 3 files changed, 95 insertions(+), 7 deletions(-)
diff --git a/testbot/lib/WineTestBot/Activity.pm b/testbot/lib/WineTestBot/Activity.pm index 31930ed2..d66109c0 100644 --- a/testbot/lib/WineTestBot/Activity.pm +++ b/testbot/lib/WineTestBot/Activity.pm @@ -66,6 +66,18 @@ describing the TestBot activity. The structure is as follows: ... }, }, + resultvms => { + <VMName1> => { + vm => <VMObject>, + result => <VMResult>, + tries => <Tries>, + maxtries => <MaxTries>, + details => <ResultDetails>, + }, + <VMName2> => { + ... + }, + }, }, <GroupNo2> => { ... @@ -134,11 +146,24 @@ sub GetActivity($) $VMStatus->{status} = $Status; $VMStatus->{rows} = 1;
+ $VMStatus->{result} = ""; if ($Status eq "running") { $VMStatus->{job} = $Jobs->GetItem($Extra[0]); $VMStatus->{step} = $VMStatus->{job}->Steps->GetItem($Extra[1]) if ($VMStatus->{job}); $VMStatus->{task} = $VMStatus->{step}->Tasks->GetItem($Extra[2]) if ($VMStatus->{step}); + if ($VMStatus->{task}) + { + if ($VMStatus->{task}->Status =~ /^(?:badpatch|badbuild|boterror)$/) + { + $VMStatus->{result} = $VMStatus->{task}->Status; + } + elsif ($VMStatus->{task}->Status eq "completed" and + $VMStatus->{task}->TestFailures) + { + $VMStatus->{result} = "failed"; + } + } } elsif (@Extra) { @@ -148,6 +173,24 @@ sub GetActivity($) $VMStatus->{details} = join(" ", @Extra); } } + elsif ($Record->Type eq "vmresult") + { + my ($RecordName, $RecordHost) = split / /, $Record->Name; + next if (!$VMs->ItemExists($RecordName)); + + my $ResultVMs = ( $Group->{resultvms} ||= {} ); + my $VMResult = ( $ResultVMs->{$RecordName} ||= {} ); + + $VMResult->{host} = $RecordHost; + my ($Result, @Extras) = split / /, $Record->Value; + $VMResult->{result} = $Result; + if (@Extras >= 2 and $Extras[0] =~ /^\d+$/ and $Extras[1] =~ /^\d+$/) + { + $VMResult->{tries} = shift @Extras; + $VMResult->{maxtries} = shift @Extras; + } + $VMResult->{details} = join(" ", @Extras); + } }
### Fill the holes in the table, compute end times, etc. @@ -157,7 +200,8 @@ sub GetActivity($) { my $Group = $Activity->{$RecordGroup->Id}; my $StatusVMs = $Group->{statusvms}; - next if (!$StatusVMs); + my $ResultVMs = $Group->{resultvms}; + next if (!$StatusVMs and !$ResultVMs); if ($LastGroup) { $LastGroup->{end} = $Group->{start}; @@ -175,6 +219,17 @@ sub GetActivity($) { my $LastVMStatus = $LastStatusVMs{$VM->Name} ? $LastStatusVMs{$VM->Name}->{$VM->Name} : undef;
+ my $VMResult = $ResultVMs->{$VM->Name}; + if ($VMResult and $LastVMStatus and $LastVMStatus->{status} ne "engine") + { + # Transfer the result to the relevant status object + $LastVMStatus->{result} = $VMResult->{result}; + $LastVMStatus->{details} = $VMResult->{details}; + $LastVMStatus->{tries} = $VMResult->{tries}; + $LastVMStatus->{maxtries} = $VMResult->{maxtries}; + } + next if (!$StatusVMs); + my $VMStatus = $StatusVMs->{$VM->Name}; if ($VMStatus) { diff --git a/testbot/web/Activity.pl b/testbot/web/Activity.pl index 64a752b5..5a730df4 100644 --- a/testbot/web/Activity.pl +++ b/testbot/web/Activity.pl @@ -160,11 +160,26 @@ EOF my $VMStatus = $Group->{statusvms}->{$VM->Name}; next if ($VMStatus->{merged});
- # Add borders to separate VM hosts and indicate anomalies. + # Add borders to separate VM hosts and indicate various anomalies. print "<td class='Record Record-$VMStatus->{status}"; - my $Host = $VM->GetHost(); - print " Record-left" if ($Col > 0 and $SortedVMs[$Col-1]->GetHost() ne $Host); - print " Record-right" if ($Col+1 < @SortedVMs and $SortedVMs[$Col+1]->GetHost() ne $Host); + if ($VMStatus->{result} eq "timeout") + { + print " Record-timeout"; + } + elsif ($VMStatus->{result} eq "boterror") + { + print " Record-boterror"; + } + elsif ($VMStatus->{result} eq "error") + { + print " Record-error"; + } + else + { + my $Host = $VM->GetHost(); + print " Record-left" if ($Col > 0 and $SortedVMs[$Col-1]->GetHost() ne $Host); + print " Record-right" if ($Col+1 < @SortedVMs and $SortedVMs[$Col+1]->GetHost() ne $Host); + } print " Record-miss" if ($VMStatus->{mispredict}); print "'"; print " rowspan='$VMStatus->{rows}'" if ($VMStatus->{rows} > 1); @@ -215,6 +230,17 @@ EOF $Label = "<span class='RecordHost'>(on $Host)</span><br>$Label"; } print "$Label ", _GetHtmlDuration($VMStatus->{end} - $VMStatus->{start}); + + my $Result = ""; + if ($VMStatus->{status} ne "dirty") + { + $Result = $VMStatus->{result} if ($VMStatus->{result}); + $Result .= " $VMStatus->{tries}/$VMStatus->{maxtries}" if ($VMStatus->{tries}); + $Result .= ": $VMStatus->{details}" if ($VMStatus->{details}); + $Result =~ s/^: //; + } + print "<br><span class='RecordResult'>$Result</span>" if ($Result); + print "</td>\n"; } print "</tr>\n"; @@ -238,8 +264,11 @@ sub GenerateFooter($) print "<span class='Record-running'>running</span> a task (in which case it links to it),<br>\n"; print "<span class='Record-dirty'>dirty</span> while the server is powering off the VM after a task or while it assesses its state on startup.</p>\n";
- print "<p>If no time is indicated then the VM remained in that state for less than 2 seconds. The tasks column indicates the number of runnable / queued tasks before that scheduling round. A long horizontal bar indicates the TestBot server was restarted.</p>\n"; - print "<p>This <span class='Record Record-idle Record-miss'>border</span> indicates that the server threw away the VM's current state without using it.</p>\n"; + print "<p>If no time is indicated then the VM remained in that state for less than 2 seconds. The tasks column indicates the number of runnable / queued tasks before that scheduling round. A long horizontal bar indicates the TestBot server was restarted. </p>\n"; + print "<p>This <span class='Record Record-running Record-timeout'>border</span> indicates that the task timed out,<br>\n"; + print "this <span class='Record Record-running Record-error'>border</span> denotes a transient (network?) error so the task will be re-run,<br>\n"; + print "and this <span class='Record Record-running Record-boterror'>border</span> indicates a TestBot error.<br>\n"; + print "Finally this <span class='Record Record-idle Record-miss'>border</span> indicates that the server threw away the VM's current state without using it.</p>\n";
print "<p>The VM could also be <span class='Record-offline'>offline</span> due to a temporary issue,<br>\n"; print "or until the administrator can look at it for <span class='Record-maintenance'>maintenance</span>,<br>\n"; diff --git a/testbot/web/WineTestBot.css b/testbot/web/WineTestBot.css index 55de9a33..4afbe31c 100644 --- a/testbot/web/WineTestBot.css +++ b/testbot/web/WineTestBot.css @@ -348,6 +348,7 @@ td.Record { text-align: center; } .RecordHost { font-size: smaller; } .RecordJob { font-size: smaller; } .RecordDuration { } +.RecordResult { font-size: smaller; font-style: italic; }
.Record-start { } .Record-off { color: #c0c0c0; } @@ -371,4 +372,7 @@ td.Record { text-align: center; } /* Special borders */ .Record.Record-left { border-left: thin solid #601919; } .Record.Record-right { border-right: thin solid #601919; } +.Record.Record-boterror { border-left: thick solid red; border-right: thick solid red; } +.Record.Record-error { border-left: thick solid #990000; border-right: thick solid #990000; } +.Record.Record-timeout { border-left: thick solid blue; border-right: thick solid blue; } .Record.Record-miss { border-top: thick dashed #ff6600; }