If a test configuration had a high failure rate and no longer has any failures, it is not necessary to wait for $patternbuilds builds to consider the failure fixed. Then, if the failures are fixed in all test configurations, the test unit can be moved to the old/fixed failures list.
Signed-off-by: Francois Gouget fgouget@codeweavers.com --- winetest/build-patterns | 81 ++++++++++++++++++++++++++++++++++++++++- winetest/winetest.conf | 3 ++ 2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/winetest/build-patterns b/winetest/build-patterns index 32bae6b31..6a14fc372 100755 --- a/winetest/build-patterns +++ b/winetest/build-patterns @@ -31,7 +31,7 @@ sub BEGIN } unshift @INC, $1 if ($0 =~ m=^(/.*)/[^/]+$=); } -use vars qw/$workdir $gitdir $patternbuilds/; +use vars qw/$workdir $gitdir $patternbuilds $fixed_threshold/; require "winetest.conf";
my $name0=$0; @@ -275,6 +275,20 @@ my %reports; # The @sortedbuilds index of the most recent build with a failure. # -1 if there is none. # +# - failures +# The number of builds that had a failure between the first and last failure +# builds. +# +# - failruns +# The number of test results between the first and last failure builds. +# Note that if there are builds for which WineTest was not run this will be +# different from last-first+1. +# +# - fixedruns +# The number of successful test results following last. Note that if +# WineTest was not run (yet?) for some builds this will be different from +# @sortedbuilds-last. +# # - status # A hashtable of test results indexed by the build name. my %tests; @@ -389,6 +403,34 @@ sub fail_type($) "random"; }
+sub get_fix_probability($$$) +{ + my ($failures, $failruns, $fixedruns) = @_; + + # We want a lower bound on the failure rate so we get a lower bound on the + # probability that a failure is fixed. + # So ideally we would compute the lowest failure rate that has a + # probability greater than some arbitrary value of giving us $failures in + # $failruns runs. + # - For instance a failure rate of 100% would obviously give us 2 failures + # out of 2 runs. But there is also a 49% chance for a 70% failure rate to + # have the same result. So the lowest failure rate that has more than + # an arbitrary 50% chance of matching this result is ~71%. + # - But that's hard to compute in the general case. + # - So instead just convert one failure to success and compute the ratio. + # - This gives a 0% failure rate for the 1/1 case which is fine because + # this case really does not have enough data to derive a failure rate. + # - For the 2/2 case this gives 50% instead of a more likely value like 71%. + # But that's still good enough for our purpose. + # - The results continue on the low side up to at least $failruns=10 and + # then the difference is pretty small anyway. + my $failrate = ($failures - 1) / $failruns; + + # Then compute the probability of getting $fixedruns successes in a row. + # The complement is the probability that the failure has been fixed. + return 1 - (1 - $failrate) ** $fixedruns; +} + foreach my $testname (keys %tests) { my $test = $tests{$testname}; @@ -404,6 +446,11 @@ foreach my $testname (keys %tests) $testreport->{last} = -1; # - Type of failure: random or not (missing dll, etc.) $testreport->{failtype} = ""; + # - Statistics to compute the failure rate + $testreport->{failures} = 0; + $testreport->{failruns} = 0; + # - And the number of successful runs after the last failure + $testreport->{fixedruns} = 0;
for my $i (0..@sortedbuilds-1) { @@ -415,6 +462,7 @@ foreach my $testname (keys %tests) $build->{hastest}->{$testname}) { $testreport->{failtype} ||= 0; # success + $testreport->{fixedruns}++; } # else WineTest was not run for this build next; @@ -437,12 +485,39 @@ foreach my $testname (keys %tests) $testreport->{first} = $i; $testreport->{last} = $i; $testreport->{failtype} = $failtype; + $testreport->{failures} = 1; + # Assume the bug was introduced with the first failure and thus + # ignore successful runs that preceded it. + $testreport->{failruns} = 1; + $testreport->{fixedruns} = 0; } else { $testreport->{last} = $i; + $testreport->{failures}++; + $testreport->{failruns} += $testreport->{fixedruns} + 1; + $testreport->{fixedruns} = 0; } } + next if (!$testreport->{failed}); + next if (!$testreport->{fixedruns}); + + if ($testreport->{failtype} eq "random") + { + # - failruns counts the number of runs from the first to the last + # failure. + # - Both failruns and fixedruns account for the builds where the + # test was not run. + # - So for instance '...eeeF._F.._' gives failures=2 (not 5), + # failruns=3 (not 4, 9 or 10), and fixedruns=2 (not 3). + $testreport->{fixed} = get_fix_probability($testreport->{failures}, $testreport->{failruns}, $testreport->{fixedruns}); + } + else + { + # Since this failure is not random, even a single success means + # it is fixed. + $testreport->{fixed} = 1; + } } }
@@ -710,11 +785,13 @@ EOF
my $first = @sortedbuilds; my $last = -1; + my $fixed = 1; foreach my $reportdir (keys %$pagereports) { my $testreport = $test->{testreports}->{$reportdir}; next if (!$testreport->{failed});
+ $fixed = 0 if (($testreport->{fixed} || 0) < $fixed_threshold); $first = $testreport->{first} if ($testreport->{first} < $first); # For non-random failures we only care about the transition to # the failure state, which is recorded in 'first'. @@ -732,7 +809,7 @@ EOF } next if ($last == -1); # no report had a pattern of interest
- my $listid = ($last < @sortedbuilds - $patternbuilds) ? "old" : + my $listid = ($fixed or $last < @sortedbuilds - $patternbuilds) ? "old" : ($first > $patternbuilds) ? "recent" : "regular"; push @{$lists{$listid}->{testnames}}, $testname; diff --git a/winetest/winetest.conf b/winetest/winetest.conf index f193f8eed..1f587b3bc 100644 --- a/winetest/winetest.conf +++ b/winetest/winetest.conf @@ -27,4 +27,7 @@ $maxfilesize = 1.5 * 1024 * 1024; # The number of builds after which a failure is considered old / new $patternbuilds = 10;
+# Probability above which a failure is considered to be fixed (0..1) +$fixed_threshold = 0.99; + 1; # keep require happy
Instead of analyzing the failures of a single report across time, this analyzes the results of all the reports for the last build that had failures. This is then compared to the failure-free results of the following days to determine the probability that the failure has been fixed.
This allows detecting when short-lived issues that impacted many test configurations are fixed.
Signed-off-by: Francois Gouget fgouget@codeweavers.com --- winetest/build-patterns | 78 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-)
diff --git a/winetest/build-patterns b/winetest/build-patterns index 6a14fc372..460c17c1b 100755 --- a/winetest/build-patterns +++ b/winetest/build-patterns @@ -252,6 +252,14 @@ my %reports; # - colors # A hashtable of colors indexed by failure count. # +# - last +# The @sortedbuilds index of the most recent build for which a report has a +# failure. -1 if there is none. +# +# - fixed +# True if the test had enough successful results after the last failure to be +# considered fixed. +# # - testreports # A hashtable mapping report directory names to objects storing the results # for that test and report combination. Each testreport object has the @@ -389,7 +397,7 @@ my @sortedreports = sort cmpreports keys %reports;
# -# Analyze single-report patterns +# Analyze single-report and single-build patterns #
sub fail_type($) @@ -434,6 +442,9 @@ sub get_fix_probability($$$) foreach my $testname (keys %tests) { my $test = $tests{$testname}; + $test->{last} = -1; + $test->{fixed} = 1; + foreach my $reportdir (@sortedreports) { my $testreport = $test->{testreports}->{$reportdir}; @@ -498,9 +509,18 @@ foreach my $testname (keys %tests) $testreport->{failruns} += $testreport->{fixedruns} + 1; $testreport->{fixedruns} = 0; } + if ($test->{last} < $testreport->{last}) + { + $test->{last} = $testreport->{last}; + } + } next if (!$testreport->{failed}); - next if (!$testreport->{fixedruns}); + if (!$testreport->{fixedruns}) + { + $test->{fixed} = 0; + next; + }
if ($testreport->{failtype} eq "random") { @@ -518,7 +538,60 @@ foreach my $testname (keys %tests) # it is fixed. $testreport->{fixed} = 1; } + $test->{fixed} = 0 if ($testreport->{fixed} < $fixed_threshold); + } + if ($test->{last} == @sortedbuilds-1) + { + $test->{fixed} = 0; + next; + } + + # No need for further analysis if this test unit seems fixed already + next if ($test->{fixed}); + next if ($test->{last} < @sortedbuilds - $patternbuilds); + + # Analyze the pattern formed by the results from the build of the latest + # failure: sometimes a commit will cause failures in many reports and is + # fixed soon after (e.g. next day). In such a case analysing the reports + # individually does not provide enough evidence that the bug is fixed until + # many days later. But analysing the results on the last failure build can + # yield evidence of a high failure rate so that the fix is obvious in the + # next few days. + my $lastbuild = $sortedbuilds[$test->{last}]; + my ($failures, $failruns, $fixedruns); + foreach my $reportdir (keys %{$test->{testreports}}) + { + my $testreport = $test->{testreports}->{$reportdir}; + next if (!$testreport->{failed}); + + my $status = $testreport->{status}->{$lastbuild->{name}}; + if (!defined $status and + $lastbuild->{hasreport}->{$reportdir} and + $lastbuild->{hastest}->{$testname}) + { + $failruns++; + next; + } + + my $failtype = fail_type($status); + next if ($failtype ne "random"); + $failures++; + $failruns++; + + for my $i ($test->{last}+1..@sortedbuilds-1) + { + my $build = $sortedbuilds[$i]; + $status = $testreport->{status}->{$build->{name}}; + if (!defined $status and + $build->{hasreport}->{$reportdir} and + $build->{hastest}->{$testname}) + { + $fixedruns++; + } + } } + next if (!$fixedruns); # no evidence of the issue being fixed + $test->{fixed} = get_fix_probability($failures, $failruns, $fixedruns); }
@@ -809,6 +882,7 @@ EOF } next if ($last == -1); # no report had a pattern of interest
+ $fixed = 1 if (($test->{fixed} || 0) >= $fixed_threshold); my $listid = ($fixed or $last < @sortedbuilds - $patternbuilds) ? "old" : ($first > $patternbuilds) ? "recent" : "regular";