If a test configuration had a high failure rate and no longer has any failure, it is not necessary to wait for $patternbuilds builds to consider the failure fixed. Then if the failures are fixed in all test configurations, the test unit can be moved to the old/fixed failures list.
Signed-off-by: Francois Gouget <fgouget@codeweavers.com> --- winetest/build-patterns | 81 ++++++++++++++++++++++++++++++++++++++++- winetest/winetest.conf | 3 ++ 2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/winetest/build-patterns b/winetest/build-patterns index 32bae6b31..6a14fc372 100755 --- a/winetest/build-patterns +++ b/winetest/build-patterns @@ -31,7 +31,7 @@ sub BEGIN } unshift @INC, $1 if ($0 =~ m=^(/.*)/[^/]+$=); } -use vars qw/$workdir $gitdir $patternbuilds/; +use vars qw/$workdir $gitdir $patternbuilds $fixed_threshold/; require "winetest.conf";
my $name0=$0; @@ -275,6 +275,20 @@ my %reports; # The @sortedbuilds index of the most recent build with a failure. # -1 if there is none. # +# - failures +# The number of builds that had a failure between the first and last failure +# builds. +# +# - failruns +# The number of test results between the first and last failure builds. +# Note that if there are builds for which WineTest was not run this will be +# different from last-first+1. +# +# - fixedruns +# The number of successful test results following last. Note that if +# WineTest was not run (yet?) for some builds this will be different from +# @sortedbuilds-last. +# # - status # A hashtable of test results indexed by the build name. my %tests; @@ -389,6 +403,34 @@ sub fail_type($) "random"; }
+sub get_fix_probability($$$) +{ + my ($failures, $failruns, $fixedruns) = @_; + + # We want a lower bound on the failure rate so we get a lower bound on the + # probability that a failure is fixed. + # So ideally we would compute the lowest failure rate that has a + # probability greater than some arbitrary value of giving us $failures in + # $failruns runs. + # - For instance a failure rate of 100% would obviously give us 2 failures + # out of 2 runs. But there is also a 49% chance for a 70% failure rate to + # have the same result. So the lowest failure rate that has more than + # an arbitrary 50% chance of matching this result is ~71%. + # - But that's hard to compute in the general case. + # - So instead just convert one failure to success and compute the ratio. + # - This gives a 0% failure rate for the 1/1 case which is fine because + # this case really does not have enough data to derive a failure rate. + # - For the 2/2 case this gives 50% instead of a more likely value like 71%. + # But that's still good enough for our purpose. + # - The results continue on the low side up to at least $failruns=10 and + # then the difference is pretty small anyway. + my $failrate = ($failures - 1) / $failruns; + + # Then compute the probability of getting $fixedruns successes in a row. + # The complement is the probability that the failure has been fixed. + return 1 - (1 - $failrate) ** $fixedruns; +} + foreach my $testname (keys %tests) { my $test = $tests{$testname}; @@ -404,6 +446,11 @@ foreach my $testname (keys %tests) $testreport->{last} = -1; # - Type of failure: random or not (missing dll, etc.) $testreport->{failtype} = ""; + # - Statistics to compute the failure rate + $testreport->{failures} = 0; + $testreport->{failruns} = 0; + # - And the number of successful runs after the last failure + $testreport->{fixedruns} = 0;
for my $i (0..@sortedbuilds-1) { @@ -415,6 +462,7 @@ foreach my $testname (keys %tests) $build->{hastest}->{$testname}) { $testreport->{failtype} ||= 0; # success + $testreport->{fixedruns}++; } # else WineTest was not run for this build next; @@ -437,12 +485,39 @@ foreach my $testname (keys %tests) $testreport->{first} = $i; $testreport->{last} = $i; $testreport->{failtype} = $failtype; + $testreport->{failures} = 1; + # Assume the bug was introduced with the first failure and thus + # ignore successful runs that preceded it. + $testreport->{failruns} = 1; + $testreport->{fixedruns} = 0; } else { $testreport->{last} = $i; + $testreport->{failures}++; + $testreport->{failruns} += $testreport->{fixedruns} + 1; + $testreport->{fixedruns} = 0; } } + next if (!$testreport->{failed}); + next if (!$testreport->{fixedruns}); + + if ($testreport->{failtype} eq "random") + { + # - failruns counts the number of runs from the first to the last + # failure. + # - Both failruns and fixedruns account for the builds where the + # test was not run. + # - So for instance '...eeeF._F.._' gives failures=2 (not 5), + # failruns=3 (not 4, 9 or 10), and fixedruns=2 (not 3). + $testreport->{fixed} = get_fix_probability($testreport->{failures}, $testreport->{failruns}, $testreport->{fixedruns}); + } + else + { + # Since this failure is not random, even a single success means + # it is fixed. + $testreport->{fixed} = 1; + } } }
@@ -710,11 +785,13 @@ EOF
my $first = @sortedbuilds; my $last = -1; + my $fixed = 1; foreach my $reportdir (keys %$pagereports) { my $testreport = $test->{testreports}->{$reportdir}; next if (!$testreport->{failed});
+ $fixed = 0 if (($testreport->{fixed} || 0) < $fixed_threshold); $first = $testreport->{first} if ($testreport->{first} < $first); # For non-random failures we only care about the transition to # the failure state, which is recorded in 'first'. @@ -732,7 +809,7 @@ EOF } next if ($last == -1); # no report had a pattern of interest
- my $listid = ($last < @sortedbuilds - $patternbuilds) ? "old" : + my $listid = ($fixed or $last < @sortedbuilds - $patternbuilds) ? "old" : ($first > $patternbuilds) ? "recent" : "regular"; push @{$lists{$listid}->{testnames}}, $testname; diff --git a/winetest/winetest.conf b/winetest/winetest.conf index f193f8eed..1f587b3bc 100644 --- a/winetest/winetest.conf +++ b/winetest/winetest.conf @@ -27,4 +27,7 @@ $maxfilesize = 1.5 * 1024 * 1024; # The number of builds after which a failure is considered old / new $patternbuilds = 10;
+# Probability above which a failure is considered to be fixed (0..1) +$fixed_threshold = 0.99; + 1; # keep require happy