Module: tools
Branch: master
Commit: 1528f95b83cd13e118e2609cfc3752063ef22ae7
URL: https://source.winehq.org/git/tools.git/?a=commit;h=1528f95b83cd13e118e2609cfc3752063ef22ae7
Author: Francois Gouget <fgouget(a)codeweavers.com>
Date: Fri Apr 30 14:57:14 2021 +0200
winetest/build-patterns: Detect fixed failures from the last failure day.
Instead of analyzing the failures of a single report across time, this
analyzes the results of the last build with failures across reports.
This is then compared to the failure-free results of the next days to
determine the probability that the failure has been fixed.
This allows detecting when short-lived issues that impacted many test
configurations are fixed.
Signed-off-by: Francois Gouget <fgouget(a)codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard(a)winehq.org>
---
winetest/build-patterns | 78 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 76 insertions(+), 2 deletions(-)
diff --git a/winetest/build-patterns b/winetest/build-patterns
index 6a14fc3..460c17c 100755
--- a/winetest/build-patterns
+++ b/winetest/build-patterns
@@ -252,6 +252,14 @@ my %reports;
# - colors
# A hashtable of colors indexed by failure count.
#
+# - last
+# The @sortedbuilds index of the most recent build for which a report has a
+# failure. -1 if there is none.
+#
+# - fixed
+# 0, 1 or the probability (0..1) that the test is fixed, based on the successful
+# results after the last failure; it is compared against $fixed_threshold.
+#
# - testreports
# A hashtable mapping report directory names to objects storing the results
# for that test and report combination. Each testreport object has the
@@ -389,7 +397,7 @@ my @sortedreports = sort cmpreports keys %reports;
#
-# Analyze single-report patterns
+# Analyze single-report and single-build patterns
#
sub fail_type($)
@@ -434,6 +442,9 @@ sub get_fix_probability($$$)
foreach my $testname (keys %tests)
{
my $test = $tests{$testname};
+ $test->{last} = -1;
+ $test->{fixed} = 1;
+
foreach my $reportdir (@sortedreports)
{
my $testreport = $test->{testreports}->{$reportdir};
@@ -498,9 +509,18 @@ foreach my $testname (keys %tests)
$testreport->{failruns} += $testreport->{fixedruns} + 1;
$testreport->{fixedruns} = 0;
}
+ if ($test->{last} < $testreport->{last})
+ {
+ $test->{last} = $testreport->{last};
+ }
+
}
next if (!$testreport->{failed});
- next if (!$testreport->{fixedruns});
+ if (!$testreport->{fixedruns})
+ {
+ $test->{fixed} = 0;
+ next;
+ }
if ($testreport->{failtype} eq "random")
{
@@ -518,7 +538,60 @@ foreach my $testname (keys %tests)
# it is fixed.
$testreport->{fixed} = 1;
}
+ $test->{fixed} = 0 if ($testreport->{fixed} < $fixed_threshold);
+ }
+ if ($test->{last} == @sortedbuilds-1)
+ {
+ $test->{fixed} = 0;
+ next;
+ }
+
+ # No need for further analysis if this test unit seems fixed already
+ next if ($test->{fixed});
+ next if ($test->{last} < @sortedbuilds - $patternbuilds);
+
+ # Analyze the pattern formed by the results from the build of the latest
+ # failure: sometimes a commit will cause failures in many reports and is
+ # fixed soon after (e.g. next day). In such a case analysing the reports
+ # individually does not provide enough evidence that the bug is fixed until
+ # many days later. But analysing the results on the last failure build can
+ # yield evidence of a high failure rate so that the fix is obvious in the
+ # next few days.
+ my $lastbuild = $sortedbuilds[$test->{last}];
+ my ($failures, $failruns, $fixedruns);
+ foreach my $reportdir (keys %{$test->{testreports}})
+ {
+ my $testreport = $test->{testreports}->{$reportdir};
+ next if (!$testreport->{failed});
+
+ my $status = $testreport->{status}->{$lastbuild->{name}};
+ if (!defined $status and
+ $lastbuild->{hasreport}->{$reportdir} and
+ $lastbuild->{hastest}->{$testname})
+ {
+ $failruns++;
+ next;
+ }
+
+ my $failtype = fail_type($status);
+ next if ($failtype ne "random");
+ $failures++;
+ $failruns++;
+
+ for my $i ($test->{last}+1..@sortedbuilds-1)
+ {
+ my $build = $sortedbuilds[$i];
+ $status = $testreport->{status}->{$build->{name}};
+ if (!defined $status and
+ $build->{hasreport}->{$reportdir} and
+ $build->{hastest}->{$testname})
+ {
+ $fixedruns++;
+ }
+ }
}
+ next if (!$fixedruns); # no evidence of the issue being fixed
+ $test->{fixed} = get_fix_probability($failures, $failruns, $fixedruns);
}
@@ -809,6 +882,7 @@ EOF
}
next if ($last == -1); # no report had a pattern of interest
+ $fixed = 1 if (($test->{fixed} || 0) >= $fixed_threshold);
my $listid = ($fixed or $last < @sortedbuilds - $patternbuilds) ? "old" :
($first > $patternbuilds) ? "recent" :
"regular";
Module: tools
Branch: master
Commit: 3928647cdad708cac2549e39e486eb2fdce5a116
URL: https://source.winehq.org/git/tools.git/?a=commit;h=3928647cdad708cac2549e39e486eb2fdce5a116
Author: Francois Gouget <fgouget(a)codeweavers.com>
Date: Fri Apr 30 14:56:16 2021 +0200
winetest/build-patterns: Detect fixed failures at the report level.
If a test configuration had a high failure rate and no longer has any
failure, it is not necessary to wait for $patternbuilds builds to
consider the failure fixed.
Then if the failures are fixed in all test configurations, the test unit
can be moved to the old/fixed failures list.
Signed-off-by: Francois Gouget <fgouget(a)codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard(a)winehq.org>
---
winetest/build-patterns | 81 +++++++++++++++++++++++++++++++++++++++++++++++--
winetest/winetest.conf | 3 ++
2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/winetest/build-patterns b/winetest/build-patterns
index 32bae6b..6a14fc3 100755
--- a/winetest/build-patterns
+++ b/winetest/build-patterns
@@ -31,7 +31,7 @@ sub BEGIN
}
unshift @INC, $1 if ($0 =~ m=^(/.*)/[^/]+$=);
}
-use vars qw/$workdir $gitdir $patternbuilds/;
+use vars qw/$workdir $gitdir $patternbuilds $fixed_threshold/;
require "winetest.conf";
my $name0=$0;
@@ -275,6 +275,20 @@ my %reports;
# The @sortedbuilds index of the most recent build with a failure.
# -1 if there is none.
#
+# - failures
+# The number of builds that had a failure between the first and last failure
+# builds.
+#
+# - failruns
+# The number of test results between the first and last failure builds.
+# Note that if there are builds for which WineTest was not run this will be
+# different from last-first+1.
+#
+# - fixedruns
+# The number of successful test results following last. Note that if
+# WineTest was not run (yet?) for some builds this will be different from
+# @sortedbuilds-last.
+#
# - status
# A hashtable of test results indexed by the build name.
my %tests;
@@ -389,6 +403,34 @@ sub fail_type($)
"random";
}
+sub get_fix_probability($$$)
+{
+ my ($failures, $failruns, $fixedruns) = @_;
+
+ # We want a lower bound on the failure rate so we get a lower bound on the
+ # probability that a failure is fixed.
+ # So ideally we would compute the lowest failure rate that has a
+ # probability greater than some arbitrary value of giving us $failures in
+ # $failruns runs.
+ # - For instance a failure rate of 100% would obviously give us 2 failures
+ # out of 2 runs. But there is also a 49% chance for a 70% failure rate to
+ # have the same result. So the lowest failure rate that has more than
+ # an arbitrary 50% chance of matching this result is ~71%.
+ # - But that's hard to compute in the general case.
+ # - So instead just convert one failure to success and compute the ratio.
+ # - This gives a 0% failure rate for the 1/1 case which is fine because
+ # this case really does not have enough data to derive a failure rate.
+ # - For the 2/2 case this gives 50% instead of a more likely value like 71%.
+ # But that's still good enough for our purpose.
+ # - The results continue on the low side up to at least $failruns=10 and
+ # then the difference is pretty small anyway.
+ my $failrate = ($failures - 1) / $failruns; # NOTE(review): division is unguarded; presumably callers always pass $failruns >= 1 - confirm
+
+ # Then compute the probability of getting $fixedruns successes in a row.
+ # The complement is the probability that the failure has been fixed.
+ return 1 - (1 - $failrate) ** $fixedruns;
+}
+
foreach my $testname (keys %tests)
{
my $test = $tests{$testname};
@@ -404,6 +446,11 @@ foreach my $testname (keys %tests)
$testreport->{last} = -1;
# - Type of failure: random or not (missing dll, etc.)
$testreport->{failtype} = "";
+ # - Statistics to compute the failure rate
+ $testreport->{failures} = 0;
+ $testreport->{failruns} = 0;
+ # - And the number of successful runs after the last failure
+ $testreport->{fixedruns} = 0;
for my $i (0..@sortedbuilds-1)
{
@@ -415,6 +462,7 @@ foreach my $testname (keys %tests)
$build->{hastest}->{$testname})
{
$testreport->{failtype} ||= 0; # success
+ $testreport->{fixedruns}++;
}
# else WineTest was not run for this build
next;
@@ -437,12 +485,39 @@ foreach my $testname (keys %tests)
$testreport->{first} = $i;
$testreport->{last} = $i;
$testreport->{failtype} = $failtype;
+ $testreport->{failures} = 1;
+ # Assume the bug was introduced with the first failure and thus
+ # ignore successful runs that preceded it.
+ $testreport->{failruns} = 1;
+ $testreport->{fixedruns} = 0;
}
else
{
$testreport->{last} = $i;
+ $testreport->{failures}++;
+ $testreport->{failruns} += $testreport->{fixedruns} + 1;
+ $testreport->{fixedruns} = 0;
}
}
+ next if (!$testreport->{failed});
+ next if (!$testreport->{fixedruns});
+
+ if ($testreport->{failtype} eq "random")
+ {
+ # - failruns counts the number of runs from the first to the last
+ # failure.
+ # - Both failruns and fixedruns account for the builds where the
+ # test was not run.
+ # - So for instance '...eeeF._F.._' gives failures=2 (not 5),
+ # failruns=3 (not 4, 9 or 10), and fixedruns=2 (not 3).
+ $testreport->{fixed} = get_fix_probability($testreport->{failures}, $testreport->{failruns}, $testreport->{fixedruns});
+ }
+ else
+ {
+ # Since this failure is not random, even a single success means
+ # it is fixed.
+ $testreport->{fixed} = 1;
+ }
}
}
@@ -710,11 +785,13 @@ EOF
my $first = @sortedbuilds;
my $last = -1;
+ my $fixed = 1;
foreach my $reportdir (keys %$pagereports)
{
my $testreport = $test->{testreports}->{$reportdir};
next if (!$testreport->{failed});
+ $fixed = 0 if (($testreport->{fixed} || 0) < $fixed_threshold);
$first = $testreport->{first} if ($testreport->{first} < $first);
# For non-random failures we only care about the transition to
# the failure state, which is recorded in 'first'.
@@ -732,7 +809,7 @@ EOF
}
next if ($last == -1); # no report had a pattern of interest
- my $listid = ($last < @sortedbuilds - $patternbuilds) ? "old" :
+ my $listid = ($fixed or $last < @sortedbuilds - $patternbuilds) ? "old" :
($first > $patternbuilds) ? "recent" :
"regular";
push @{$lists{$listid}->{testnames}}, $testname;
diff --git a/winetest/winetest.conf b/winetest/winetest.conf
index f193f8e..1f587b3 100644
--- a/winetest/winetest.conf
+++ b/winetest/winetest.conf
@@ -27,4 +27,7 @@ $maxfilesize = 1.5 * 1024 * 1024;
# The number of builds after which a failure is considered old / new
$patternbuilds = 10;
+# Probability above which a failure is considered to be fixed (0..1)
+$fixed_threshold = 0.99;
+
1; # keep require happy