Merge lp:~percona-toolkit-dev/percona-toolkit/fix-pqd-infinite-loop-bug-888114 into lp:percona-toolkit/2.1

Proposed by Daniel Nichter
Status: Merged
Merged at revision: 262
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/fix-pqd-infinite-loop-bug-888114
Merge into: lp:percona-toolkit/2.1
Diff against target: 269 lines (+130/-20)
5 files modified
bin/pt-query-digest (+38/-6)
lib/Pipeline.pm (+25/-4)
t/lib/Pipeline.t (+44/-6)
t/pt-query-digest/continue_on_error.t (+23/-1)
t/pt-query-digest/mirror.t (+0/-3)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/fix-pqd-infinite-loop-bug-888114
Reviewer Review Type Date Requested Status
Daniel Nichter Approve
Review via email: mp+107129@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Daniel Nichter (daniel-nichter) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/pt-query-digest'
2--- bin/pt-query-digest 2012-04-03 19:42:45 +0000
3+++ bin/pt-query-digest 2012-05-23 22:10:24 +0000
4@@ -11745,6 +11745,9 @@
5
6 push @{$self->{procs}}, $process;
7 push @{$self->{names}}, $name;
8+ if ( my $n = $args{retry_on_error} ) {
9+ $self->{retries}->{$name} = $n;
10+ }
11 if ( $self->{instrument} ) {
12 $self->{instrumentation}->{$name} = { time => 0, calls => 0 };
13 }
14@@ -11809,10 +11812,28 @@
15 }
16 };
17 if ( $EVAL_ERROR ) {
18- warn "Pipeline process $procno ("
19- . ($self->{names}->[$procno] || "")
20- . ") caused an error: $EVAL_ERROR";
21- die $EVAL_ERROR unless $self->{continue_on_error};
22+ my $name = $self->{names}->[$procno] || "";
23+ my $msg = "Pipeline process " . ($procno + 1)
24+ . " ($name) caused an error: "
25+ . $EVAL_ERROR;
26+ if ( defined $self->{retries}->{$name} ) {
27+ my $n = $self->{retries}->{$name};
28+ if ( $n ) {
29+ warn $msg . "Will retry pipeline process $procno ($name) "
30+ . "$n more " . ($n > 1 ? "times" : "time") . ".\n";
31+ $self->{retries}->{$name}--;
32+ }
33+ else {
34+ die $msg . "Terminating pipeline because process $procno "
35+ . "($name) caused too many errors.\n";
36+ }
37+ }
38+ elsif ( !$self->{continue_on_error} ) {
39+ die $msg;
40+ }
41+ else {
42+ warn $msg;
43+ }
44 }
45 }
46
47@@ -12562,8 +12583,14 @@
48
49 { # iteration
50 $pipeline->add(
51- name => 'iteration',
52- process => sub {
53+ # This is a critical proc: if we die here, we probably need
54+ # to stop, else an infinite loop can develop:
55+ # https://bugs.launchpad.net/percona-toolkit/+bug/888114
56+ # We'll retry twice in case the problem is just one bad
57+ # query class, or something like that.
58+ retry_on_error => 2,
59+ name => 'iteration',
60+ process => sub {
61 my ( $args ) = @_;
62
63 # Start the (next) iteration.
64@@ -13239,6 +13266,11 @@
65 );
66
67 if ( $o->get('report') ) {
68+ # XXX There's a bug here: --expected-range '','' will cause
69+ # Use of uninitialized value in numeric lt (<)
70+ # This bug is intentionally left unfixed at the moment because
71+ # we exploit it to test a more serious bug: an infinite loop:
72+ # https://bugs.launchpad.net/percona-toolkit/+bug/888114
73 my $expected_range = $o->get('expected-range');
74 my $explain_why = $expected_range
75 && ( @$worst < $expected_range->[0]
76
77=== modified file 'lib/Pipeline.pm'
78--- lib/Pipeline.pm 2012-01-19 19:46:56 +0000
79+++ lib/Pipeline.pm 2012-05-23 22:10:24 +0000
80@@ -71,6 +71,9 @@
81
82 push @{$self->{procs}}, $process;
83 push @{$self->{names}}, $name;
84+ if ( my $n = $args{retry_on_error} ) {
85+ $self->{retries}->{$name} = $n;
86+ }
87 if ( $self->{instrument} ) {
88 $self->{instrumentation}->{$name} = { time => 0, calls => 0 };
89 }
90@@ -156,10 +159,28 @@
91 }
92 };
93 if ( $EVAL_ERROR ) {
94- warn "Pipeline process $procno ("
95- . ($self->{names}->[$procno] || "")
96- . ") caused an error: $EVAL_ERROR";
97- die $EVAL_ERROR unless $self->{continue_on_error};
98+ my $name = $self->{names}->[$procno] || "";
99+ my $msg = "Pipeline process " . ($procno + 1)
100+ . " ($name) caused an error: "
101+ . $EVAL_ERROR;
102+ if ( defined $self->{retries}->{$name} ) {
103+ my $n = $self->{retries}->{$name};
104+ if ( $n ) {
105+ warn $msg . "Will retry pipeline process $procno ($name) "
106+ . "$n more " . ($n > 1 ? "times" : "time") . ".\n";
107+ $self->{retries}->{$name}--;
108+ }
109+ else {
110+ die $msg . "Terminating pipeline because process $procno "
111+ . "($name) caused too many errors.\n";
112+ }
113+ }
114+ elsif ( !$self->{continue_on_error} ) {
115+ die $msg;
116+ }
117+ else {
118+ warn $msg;
119+ }
120 }
121 }
122
123
124=== modified file 't/lib/Pipeline.t'
125--- t/lib/Pipeline.t 2012-03-06 13:56:08 +0000
126+++ t/lib/Pipeline.t 2012-05-23 22:10:24 +0000
127@@ -9,7 +9,7 @@
128 use strict;
129 use warnings FATAL => 'all';
130 use English qw(-no_match_vars);
131-use Test::More tests => 10;
132+use Test::More tests => 11;
133
134 use Time::HiRes qw(usleep);
135
136@@ -206,11 +206,13 @@
137 $oktorun = 1;
138 $pipeline = new Pipeline(continue_on_error=>1);
139
140-my @die = qw(1 0);
141+my @called = ();
142+my @die = qw(1 0);
143 $pipeline->add(
144 name => 'proc1',
145 process => sub {
146 my ( $args ) = @_;
147+ push @called, 'proc1';
148 die "I'm an error" if shift @die;
149 return $args;
150 },
151@@ -218,7 +220,7 @@
152 $pipeline->add(
153 name => 'proc2',
154 process => sub {
155- print "proc2";
156+ push @called, 'proc2';
157 $oktorun = 0;
158 return;
159 },
160@@ -229,12 +231,48 @@
161 stderr => 1,
162 );
163
164-like(
165- $output,
166- qr/0 \(proc1\) caused an error.+proc2/s,
167+is_deeply(
168+ \@called,
169+ [qw(proc1 proc1 proc2)],
170 "Continues on error"
171 );
172
173+# Override global for critical procs that must kill the pipeline if they die.
174+$oktorun = 1;
175+$pipeline = new Pipeline(continue_on_error=>1);
176+
177+@called = ();
178+$pipeline->add(
179+ name => 'proc1',
180+ process => sub {
181+ my ( $args ) = @_;
182+ push @called, 'proc1';
183+ $oktorun = 0 if @called > 8;
184+ return $args;
185+ },
186+);
187+$pipeline->add(
188+ retry_on_error => 2,
189+ name => 'proc2',
190+ process => sub {
191+ push @called, 'proc2';
192+ die;
193+ },
194+);
195+
196+$output = output(
197+ sub { $pipeline->execute(%args) },
198+ stderr => 1,
199+);
200+
201+is_deeply(
202+ \@called,
203+ [qw(proc1 proc2), # first attempt
204+ qw(proc1 proc2), # retry 1/2
205+ qw(proc1 proc2)], # retry 2/2
206+ "Retry proc"
207+) or diag($output);
208+
209 # #############################################################################
210 # Done.
211 # #############################################################################
212
213=== modified file 't/pt-query-digest/continue_on_error.t'
214--- t/pt-query-digest/continue_on_error.t 2011-07-12 22:56:55 +0000
215+++ t/pt-query-digest/continue_on_error.t 2012-05-23 22:10:24 +0000
216@@ -9,9 +9,12 @@
217 use strict;
218 use warnings FATAL => 'all';
219 use English qw(-no_match_vars);
220-use Test::More tests => 2;
221+use Test::More tests => 3;
222
223 use PerconaTest;
224+shift @INC; # our unshift (above)
225+shift @INC; # PerconaTest's unshift
226+require "$trunk/bin/pt-query-digest";
227
228 my $output;
229
230@@ -30,6 +33,25 @@
231 'Continues on error by default'
232 );
233
234+# #############################################################################
235+# Infinite loop in pt-query-digest if a report crashe
236+# https://bugs.launchpad.net/percona-toolkit/+bug/888114
237+# #############################################################################
238+
239+# This bug is due to the fact that --continue-on-error is on by default.
240+# To reproduce the problem, we must intentionally crash pt-query-digest
241+# in the right place, which means we're using another bug:a
242+$output = output(
243+ sub { pt_query_digest::main("$trunk/t/lib/samples/slowlogs/slow002.txt",
244+ "--expected-range", "'',''") },
245+ stderr => 1,
246+);
247+
248+like(
249+ $output,
250+ qr/Argument \S+ isn't numeric/,
251+ "Report crashed, but no infinite loop (bug 888114)"
252+);
253
254 # #############################################################################
255 # Done.
256
257=== modified file 't/pt-query-digest/mirror.t'
258--- t/pt-query-digest/mirror.t 2012-05-09 13:48:59 +0000
259+++ t/pt-query-digest/mirror.t 2012-05-23 22:10:24 +0000
260@@ -36,9 +36,6 @@
261 my $pid_file = "/tmp/pt-query-digest-mirror-test.pid";
262 diag(`rm $pid_file 2>/dev/null`);
263
264-my $pid_file = '/tmp/pt-query-digest.test.pid';
265-`rm -rf $pid_file >/dev/null`;
266-
267 # ##########################################################################
268 # Tests for swapping --processlist and --execute
269 # ##########################################################################

Subscribers

People subscribed via source and target branches