Merge lp:~percona-toolkit-dev/percona-toolkit/pt-hearbeat-monitor-master-change into lp:percona-toolkit/2.2
- pt-hearbeat-monitor-master-change
- Merge into 2.2
Proposed by
Daniel Nichter
Status: | Needs review |
---|---|
Proposed branch: | lp:~percona-toolkit-dev/percona-toolkit/pt-hearbeat-monitor-master-change |
Merge into: | lp:percona-toolkit/2.2 |
Diff against target: |
395 lines (+250/-13) 2 files modified
bin/pt-heartbeat (+76/-13) t/pt-heartbeat/check_master.t (+174/-0) |
To merge this branch: | bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-hearbeat-monitor-master-change |
Related bugs: | |
Related blueprints: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Daniel Nichter | Pending | ||
Review via email: mp+205877@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
- 595. By Daniel Nichter
-
Implement --check-master-server-id.
- 596. By Daniel Nichter
-
Add more --check-master-server-id docs.
Unmerged revisions
- 596. By Daniel Nichter
-
Add more --check-master-server-id docs.
- 595. By Daniel Nichter
-
Implement --check-master-server-id.
- 594. By Daniel Nichter
-
Add --check-master-server-id and a failing test for it.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'bin/pt-heartbeat' | |||
2 | --- bin/pt-heartbeat 2013-12-18 23:53:01 +0000 | |||
3 | +++ bin/pt-heartbeat 2014-02-12 02:05:53 +0000 | |||
4 | @@ -4778,6 +4778,7 @@ | |||
5 | 4778 | use List::Util qw(min max sum); | 4778 | use List::Util qw(min max sum); |
6 | 4779 | use Time::HiRes qw(gettimeofday time sleep usleep); | 4779 | use Time::HiRes qw(gettimeofday time sleep usleep); |
7 | 4780 | use IO::File; | 4780 | use IO::File; |
8 | 4781 | use Carp qw(confess); | ||
9 | 4781 | 4782 | ||
10 | 4782 | use Percona::Toolkit; | 4783 | use Percona::Toolkit; |
11 | 4783 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; | 4784 | use constant PTDEBUG => $ENV{PTDEBUG} || 0; |
12 | @@ -4840,6 +4841,14 @@ | |||
13 | 4840 | if ( !$o->get('stop') && !$o->get('database') ) { | 4841 | if ( !$o->get('stop') && !$o->get('database') ) { |
14 | 4841 | $o->save_error('--database must be specified'); | 4842 | $o->save_error('--database must be specified'); |
15 | 4842 | } | 4843 | } |
16 | 4844 | |||
17 | 4845 | if ( $o->get('check-master-server-id') > 0 && !$o->get('monitor') ) { | ||
18 | 4846 | $o->save_error('--check-master-server-id requires --monitor'); | ||
19 | 4847 | } | ||
20 | 4848 | } | ||
21 | 4849 | |||
22 | 4850 | if ( $o->get('check-master-server-id') > 0 ) { | ||
23 | 4851 | $o->set('print-master-server-id', 1); | ||
24 | 4843 | } | 4852 | } |
25 | 4844 | 4853 | ||
26 | 4845 | eval { | 4854 | eval { |
27 | @@ -4958,6 +4967,7 @@ | |||
28 | 4958 | 4967 | ||
29 | 4959 | # pk_col and pk_val are used to identify the heartbeat row to update or | 4968 | # pk_col and pk_val are used to identify the heartbeat row to update or |
30 | 4960 | # or monitor. | 4969 | # or monitor. |
31 | 4970 | my ($ms, $master_server_id); | ||
32 | 4961 | my ($pk_col, $pk_val); | 4971 | my ($pk_col, $pk_val); |
33 | 4962 | if ( $id ) { | 4972 | if ( $id ) { |
34 | 4963 | # Legacy mode: update heartbeat row WHERE id=1 and monitor heartbeat | 4973 | # Legacy mode: update heartbeat row WHERE id=1 and monitor heartbeat |
35 | @@ -4973,10 +4983,10 @@ | |||
36 | 4973 | $pk_val = $server_id; | 4983 | $pk_val = $server_id; |
37 | 4974 | } | 4984 | } |
38 | 4975 | else { # monitor or check | 4985 | else { # monitor or check |
40 | 4976 | my $master_server_id = $o->get('master-server-id'); | 4986 | $master_server_id = $o->get('master-server-id'); |
41 | 4977 | if ( !$master_server_id ) { | 4987 | if ( !$master_server_id ) { |
42 | 4978 | eval { | 4988 | eval { |
44 | 4979 | my $ms = new MasterSlave( | 4989 | $ms = new MasterSlave( |
45 | 4980 | OptionParser => $o, | 4990 | OptionParser => $o, |
46 | 4981 | DSNParser => $dp, | 4991 | DSNParser => $dp, |
47 | 4982 | Quoter => $q, | 4992 | Quoter => $q, |
48 | @@ -5077,7 +5087,7 @@ | |||
49 | 5077 | 5087 | ||
50 | 5078 | $heartbeat_sth = $dbh->prepare($heartbeat_sql); | 5088 | $heartbeat_sth = $dbh->prepare($heartbeat_sql); |
51 | 5079 | 5089 | ||
53 | 5080 | my $ro_check = !!$o->get('check-read-only'); | 5090 | my $ro_check = $o->get('check-read-only'); |
54 | 5081 | $update_heartbeat = sub { | 5091 | $update_heartbeat = sub { |
55 | 5082 | my ($sth) = @_; | 5092 | my ($sth) = @_; |
56 | 5083 | my @vals; | 5093 | my @vals; |
57 | @@ -5151,15 +5161,16 @@ | |||
58 | 5151 | . ($dbi_driver eq 'mysql' ? '/*!50038, @@hostname AS host*/' : '') | 5161 | . ($dbi_driver eq 'mysql' ? '/*!50038, @@hostname AS host*/' : '') |
59 | 5152 | . ($id ? "" : ", server_id") | 5162 | . ($id ? "" : ", server_id") |
60 | 5153 | . " FROM $db_tbl " | 5163 | . " FROM $db_tbl " |
62 | 5154 | . "WHERE $pk_col='$pk_val' " | 5164 | . "WHERE $pk_col=? " |
63 | 5155 | . "LIMIT 1"; | 5165 | . "LIMIT 1"; |
64 | 5156 | PTDEBUG && _d("SELECT SQL:", $heartbeat_sql); | 5166 | PTDEBUG && _d("SELECT SQL:", $heartbeat_sql); |
65 | 5157 | 5167 | ||
66 | 5158 | $heartbeat_sth = $dbh->prepare($heartbeat_sql); | 5168 | $heartbeat_sth = $dbh->prepare($heartbeat_sql); |
67 | 5159 | 5169 | ||
68 | 5160 | $get_delay = sub { | 5170 | $get_delay = sub { |
71 | 5161 | my ($sth) = @_; | 5171 | my ($sth, $id) = @_; |
72 | 5162 | $sth->execute(); | 5172 | confess "No master server ID given" unless $id; |
73 | 5173 | $sth->execute($id); | ||
74 | 5163 | PTDEBUG && _d($sth->{Statement}); | 5174 | PTDEBUG && _d($sth->{Statement}); |
75 | 5164 | my ($ts, $hostname, $server_id) = $sth->fetchrow_array(); | 5175 | my ($ts, $hostname, $server_id) = $sth->fetchrow_array(); |
76 | 5165 | my $now = time; | 5176 | my $now = time; |
77 | @@ -5176,7 +5187,7 @@ | |||
78 | 5176 | $delay = 0.00 if $delay < 0; | 5187 | $delay = 0.00 if $delay < 0; |
79 | 5177 | 5188 | ||
80 | 5178 | $sth->finish(); | 5189 | $sth->finish(); |
82 | 5179 | return ($delay, $hostname, $pk_val); | 5190 | return ($delay, $hostname, $server_id); |
83 | 5180 | }; | 5191 | }; |
84 | 5181 | 5192 | ||
85 | 5182 | # https://bugs.launchpad.net/percona-toolkit/+bug/1163372 | 5193 | # https://bugs.launchpad.net/percona-toolkit/+bug/1163372 |
86 | @@ -5194,7 +5205,7 @@ | |||
87 | 5194 | $update_heartbeat->($heartbeat_sth); | 5205 | $update_heartbeat->($heartbeat_sth); |
88 | 5195 | } | 5206 | } |
89 | 5196 | else { | 5207 | else { |
91 | 5197 | $get_delay->($heartbeat_sth); | 5208 | $get_delay->($heartbeat_sth, $pk_val); |
92 | 5198 | } | 5209 | } |
93 | 5199 | $heartbeat_sth->finish(); | 5210 | $heartbeat_sth->finish(); |
94 | 5200 | 5211 | ||
95 | @@ -5224,6 +5235,7 @@ | |||
96 | 5224 | sth => $heartbeat_sth, | 5235 | sth => $heartbeat_sth, |
97 | 5225 | sql => $heartbeat_sql, | 5236 | sql => $heartbeat_sql, |
98 | 5226 | get_delay => $get_delay, | 5237 | get_delay => $get_delay, |
99 | 5238 | server_id => $pk_val, | ||
100 | 5227 | interval => $interval, | 5239 | interval => $interval, |
101 | 5228 | skew => $skew, | 5240 | skew => $skew, |
102 | 5229 | hires_ts => $hires_ts, | 5241 | hires_ts => $hires_ts, |
103 | @@ -5259,6 +5271,9 @@ | |||
104 | 5259 | # ######################################################################## | 5271 | # ######################################################################## |
105 | 5260 | # Monitor or update the heartbeat table. | 5272 | # Monitor or update the heartbeat table. |
106 | 5261 | # ######################################################################## | 5273 | # ######################################################################## |
107 | 5274 | my $check_server_id_time = $o->get('check-master-server-id'); | ||
108 | 5275 | my $last_server_id_check = int(time); | ||
109 | 5276 | |||
110 | 5262 | my $end = $o->get('run-time') ? int(time + $o->get('run-time')) : 0; | 5277 | my $end = $o->get('run-time') ? int(time + $o->get('run-time')) : 0; |
111 | 5263 | PTDEBUG && _d($end ? ('Will exit at', ts($end)) : 'Running forever'); | 5278 | PTDEBUG && _d($end ? ('Will exit at', ts($end)) : 'Running forever'); |
112 | 5264 | 5279 | ||
113 | @@ -5290,8 +5305,39 @@ | |||
114 | 5290 | } | 5305 | } |
115 | 5291 | 5306 | ||
116 | 5292 | if ( $o->get('monitor') ) { | 5307 | if ( $o->get('monitor') ) { |
117 | 5308 | if ( $check_server_id_time ) { | ||
118 | 5309 | # Time to --check-master-server-id? | ||
119 | 5310 | my $now = int(time); | ||
120 | 5311 | if ( $now - $last_server_id_check >= $check_server_id_time ) { | ||
121 | 5312 | PTDEBUG && _d("Checking master server id"); | ||
122 | 5313 | eval { | ||
123 | 5314 | my $master_dsn = $ms->get_master_dsn($dbh, $dsn, $dp) | ||
124 | 5315 | or die "This server is not a slave"; | ||
125 | 5316 | my $master_dbh = $dp->get_dbh( | ||
126 | 5317 | $dp->get_cxn_params($master_dsn), | ||
127 | 5318 | { AutoCommit => 1 } | ||
128 | 5319 | ); | ||
129 | 5320 | my ($new_master_server_id) | ||
130 | 5321 | = $master_dbh->selectrow_array('SELECT @@server_id'); | ||
131 | 5322 | $master_dbh->disconnect; | ||
132 | 5323 | if ( $new_master_server_id | ||
133 | 5324 | && $new_master_server_id != $master_server_id ) { | ||
134 | 5325 | PTDEBUG && _d("Master server id changed:", | ||
135 | 5326 | $master_server_id, "to", | ||
136 | 5327 | $new_master_server_id); | ||
137 | 5328 | $pk_val = $new_master_server_id; | ||
138 | 5329 | } | ||
139 | 5330 | }; | ||
140 | 5331 | if ( $EVAL_ERROR ) { | ||
141 | 5332 | PTDEBUG && _d("Error checking master id:", $EVAL_ERROR); | ||
142 | 5333 | } | ||
143 | 5334 | $last_server_id_check = $now; | ||
144 | 5335 | } | ||
145 | 5336 | } | ||
146 | 5337 | |||
147 | 5293 | $heartbeat_sth ||= $dbh->prepare($heartbeat_sql); | 5338 | $heartbeat_sth ||= $dbh->prepare($heartbeat_sql); |
149 | 5294 | my ($delay) = $get_delay->($heartbeat_sth); | 5339 | my ($delay, undef, $server_id) |
150 | 5340 | = $get_delay->($heartbeat_sth, $pk_val); | ||
151 | 5295 | 5341 | ||
152 | 5296 | unshift @samples, $delay; | 5342 | unshift @samples, $delay; |
153 | 5297 | pop @samples if @samples > $limit; | 5343 | pop @samples if @samples > $limit; |
154 | @@ -5302,7 +5348,7 @@ | |||
155 | 5302 | sum(@samples[0 .. $bound-1]) / $_; | 5348 | sum(@samples[0 .. $bound-1]) / $_; |
156 | 5303 | } @$frames; | 5349 | } @$frames; |
157 | 5304 | 5350 | ||
159 | 5305 | my $output = sprintf $format, $delay, @vals, $pk_val; | 5351 | my $output = sprintf $format, $delay, @vals, $server_id; |
160 | 5306 | if ( my $file = $o->get('file') ) { | 5352 | if ( my $file = $o->get('file') ) { |
161 | 5307 | open my $file, '>', $file | 5353 | open my $file, '>', $file |
162 | 5308 | or die "Can't open $file: $OS_ERROR"; | 5354 | or die "Can't open $file: $OS_ERROR"; |
163 | @@ -5354,11 +5400,11 @@ | |||
164 | 5354 | # Check the delay on a single server. Optionally recurse to all its slaves. | 5400 | # Check the delay on a single server. Optionally recurse to all its slaves. |
165 | 5355 | sub check_delay { | 5401 | sub check_delay { |
166 | 5356 | my ( %args ) = @_; | 5402 | my ( %args ) = @_; |
168 | 5357 | my @required_args = qw(dsn dbh sth sql get_delay interval skew OptionParser DSNParser); | 5403 | my @required_args = qw(dsn dbh sth sql get_delay server_id interval skew OptionParser DSNParser); |
169 | 5358 | foreach my $arg ( @required_args ) { | 5404 | foreach my $arg ( @required_args ) { |
170 | 5359 | die "I need a $arg argument" unless $args{$arg}; | 5405 | die "I need a $arg argument" unless $args{$arg}; |
171 | 5360 | } | 5406 | } |
173 | 5361 | my ($dsn, $dbh, $sth, $sql, $get_delay, $interval, $skew, $o, $dp) | 5407 | my ($dsn, $dbh, $sth, $sql, $get_delay, $server_id, $interval, $skew, $o, $dp) |
174 | 5362 | = @args{@required_args}; | 5408 | = @args{@required_args}; |
175 | 5363 | PTDEBUG && _d('Checking slave delay'); | 5409 | PTDEBUG && _d('Checking slave delay'); |
176 | 5364 | 5410 | ||
177 | @@ -5411,7 +5457,8 @@ | |||
178 | 5411 | } | 5457 | } |
179 | 5412 | sleep $next_interval - time; | 5458 | sleep $next_interval - time; |
180 | 5413 | PTDEBUG && _d('Woke up at', ts(time)); | 5459 | PTDEBUG && _d('Woke up at', ts(time)); |
182 | 5414 | my ($delay, $hostname, $master_server_id) = $get_delay->($sth); | 5460 | my ($delay, $hostname, $master_server_id) |
183 | 5461 | = $get_delay->($sth, $server_id); | ||
184 | 5415 | 5462 | ||
185 | 5416 | if ( $o->get('recurse') ) { | 5463 | if ( $o->get('recurse') ) { |
186 | 5417 | # Must print not only the delay, but the server's hostname if | 5464 | # Must print not only the delay, but the server's hostname if |
187 | @@ -5642,6 +5689,8 @@ | |||
188 | 5642 | 5689 | ||
189 | 5643 | L<"--daemonize"> and L<"--check"> are mutually exclusive. | 5690 | L<"--daemonize"> and L<"--check"> are mutually exclusive. |
190 | 5644 | 5691 | ||
191 | 5692 | L<"--master-server-id"> and L<"--check-master-server-id"> are mutually exclusive. | ||
192 | 5693 | |||
193 | 5645 | This tool accepts additional command-line arguments. Refer to the | 5694 | This tool accepts additional command-line arguments. Refer to the |
194 | 5646 | L<"SYNOPSIS"> and usage information for details. | 5695 | L<"SYNOPSIS"> and usage information for details. |
195 | 5647 | 5696 | ||
196 | @@ -5667,6 +5716,20 @@ | |||
197 | 5667 | their lag, too. The hostname or IP and port for each slave is printed | 5716 | their lag, too. The hostname or IP and port for each slave is printed |
198 | 5668 | before its delay. L<"--recurse"> only works with MySQL. | 5717 | before its delay. L<"--recurse"> only works with MySQL. |
199 | 5669 | 5718 | ||
200 | 5719 | =item --check-master-server-id | ||
201 | 5720 | |||
202 | 5721 | type: time; default: 0 | ||
203 | 5722 | |||
204 | 5723 | Check master server ID periodically and change L<"--monitor"> to match. | ||
205 | 5724 | The given time should be longer than L<"--interval">. For example, | ||
206 | 5725 | to L<"--monitor"> every 5 seconds and check the master server ID every | ||
207 | 5726 | 1 minute: | ||
208 | 5727 | |||
209 | 5728 | --monitor 5s --check-master-server-id 1m | ||
210 | 5729 | |||
211 | 5730 | This option implies L<"--print-master-server-id">. When the master server ID | ||
212 | 5731 | changes, the new value is printed. | ||
213 | 5732 | |||
214 | 5670 | =item --check-read-only | 5733 | =item --check-read-only |
215 | 5671 | 5734 | ||
216 | 5672 | Check if the server has read_only enabled; If it does, the tool skips doing | 5735 | Check if the server has read_only enabled; If it does, the tool skips doing |
217 | 5673 | 5736 | ||
218 | === added file 't/pt-heartbeat/check_master.t' | |||
219 | --- t/pt-heartbeat/check_master.t 1970-01-01 00:00:00 +0000 | |||
220 | +++ t/pt-heartbeat/check_master.t 2014-02-12 02:05:53 +0000 | |||
221 | @@ -0,0 +1,174 @@ | |||
222 | 1 | #!/usr/bin/env perl | ||
223 | 2 | |||
224 | 3 | BEGIN { | ||
225 | 4 | die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" | ||
226 | 5 | unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; | ||
227 | 6 | unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; | ||
228 | 7 | }; | ||
229 | 8 | |||
230 | 9 | use strict; | ||
231 | 10 | use warnings FATAL => 'all'; | ||
232 | 11 | use English qw(-no_match_vars); | ||
233 | 12 | use Test::More; | ||
234 | 13 | use Data::Dumper; | ||
235 | 14 | use File::Temp qw(tempfile); | ||
236 | 15 | |||
237 | 16 | use PerconaTest; | ||
238 | 17 | use Sandbox; | ||
239 | 18 | require "$trunk/bin/pt-heartbeat"; | ||
240 | 19 | |||
241 | 20 | my $dp = new DSNParser(opts=>$dsn_opts); | ||
242 | 21 | my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); | ||
243 | 22 | my $master1_dbh = $sb->get_dbh_for('master'); | ||
244 | 23 | |||
245 | 24 | # Standard setup is: | ||
246 | 25 | # 12345 -> 12346 -> 12347 | ||
247 | 26 | # So we don't mess with standard servers, start also: | ||
248 | 27 | # 12348 -> 12349 | ||
249 | 28 | # Then --update 12345 and 12348, start --monitor on 12349, then change | ||
250 | 29 | # its master to 12345. Disregard the names "master1" and "master2". | ||
251 | 30 | |||
252 | 31 | #diag("Starting extra sandboxes..."); | ||
253 | 32 | |||
254 | 33 | diag(`$trunk/sandbox/start-sandbox master 12348 >/dev/null`); | ||
255 | 34 | my $master2_dbh = $sb->get_dbh_for('master1'); | ||
256 | 35 | |||
257 | 36 | diag(`$trunk/sandbox/start-sandbox slave 12349 12348 >/dev/null`); | ||
258 | 37 | my $slave_dbh = $sb->get_dbh_for('master2'); | ||
259 | 38 | |||
260 | 39 | if ( !$master1_dbh ) { | ||
261 | 40 | plan skip_all => 'Cannot connect to sandbox master'; | ||
262 | 41 | } | ||
263 | 42 | elsif ( !$master2_dbh ) { | ||
264 | 43 | plan skip_all => 'Cannot connect to second sandbox master'; | ||
265 | 44 | } | ||
266 | 45 | elsif ( !$slave_dbh ) { | ||
267 | 46 | plan skip_all => 'Cannot connect to sandbox slave1'; | ||
268 | 47 | } | ||
269 | 48 | |||
270 | 49 | $sb->load_file('master', 't/pt-heartbeat/samples/precision-time-table.sql'); | ||
271 | 50 | $sb->load_file('master1', 't/pt-heartbeat/samples/precision-time-table.sql'); | ||
272 | 51 | |||
273 | 52 | # Get master relay pos before we create heartbeat table and row, | ||
274 | 53 | # else slave 12349 won't reply these queries and get heartbeat | ||
275 | 54 | # from master 12345. | ||
276 | 55 | my $s = $master1_dbh->selectrow_hashref("SHOW MASTER STATUS"); | ||
277 | 56 | |||
278 | 57 | $master1_dbh->do("INSERT INTO test.heartbeat (ts, server_id) VALUES (UTC_TIMESTAMP(), 12345)"); | ||
279 | 58 | $master2_dbh->do("INSERT INTO test.heartbeat (ts, server_id) VALUES (UTC_TIMESTAMP(), 12348)"); | ||
280 | 59 | |||
281 | 60 | # ########################################################################### | ||
282 | 61 | # Helper subs | ||
283 | 62 | # ########################################################################### | ||
284 | 63 | |||
285 | 64 | my $base_pidfile = (tempfile("/tmp/pt-heartbeat-test.XXXXXXXX", OPEN => 0, UNLINK => 0))[1]; | ||
286 | 65 | my $master_port = $sb->port_for('master'); | ||
287 | 66 | |||
288 | 67 | my @exec_pids; | ||
289 | 68 | my @pidfiles; | ||
290 | 69 | |||
291 | 70 | sub start_update_instance { | ||
292 | 71 | my ($port) = @_; | ||
293 | 72 | my $pidfile = "$base_pidfile.$port.pid"; | ||
294 | 73 | push @pidfiles, $pidfile; | ||
295 | 74 | |||
296 | 75 | my $pid = fork(); | ||
297 | 76 | if ( $pid == 0 ) { | ||
298 | 77 | my $cmd = "$trunk/bin/pt-heartbeat"; | ||
299 | 78 | exec { $cmd } $cmd, qw(-h 127.0.0.1 -u msandbox -p msandbox -P), $port, | ||
300 | 79 | qw(--database test --table heartbeat --create-table), | ||
301 | 80 | qw(--utc --update --interval 0.5 --pid), $pidfile; | ||
302 | 81 | exit 1; | ||
303 | 82 | } | ||
304 | 83 | push @exec_pids, $pid; | ||
305 | 84 | |||
306 | 85 | PerconaTest::wait_for_files($pidfile); | ||
307 | 86 | ok( | ||
308 | 87 | -f $pidfile, | ||
309 | 88 | "--update on $port started" | ||
310 | 89 | ); | ||
311 | 90 | } | ||
312 | 91 | |||
313 | 92 | sub stop_all_instances { | ||
314 | 93 | my @pids = @exec_pids, map { chomp; $_ } map { slurp_file($_) } @pidfiles; | ||
315 | 94 | diag(`$trunk/bin/pt-heartbeat --stop >/dev/null`); | ||
316 | 95 | |||
317 | 96 | waitpid($_, 0) for @pids; | ||
318 | 97 | PerconaTest::wait_until(sub{ !-e $_ }) for @pidfiles; | ||
319 | 98 | |||
320 | 99 | unlink '/tmp/pt-heartbeat-sentinel'; | ||
321 | 100 | } | ||
322 | 101 | |||
323 | 102 | # ########################################################################### | ||
324 | 103 | # Test --check-master-server-id | ||
325 | 104 | # ########################################################################### | ||
326 | 105 | |||
327 | 106 | # Start --update on both masters. | ||
328 | 107 | #diag("Starting --update instances..."); | ||
329 | 108 | start_update_instance(12345); | ||
330 | 109 | start_update_instance(12348); | ||
331 | 110 | |||
332 | 111 | # Start --monitor on slave currently attached to master 12348. | ||
333 | 112 | my $output_file = "/tmp/pt-heartbeat-monitor.$PID"; | ||
334 | 113 | system("$trunk/bin/pt-heartbeat --monitor h=127.1,P=12349,u=msandbox,p=msandbox -D test --utc --check-master-server-id 1s --interval 0.5 --file $output_file --daemonize --run-time 5"); | ||
335 | 114 | #diag("Waiting for slave monitor to start..."); | ||
336 | 115 | PerconaTest::wait_for_files($output_file); | ||
337 | 116 | |||
338 | 117 | # Slave monitor should report master ID is 12348. | ||
339 | 118 | #diag("Waiting for slave monitor output..."); | ||
340 | 119 | my $output; | ||
341 | 120 | for (1..3) { | ||
342 | 121 | $output = `cat $output_file`; | ||
343 | 122 | if ($output ne "") { | ||
344 | 123 | last; | ||
345 | 124 | } | ||
346 | 125 | sleep(1); | ||
347 | 126 | } | ||
348 | 127 | |||
349 | 128 | like( | ||
350 | 129 | $output, | ||
351 | 130 | qr/12348$/, | ||
352 | 131 | "Monitor sees master 12348" | ||
353 | 132 | ); | ||
354 | 133 | |||
355 | 134 | # Simulate master VIP change by actually changing slave's master. | ||
356 | 135 | #diag("Changing slave's master..."); | ||
357 | 136 | $slave_dbh->do("STOP SLAVE"); | ||
358 | 137 | |||
359 | 138 | $slave_dbh->do("CHANGE MASTER TO master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12345, master_log_file='$s->{file}', master_log_pos=$s->{position}"); | ||
360 | 139 | |||
361 | 140 | $slave_dbh->do("START SLAVE"); | ||
362 | 141 | |||
363 | 142 | # Give pt-heartbeat time to detect change. | ||
364 | 143 | #diag("Waiting for pt-heartbeat to check master server ID..."); | ||
365 | 144 | sleep(2); | ||
366 | 145 | |||
367 | 146 | # Slave monitor should report new master ID 12345. | ||
368 | 147 | #diag("Waiting for slave monitor output..."); | ||
369 | 148 | for (1..3) { | ||
370 | 149 | $output = `cat $output_file`; | ||
371 | 150 | if ($output ne "") { | ||
372 | 151 | last; | ||
373 | 152 | } | ||
374 | 153 | sleep(1); | ||
375 | 154 | } | ||
376 | 155 | |||
377 | 156 | like( | ||
378 | 157 | $output, | ||
379 | 158 | qr/12345$/, | ||
380 | 159 | "Monitor changed to master 12345" | ||
381 | 160 | ); | ||
382 | 161 | |||
383 | 162 | #diag("Stopping --update instances..."); | ||
384 | 163 | stop_all_instances(); | ||
385 | 164 | |||
386 | 165 | # ############################################################################# | ||
387 | 166 | # Done. | ||
388 | 167 | # ############################################################################# | ||
389 | 168 | #diag("Stopping extra sandboxes..."); | ||
390 | 169 | diag(`$trunk/sandbox/stop-sandbox 12349 >/dev/null`); | ||
391 | 170 | diag(`$trunk/sandbox/stop-sandbox 12348 >/dev/null`); | ||
392 | 171 | diag(`rm -rf $output_file >/dev/null`); | ||
393 | 172 | $sb->wipe_clean($master1_dbh); | ||
394 | 173 | ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); | ||
395 | 174 | done_testing; |