Merge lp:~percona-toolkit-dev/percona-toolkit/pt-hearbeat-monitor-master-change into lp:percona-toolkit/2.2

Proposed by Daniel Nichter
Status: Needs review
Proposed branch: lp:~percona-toolkit-dev/percona-toolkit/pt-hearbeat-monitor-master-change
Merge into: lp:percona-toolkit/2.2
Diff against target: 395 lines (+250/-13)
2 files modified
bin/pt-heartbeat (+76/-13)
t/pt-heartbeat/check_master.t (+174/-0)
To merge this branch: bzr merge lp:~percona-toolkit-dev/percona-toolkit/pt-hearbeat-monitor-master-change
Reviewer Review Type Date Requested Status
Daniel Nichter Pending
Review via email: mp+205877@code.launchpad.net
To post a comment you must log in.
595. By Daniel Nichter

Implement --check-master-server-id.

596. By Daniel Nichter

Add more --check-master-server-id docs.

Unmerged revisions

596. By Daniel Nichter

Add more --check-master-server-id docs.

595. By Daniel Nichter

Implement --check-master-server-id.

594. By Daniel Nichter

Add --check-master-server-id and a failing test for it.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'bin/pt-heartbeat'
--- bin/pt-heartbeat 2013-12-18 23:53:01 +0000
+++ bin/pt-heartbeat 2014-02-12 02:05:53 +0000
@@ -4778,6 +4778,7 @@
4778use List::Util qw(min max sum);4778use List::Util qw(min max sum);
4779use Time::HiRes qw(gettimeofday time sleep usleep);4779use Time::HiRes qw(gettimeofday time sleep usleep);
4780use IO::File;4780use IO::File;
4781use Carp qw(confess);
47814782
4782use Percona::Toolkit;4783use Percona::Toolkit;
4783use constant PTDEBUG => $ENV{PTDEBUG} || 0;4784use constant PTDEBUG => $ENV{PTDEBUG} || 0;
@@ -4840,6 +4841,14 @@
4840 if ( !$o->get('stop') && !$o->get('database') ) {4841 if ( !$o->get('stop') && !$o->get('database') ) {
4841 $o->save_error('--database must be specified');4842 $o->save_error('--database must be specified');
4842 }4843 }
4844
4845 if ( $o->get('check-master-server-id') > 0 && !$o->get('monitor') ) {
4846 $o->save_error('--check-master-server-id requires --monitor');
4847 }
4848 }
4849
4850 if ( $o->get('check-master-server-id') > 0 ) {
4851 $o->set('print-master-server-id', 1);
4843 }4852 }
48444853
4845 eval {4854 eval {
@@ -4958,6 +4967,7 @@
49584967
4959 # pk_col and pk_val are used to identify the heartbeat row to update or4968 # pk_col and pk_val are used to identify the heartbeat row to update or
4960 # or monitor.4969 # or monitor.
4970 my ($ms, $master_server_id);
4961 my ($pk_col, $pk_val);4971 my ($pk_col, $pk_val);
4962 if ( $id ) {4972 if ( $id ) {
4963 # Legacy mode: update heartbeat row WHERE id=1 and monitor heartbeat4973 # Legacy mode: update heartbeat row WHERE id=1 and monitor heartbeat
@@ -4973,10 +4983,10 @@
4973 $pk_val = $server_id;4983 $pk_val = $server_id;
4974 }4984 }
4975 else { # monitor or check4985 else { # monitor or check
4976 my $master_server_id = $o->get('master-server-id');4986 $master_server_id = $o->get('master-server-id');
4977 if ( !$master_server_id ) {4987 if ( !$master_server_id ) {
4978 eval {4988 eval {
4979 my $ms = new MasterSlave(4989 $ms = new MasterSlave(
4980 OptionParser => $o,4990 OptionParser => $o,
4981 DSNParser => $dp,4991 DSNParser => $dp,
4982 Quoter => $q,4992 Quoter => $q,
@@ -5077,7 +5087,7 @@
50775087
5078 $heartbeat_sth = $dbh->prepare($heartbeat_sql);5088 $heartbeat_sth = $dbh->prepare($heartbeat_sql);
50795089
5080 my $ro_check = !!$o->get('check-read-only');5090 my $ro_check = $o->get('check-read-only');
5081 $update_heartbeat = sub {5091 $update_heartbeat = sub {
5082 my ($sth) = @_;5092 my ($sth) = @_;
5083 my @vals;5093 my @vals;
@@ -5151,15 +5161,16 @@
5151 . ($dbi_driver eq 'mysql' ? '/*!50038, @@hostname AS host*/' : '')5161 . ($dbi_driver eq 'mysql' ? '/*!50038, @@hostname AS host*/' : '')
5152 . ($id ? "" : ", server_id")5162 . ($id ? "" : ", server_id")
5153 . " FROM $db_tbl "5163 . " FROM $db_tbl "
5154 . "WHERE $pk_col='$pk_val' "5164 . "WHERE $pk_col=? "
5155 . "LIMIT 1";5165 . "LIMIT 1";
5156 PTDEBUG && _d("SELECT SQL:", $heartbeat_sql);5166 PTDEBUG && _d("SELECT SQL:", $heartbeat_sql);
51575167
5158 $heartbeat_sth = $dbh->prepare($heartbeat_sql);5168 $heartbeat_sth = $dbh->prepare($heartbeat_sql);
51595169
5160 $get_delay = sub {5170 $get_delay = sub {
5161 my ($sth) = @_;5171 my ($sth, $id) = @_;
5162 $sth->execute();5172 confess "No master server ID given" unless $id;
5173 $sth->execute($id);
5163 PTDEBUG && _d($sth->{Statement});5174 PTDEBUG && _d($sth->{Statement});
5164 my ($ts, $hostname, $server_id) = $sth->fetchrow_array();5175 my ($ts, $hostname, $server_id) = $sth->fetchrow_array();
5165 my $now = time;5176 my $now = time;
@@ -5176,7 +5187,7 @@
5176 $delay = 0.00 if $delay < 0;5187 $delay = 0.00 if $delay < 0;
51775188
5178 $sth->finish();5189 $sth->finish();
5179 return ($delay, $hostname, $pk_val);5190 return ($delay, $hostname, $server_id);
5180 };5191 };
51815192
5182 # https://bugs.launchpad.net/percona-toolkit/+bug/11633725193 # https://bugs.launchpad.net/percona-toolkit/+bug/1163372
@@ -5194,7 +5205,7 @@
5194 $update_heartbeat->($heartbeat_sth);5205 $update_heartbeat->($heartbeat_sth);
5195 }5206 }
5196 else {5207 else {
5197 $get_delay->($heartbeat_sth);5208 $get_delay->($heartbeat_sth, $pk_val);
5198 }5209 }
5199 $heartbeat_sth->finish();5210 $heartbeat_sth->finish();
52005211
@@ -5224,6 +5235,7 @@
5224 sth => $heartbeat_sth,5235 sth => $heartbeat_sth,
5225 sql => $heartbeat_sql,5236 sql => $heartbeat_sql,
5226 get_delay => $get_delay,5237 get_delay => $get_delay,
5238 server_id => $pk_val,
5227 interval => $interval,5239 interval => $interval,
5228 skew => $skew,5240 skew => $skew,
5229 hires_ts => $hires_ts,5241 hires_ts => $hires_ts,
@@ -5259,6 +5271,9 @@
5259 # ########################################################################5271 # ########################################################################
5260 # Monitor or update the heartbeat table.5272 # Monitor or update the heartbeat table.
5261 # ########################################################################5273 # ########################################################################
5274 my $check_server_id_time = $o->get('check-master-server-id');
5275 my $last_server_id_check = int(time);
5276
5262 my $end = $o->get('run-time') ? int(time + $o->get('run-time')) : 0;5277 my $end = $o->get('run-time') ? int(time + $o->get('run-time')) : 0;
5263 PTDEBUG && _d($end ? ('Will exit at', ts($end)) : 'Running forever');5278 PTDEBUG && _d($end ? ('Will exit at', ts($end)) : 'Running forever');
52645279
@@ -5290,8 +5305,39 @@
5290 }5305 }
52915306
5292 if ( $o->get('monitor') ) {5307 if ( $o->get('monitor') ) {
5308 if ( $check_server_id_time ) {
5309 # Time to --check-master-server-id?
5310 my $now = int(time);
5311 if ( $now - $last_server_id_check >= $check_server_id_time ) {
5312 PTDEBUG && _d("Checking master server id");
5313 eval {
5314 my $master_dsn = $ms->get_master_dsn($dbh, $dsn, $dp)
5315 or die "This server is not a slave";
5316 my $master_dbh = $dp->get_dbh(
5317 $dp->get_cxn_params($master_dsn),
5318 { AutoCommit => 1 }
5319 );
5320 my ($new_master_server_id)
5321 = $master_dbh->selectrow_array('SELECT @@server_id');
5322 $master_dbh->disconnect;
5323 if ( $new_master_server_id
5324 && $new_master_server_id != $master_server_id ) {
5325 PTDEBUG && _d("Master server id changed:",
5326 $master_server_id, "to",
5327 $new_master_server_id);
5328 $pk_val = $new_master_server_id;
5329 }
5330 };
5331 if ( $EVAL_ERROR ) {
5332 PTDEBUG && _d("Error checking master id:", $EVAL_ERROR);
5333 }
5334 $last_server_id_check = $now;
5335 }
5336 }
5337
5293 $heartbeat_sth ||= $dbh->prepare($heartbeat_sql);5338 $heartbeat_sth ||= $dbh->prepare($heartbeat_sql);
5294 my ($delay) = $get_delay->($heartbeat_sth);5339 my ($delay, undef, $server_id)
5340 = $get_delay->($heartbeat_sth, $pk_val);
52955341
5296 unshift @samples, $delay;5342 unshift @samples, $delay;
5297 pop @samples if @samples > $limit;5343 pop @samples if @samples > $limit;
@@ -5302,7 +5348,7 @@
5302 sum(@samples[0 .. $bound-1]) / $_;5348 sum(@samples[0 .. $bound-1]) / $_;
5303 } @$frames;5349 } @$frames;
53045350
5305 my $output = sprintf $format, $delay, @vals, $pk_val;5351 my $output = sprintf $format, $delay, @vals, $server_id;
5306 if ( my $file = $o->get('file') ) { 5352 if ( my $file = $o->get('file') ) {
5307 open my $file, '>', $file5353 open my $file, '>', $file
5308 or die "Can't open $file: $OS_ERROR";5354 or die "Can't open $file: $OS_ERROR";
@@ -5354,11 +5400,11 @@
5354# Check the delay on a single server. Optionally recurse to all its slaves.5400# Check the delay on a single server. Optionally recurse to all its slaves.
5355sub check_delay {5401sub check_delay {
5356 my ( %args ) = @_;5402 my ( %args ) = @_;
5357 my @required_args = qw(dsn dbh sth sql get_delay interval skew OptionParser DSNParser);5403 my @required_args = qw(dsn dbh sth sql get_delay server_id interval skew OptionParser DSNParser);
5358 foreach my $arg ( @required_args ) {5404 foreach my $arg ( @required_args ) {
5359 die "I need a $arg argument" unless $args{$arg};5405 die "I need a $arg argument" unless $args{$arg};
5360 }5406 }
5361 my ($dsn, $dbh, $sth, $sql, $get_delay, $interval, $skew, $o, $dp)5407 my ($dsn, $dbh, $sth, $sql, $get_delay, $server_id, $interval, $skew, $o, $dp)
5362 = @args{@required_args};5408 = @args{@required_args};
5363 PTDEBUG && _d('Checking slave delay');5409 PTDEBUG && _d('Checking slave delay');
53645410
@@ -5411,7 +5457,8 @@
5411 }5457 }
5412 sleep $next_interval - time;5458 sleep $next_interval - time;
5413 PTDEBUG && _d('Woke up at', ts(time));5459 PTDEBUG && _d('Woke up at', ts(time));
5414 my ($delay, $hostname, $master_server_id) = $get_delay->($sth);5460 my ($delay, $hostname, $master_server_id)
5461 = $get_delay->($sth, $server_id);
54155462
5416 if ( $o->get('recurse') ) {5463 if ( $o->get('recurse') ) {
5417 # Must print not only the delay, but the server's hostname if5464 # Must print not only the delay, but the server's hostname if
@@ -5642,6 +5689,8 @@
56425689
5643L<"--daemonize"> and L<"--check"> are mutually exclusive.5690L<"--daemonize"> and L<"--check"> are mutually exclusive.
56445691
5692L<"--master-server-id"> and L<"--check-master-server-id"> are mutually exclusive.
5693
5645This tool accepts additional command-line arguments. Refer to the5694This tool accepts additional command-line arguments. Refer to the
5646L<"SYNOPSIS"> and usage information for details.5695L<"SYNOPSIS"> and usage information for details.
56475696
@@ -5667,6 +5716,20 @@
5667their lag, too. The hostname or IP and port for each slave is printed5716their lag, too. The hostname or IP and port for each slave is printed
5668before its delay. L<"--recurse"> only works with MySQL.5717before its delay. L<"--recurse"> only works with MySQL.
56695718
5719=item --check-master-server-id
5720
5721type: time; default: 0
5722
5723Check master server ID periodically and change L<"--monitor"> to match.
5724The given time should be longer than L<"--interval">. For example,
5725to L<"--monitor"> every 5 seconds and check the master server ID every
57261 minute:
5727
5728 --monitor --interval 5 --check-master-server-id 1m
5729
5730This option implies L<"--print-master-server-id">. When the master server ID
5731changes, the new value is printed.
5732
5670=item --check-read-only5733=item --check-read-only
56715734
5672Check if the server has read_only enabled; If it does, the tool skips doing5735Check if the server has read_only enabled; If it does, the tool skips doing
56735736
=== added file 't/pt-heartbeat/check_master.t'
--- t/pt-heartbeat/check_master.t 1970-01-01 00:00:00 +0000
+++ t/pt-heartbeat/check_master.t 2014-02-12 02:05:53 +0000
@@ -0,0 +1,174 @@
1#!/usr/bin/env perl
2
3BEGIN {
4 die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
5 unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
6 unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
7};
8
9use strict;
10use warnings FATAL => 'all';
11use English qw(-no_match_vars);
12use Test::More;
13use Data::Dumper;
14use File::Temp qw(tempfile);
15
16use PerconaTest;
17use Sandbox;
18require "$trunk/bin/pt-heartbeat";
19
20my $dp = new DSNParser(opts=>$dsn_opts);
21my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
22my $master1_dbh = $sb->get_dbh_for('master');
23
24# Standard setup is:
25# 12345 -> 12346 -> 12347
26# So we don't mess with standard servers, start also:
27# 12348 -> 12349
28# Then --update 12345 and 12348, start --monitor on 12349, then change
29# its master to 12345. Disregard the names "master1" and "master2".
30
31#diag("Starting extra sandboxes...");
32
33diag(`$trunk/sandbox/start-sandbox master 12348 >/dev/null`);
34my $master2_dbh = $sb->get_dbh_for('master1');
35
36diag(`$trunk/sandbox/start-sandbox slave 12349 12348 >/dev/null`);
37my $slave_dbh = $sb->get_dbh_for('master2');
38
39if ( !$master1_dbh ) {
40 plan skip_all => 'Cannot connect to sandbox master';
41}
42elsif ( !$master2_dbh ) {
43 plan skip_all => 'Cannot connect to second sandbox master';
44}
45elsif ( !$slave_dbh ) {
46 plan skip_all => 'Cannot connect to sandbox slave1';
47}
48
49$sb->load_file('master', 't/pt-heartbeat/samples/precision-time-table.sql');
50$sb->load_file('master1', 't/pt-heartbeat/samples/precision-time-table.sql');
51
52# Get master binlog position before we create heartbeat table and row,
53# else slave 12349 won't replay these queries and get heartbeat
54# from master 12345.
55my $s = $master1_dbh->selectrow_hashref("SHOW MASTER STATUS");
56
57$master1_dbh->do("INSERT INTO test.heartbeat (ts, server_id) VALUES (UTC_TIMESTAMP(), 12345)");
58$master2_dbh->do("INSERT INTO test.heartbeat (ts, server_id) VALUES (UTC_TIMESTAMP(), 12348)");
59
60# ###########################################################################
61# Helper subs
62# ###########################################################################
63
64my $base_pidfile = (tempfile("/tmp/pt-heartbeat-test.XXXXXXXX", OPEN => 0, UNLINK => 0))[1];
65my $master_port = $sb->port_for('master');
66
67my @exec_pids;
68my @pidfiles;
69
# Fork and exec a "pt-heartbeat --update" instance against the sandbox
# server listening on $port, then wait for its PID file to appear.
#
# Args:
#   $port - port of the sandbox MySQL server whose heartbeat row is updated
#
# Side effects: appends the instance's PID file to @pidfiles and the child
# PID to @exec_pids (both are read by stop_all_instances), and records one
# test result saying whether the PID file was created.
#
# Dies if the process cannot be forked.
sub start_update_instance {
   my ($port) = @_;
   my $pidfile = "$base_pidfile.$port.pid";
   push @pidfiles, $pidfile;

   my $pid = fork();
   # fork returns undef on failure; report the OS error explicitly rather
   # than comparing an undefined value with 0 below.
   die "Cannot fork: $OS_ERROR" unless defined $pid;

   if ( $pid == 0 ) {
      # Child: replace this process with pt-heartbeat.  The list form of
      # exec bypasses the shell.
      my $cmd = "$trunk/bin/pt-heartbeat";
      exec { $cmd } $cmd, qw(-h 127.0.0.1 -u msandbox -p msandbox -P), $port,
         qw(--database test --table heartbeat --create-table),
         qw(--utc --update --interval 0.5 --pid), $pidfile;
      exit 1;  # only reached if exec itself failed
   }
   push @exec_pids, $pid;

   PerconaTest::wait_for_files($pidfile);
   ok(
      -f $pidfile,
      "--update on $port started"
   );
}
91
# Stop every pt-heartbeat instance started by this test and wait for them
# to go away.
#
# Collects the PIDs recorded in @exec_pids plus the PIDs written to each
# file in @pidfiles, runs "pt-heartbeat --stop", reaps the processes, waits
# for each PID file to disappear, and finally removes the sentinel file.
sub stop_all_instances {
   # Read the PIDs from the PID files before asking the instances to stop.
   # In list context a failed match yields nothing, so an empty or
   # malformed PID file contributes no PID.
   my @file_pids = map { slurp_file($_) =~ m/(\d+)/ } @pidfiles;

   # Build the list explicitly: "my @pids = @exec_pids, map ..." would
   # assign only @exec_pids because = binds tighter than the comma.
   # The same PID can come from both sources, so drop duplicates.
   my %seen;
   my @pids = grep { !$seen{$_}++ } @exec_pids, @file_pids;

   diag(`$trunk/bin/pt-heartbeat --stop >/dev/null`);

   waitpid($_, 0) for @pids;

   # Use a named lexical so the closure does not depend on the global $_.
   for my $pidfile ( @pidfiles ) {
      PerconaTest::wait_until(sub { !-e $pidfile });
   }

   unlink '/tmp/pt-heartbeat-sentinel';
}
101
102# ###########################################################################
103# Test --check-master-server-id
104# ###########################################################################
105
106# Start --update on both masters.
107#diag("Starting --update instances...");
108start_update_instance(12345);
109start_update_instance(12348);
110
111# Start --monitor on slave currently attached to master 12348.
112my $output_file = "/tmp/pt-heartbeat-monitor.$PID";
113system("$trunk/bin/pt-heartbeat --monitor h=127.1,P=12349,u=msandbox,p=msandbox -D test --utc --check-master-server-id 1s --interval 0.5 --file $output_file --daemonize --run-time 5");
114#diag("Waiting for slave monitor to start...");
115PerconaTest::wait_for_files($output_file);
116
117# Slave monitor should report master ID is 12348.
118#diag("Waiting for slave monitor output...");
119my $output;
120for (1..3) {
121 $output = `cat $output_file`;
122 if ($output ne "") {
123 last;
124 }
125 sleep(1);
126}
127
128like(
129 $output,
130 qr/12348$/,
131 "Monitor sees master 12348"
132);
133
134# Simulate master VIP change by actually changing slave's master.
135#diag("Changing slave's master...");
136$slave_dbh->do("STOP SLAVE");
137
138$slave_dbh->do("CHANGE MASTER TO master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12345, master_log_file='$s->{file}', master_log_pos=$s->{position}");
139
140$slave_dbh->do("START SLAVE");
141
142# Give pt-heartbeat time to detect change.
143#diag("Waiting for pt-heartbeat to check master server ID...");
144sleep(2);
145
146# Slave monitor should report new master ID 12345.
147#diag("Waiting for slave monitor output...");
148for (1..3) {
149 $output = `cat $output_file`;
150 if ($output ne "") {
151 last;
152 }
153 sleep(1);
154}
155
156like(
157 $output,
158 qr/12345$/,
159 "Monitor changed to master 12345"
160);
161
162#diag("Stopping --update instances...");
163stop_all_instances();
164
165# #############################################################################
166# Done.
167# #############################################################################
168#diag("Stopping extra sandboxes...");
169diag(`$trunk/sandbox/stop-sandbox 12349 >/dev/null`);
170diag(`$trunk/sandbox/stop-sandbox 12348 >/dev/null`);
171diag(`rm -rf $output_file >/dev/null`);
172$sb->wipe_clean($master1_dbh);
173ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
174done_testing;

Subscribers

People subscribed via source and target branches