Merge lp:~mysql-mmm-core/mysql-mmm/devel into lp:mysql-mmm
- devel
- Merge into trunk-2.x
Proposed by
Pascal Hofmann
| Status: | Merged |
|---|---|
| Merged at revision: | not available |
| Proposed branch: | lp:~mysql-mmm-core/mysql-mmm/devel |
| Merge into: | lp:mysql-mmm |
| Diff against target: |
1821 lines (+914/-368) 13 files modified
doc/mmm_configuration.texi (+23/-0) doc/mmm_control.texi (+18/-95) doc/mmm_monitor.texi (+19/-16) lib/Agent/Agent.pm (+21/-2) lib/Common/Config.pm (+4/-1) lib/Monitor/Checker.pm (+2/-2) lib/Monitor/Commands.pm (+92/-35) lib/Monitor/Monitor.pm (+278/-190) lib/Monitor/NetworkChecker.pm (+18/-15) lib/Monitor/Roles.pm (+136/-2) lib/Monitor/StartupStatus.pm (+298/-0) lib/Monitor/t/Roles.t (+2/-2) sbin/mmm_mond (+3/-8) |
| To merge this branch: | bzr merge lp:~mysql-mmm-core/mysql-mmm/devel |
| Related bugs: |
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| Pascal Hofmann | Approve | ||
|
Review via email:
|
|||
Commit message
* Added manual mode (bug #531011), wait mode, config values 'mode' and 'wait_for_other_master'.
* Don't die at startup when no network connection is available - wait for it to appear instead (bug #416572)
* Changed startup behaviour. mmm_mond will only go into passive mode if it detects the active_master_role on more than one host.
* Added config value 'careful_startup' (bug #422549). If set to 0 mmm_mond won't ever switch into passive mode at startup.
* Added check for invalid agent commands (prevents crash when mmmd_mon version 1.x talks to a 2.x agent).
Description of the change
To post a comment you must log in.
Revision history for this message
| Pascal Hofmann (pascalhofmann) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
| 1 | === modified file 'doc/mmm_configuration.texi' | |||
| 2 | --- doc/mmm_configuration.texi 2010-02-05 17:27:32 +0000 | |||
| 3 | +++ doc/mmm_configuration.texi 2010-03-09 10:25:26 +0000 | |||
| 4 | @@ -231,6 +231,29 @@ | |||
| 5 | 231 | @item Used by: @tab monitor | 231 | @item Used by: @tab monitor |
| 6 | 232 | @end multitable | 232 | @end multitable |
| 7 | 233 | 233 | ||
| 8 | 234 | @item @strong{careful_startup} | ||
| 9 | 235 | @multitable @columnfractions 0.2 0.8 | ||
| 10 | 236 | @item Description: @tab Startup carefully i.e. switch into passive mode when writer role is configured on multiple hosts. | ||
| 11 | 237 | @item Allowed values: @tab true/yes/1/on false/no/0/off | ||
| 12 | 238 | @item Default value: @tab 1 | ||
| 13 | 239 | @item Used by: @tab monitor | ||
| 14 | 240 | @end multitable | ||
| 15 | 241 | |||
| 16 | 242 | @item @strong{mode} | ||
| 17 | 243 | @multitable @columnfractions 0.2 0.8 | ||
| 18 | 244 | @item Description: @tab Default mode of monitor. | ||
| 19 | 245 | @item Allowed values: @tab active manual wait passive | ||
| 20 | 246 | @item Default value: @tab active | ||
| 21 | 247 | @item Used by: @tab monitor | ||
| 22 | 248 | @end multitable | ||
| 23 | 249 | |||
| 24 | 250 | @item @strong{wait_for_other_master} | ||
| 25 | 251 | @multitable @columnfractions 0.2 0.8 | ||
| 26 | 252 | @item Description: @tab How many seconds to wait for other master to become @code{ONLINE} before switching from mode @code{WAIT} to mode @code{ACTIVE}. 0 = infinite. | ||
| 27 | 253 | @item Default value: @tab 120 | ||
| 28 | 254 | @item Used by: @tab monitor | ||
| 29 | 255 | @end multitable | ||
| 30 | 256 | |||
| 31 | 234 | @end itemize | 257 | @end itemize |
| 32 | 235 | 258 | ||
| 33 | 236 | 259 | ||
| 34 | 237 | 260 | ||
| 35 | === modified file 'doc/mmm_control.texi' | |||
| 36 | --- doc/mmm_control.texi 2010-02-18 14:32:50 +0000 | |||
| 37 | +++ doc/mmm_control.texi 2010-03-09 10:25:26 +0000 | |||
| 38 | @@ -132,7 +132,7 @@ | |||
| 39 | 132 | @end example | 132 | @end example |
| 40 | 133 | 133 | ||
| 41 | 134 | @noindent | 134 | @noindent |
| 43 | 135 | See @ref{Passive mode}. | 135 | See @ref{Modes}. |
| 44 | 136 | 136 | ||
| 45 | 137 | @section @code{set_active} | 137 | @section @code{set_active} |
| 46 | 138 | Switch the monitor into @code{ACTIVE} mode: | 138 | Switch the monitor into @code{ACTIVE} mode: |
| 47 | @@ -143,7 +143,18 @@ | |||
| 48 | 143 | @end example | 143 | @end example |
| 49 | 144 | 144 | ||
| 50 | 145 | @noindent | 145 | @noindent |
| 52 | 146 | See @ref{Passive mode}. | 146 | See @ref{Modes}. |
| 53 | 147 | |||
| 54 | 148 | @section @code{set_manual} | ||
| 55 | 149 | Switch the monitor into @code{MANUAL} mode: | ||
| 56 | 150 | |||
| 57 | 151 | @example | ||
| 58 | 152 | # mmm_control set_manual | ||
| 59 | 153 | OK: Switched into manual mode. | ||
| 60 | 154 | @end example | ||
| 61 | 155 | |||
| 62 | 156 | @noindent | ||
| 63 | 157 | See @ref{Modes}. | ||
| 64 | 147 | 158 | ||
| 65 | 148 | @section @code{set_passive} | 159 | @section @code{set_passive} |
| 66 | 149 | Switch the monitor into @code{PASSIVE} mode: | 160 | Switch the monitor into @code{PASSIVE} mode: |
| 67 | @@ -154,10 +165,10 @@ | |||
| 68 | 154 | @end example | 165 | @end example |
| 69 | 155 | 166 | ||
| 70 | 156 | @noindent | 167 | @noindent |
| 72 | 157 | See @ref{Passive mode}. | 168 | See @ref{Modes}. |
| 73 | 158 | 169 | ||
| 74 | 159 | @section @code{move_role @var{role} @var{host}} | 170 | @section @code{move_role @var{role} @var{host}} |
| 76 | 160 | Used to move an exclusive role between the cluster nodes. This command is available in @code{ACTIVE} mode only. Lets assume the following situation: | 171 | Used to move an exclusive role between the cluster nodes. This command is not available in @code{PASSIVE} mode. Let's assume the following situation: |
| 77 | 161 | 172 | ||
| 78 | 162 | @smallexample | 173 | @smallexample |
| 79 | 163 | # mmm_control show | 174 | # mmm_control show |
| 80 | @@ -179,96 +190,8 @@ | |||
| 81 | 179 | @end smallexample | 190 | @end smallexample |
| 82 | 180 | 191 | ||
| 83 | 181 | @section @code{move_role --force @var{role} @var{host}} | 192 | @section @code{move_role --force @var{role} @var{host}} |
| 85 | 182 | Can be used to move the @var{active_master_role} to a host with state @code{REPLICATION_FAIL} or @code{REPLICATION_BACKLOG}. Use this with caution! This command is available in @code{ACTIVE} mode only. | 193 | Can be used to move the @var{active_master_role} to a host with state @code{REPLICATION_FAIL} or @code{REPLICATION_BACKLOG}. Use this with caution! This command is not available in @code{PASSIVE} mode. |
| 86 | 183 | 194 | ||
| 87 | 184 | @section @code{set_ip @var{ip} @var{host}} | 195 | @section @code{set_ip @var{ip} @var{host}} |
| 178 | 185 | @code{set_ip} can be used to manipulate the roles in @code{PASSIVE} mode. The changes won't be applied until the monitor is switched into @code{ACTIVE} mode via @code{set_active}. | 196 | @code{set_ip} can be used to manipulate the roles in @code{PASSIVE} mode. The changes won't be applied until the monitor is switched into @code{ACTIVE} or @code{MANUAL} mode via @code{set_active} or @code{set_manual}. |
| 179 | 186 | 197 | ||
| 90 | 187 | @* | ||
| 91 | 188 | Let's assume we have our cluster up and running with the following status: | ||
| 92 | 189 | |||
| 93 | 190 | @smallexample | ||
| 94 | 191 | # mmm_control show | ||
| 95 | 192 | db1(192.168.0.31) master/ONLINE. Roles: writer(192.168.0.50) | ||
| 96 | 193 | db2(192.168.0.32) master/ONLINE. Roles: reader(192.168.0.51) | ||
| 97 | 194 | db3(192.168.0.33) slave/ONLINE. Roles: reader(192.168.0.52), reader(192.168.0.53) | ||
| 98 | 195 | @end smallexample | ||
| 99 | 196 | |||
| 100 | 197 | @noindent | ||
| 101 | 198 | Now, several bad things happen: | ||
| 102 | 199 | @enumerate | ||
| 103 | 200 | @item network connection to db1 fails | ||
| 104 | 201 | @item mmm_mond detects that db1 has failed | ||
| 105 | 202 | @item mmm_mond moves the writer role to db2, but can't remove it from db1 (because it can't connect to it) | ||
| 106 | 203 | @item mmm_mond crashes and the status file gets corrupted. | ||
| 107 | 204 | @item network connection to db1 recovers | ||
| 108 | 205 | @item The admin restarts mmm_mond | ||
| 109 | 206 | @end enumerate | ||
| 110 | 207 | |||
| 111 | 208 | mmm_mond has no status information now, and two nodes report, that they have the | ||
| 112 | 209 | @code{writer} role, so mmm_mond doesn't know what it should do and will switch | ||
| 113 | 210 | into @code{PASSIVE} mode. | ||
| 114 | 211 | |||
| 115 | 212 | @smallexample | ||
| 116 | 213 | # mmm_control mode | ||
| 117 | 214 | PASSIVE | ||
| 118 | 215 | |||
| 119 | 216 | # mmm_control show | ||
| 120 | 217 | # --- Monitor is in PASSIVE MODE --- | ||
| 121 | 218 | # Cause: Discrepancies between stored status, agent status and system status during startup. | ||
| 122 | 219 | # | ||
| 123 | 220 | # Stored status: | ||
| 124 | 221 | # db1(192.168.0.31) master/UNKNOWN. Roles: | ||
| 125 | 222 | # db2(192.168.0.32) master/UNKNOWN. Roles: | ||
| 126 | 223 | # db3(192.168.0.33) slave/UNKNOWN. Roles: | ||
| 127 | 224 | # | ||
| 128 | 225 | # Agent status: | ||
| 129 | 226 | # db1 ONLINE. Roles: writer(192.168.0.50). Master: ? | ||
| 130 | 227 | # db2 ONLINE. Roles: writer(192.168.0.50), reader(192.168.0.51). Master: ? | ||
| 131 | 228 | # db3 ONLINE. Roles: reader(192.168.0.52), reader(192.168.0.53). Master: db2 | ||
| 132 | 229 | # | ||
| 133 | 230 | # System status: | ||
| 134 | 231 | # db1 writable. Roles: writer(192.168.0.50) | ||
| 135 | 232 | # db2 writable. Roles: writer(192.168.0.50), reader(192.168.0.51) | ||
| 136 | 233 | # db3 readonly. Roles: reader(192.168.0.52), reader(192.168.0.53) | ||
| 137 | 234 | # | ||
| 138 | 235 | db1(192.168.0.31) master/ONLINE. Roles: writer(192.168.0.50) | ||
| 139 | 236 | db2(192.168.0.32) master/ONLINE. Roles: reader(192.168.0.51) | ||
| 140 | 237 | db3(192.168.0.33) slave/ONLINE. Roles: reader(192.168.0.52), reader(192.168.0.53) | ||
| 141 | 238 | @end smallexample | ||
| 142 | 239 | |||
| 143 | 240 | @noindent | ||
| 144 | 241 | As you see, mmm_mond tries to recover the status as well as possible. But in this situation it's wrong so one must move the writer role to db2 manually: | ||
| 145 | 242 | |||
| 146 | 243 | @smallexample | ||
| 147 | 244 | # mmm_control set_ip 192.168.0.50 db2 | ||
| 148 | 245 | OK: Set role 'writer(192.168.0.50)' to host 'db2'. | ||
| 149 | 246 | @end smallexample | ||
| 150 | 247 | |||
| 151 | 248 | @noindent | ||
| 152 | 249 | Now take a look at the status, everything looks ok: | ||
| 153 | 250 | |||
| 154 | 251 | @smallexample | ||
| 155 | 252 | # mmm_control show | ||
| 156 | 253 | # --- Monitor is in PASSIVE MODE --- | ||
| 157 | 254 | # [...] | ||
| 158 | 255 | db1(192.168.0.31) master/ONLINE. Roles: | ||
| 159 | 256 | db2(192.168.0.32) master/ONLINE. Roles: writer(192.168.0.50), reader(192.168.0.51) | ||
| 160 | 257 | db3(192.168.0.33) slave/ONLINE. Roles: reader(192.168.0.52), reader(192.168.0.53) | ||
| 161 | 258 | @end smallexample | ||
| 162 | 259 | |||
| 163 | 260 | @noindent | ||
| 164 | 261 | Finally switch the monitor into active mode, so that it will apply the roles: | ||
| 165 | 262 | |||
| 166 | 263 | @smallexample | ||
| 167 | 264 | # mmm_control set_active | ||
| 168 | 265 | OK: Switched into active mode. | ||
| 169 | 266 | |||
| 170 | 267 | # mmm_control show | ||
| 171 | 268 | db1(192.168.0.31) master/ONLINE. Roles: reader(192.168.0.51) | ||
| 172 | 269 | db2(192.168.0.32) master/ONLINE. Roles: writer(192.168.0.50) | ||
| 173 | 270 | db3(192.168.0.33) slave/ONLINE. Roles: reader(192.168.0.52), reader(192.168.0.53) | ||
| 174 | 271 | @end smallexample | ||
| 175 | 272 | |||
| 176 | 273 | @* | ||
| 177 | 274 | @strong{Note:} The role @code{reader(192.168.0.51)} has been moved to db1, because @code{reader} is a @code{balanced} role. | ||
| 180 | 275 | 198 | ||
| 181 | === modified file 'doc/mmm_monitor.texi' | |||
| 182 | --- doc/mmm_monitor.texi 2010-02-05 17:27:32 +0000 | |||
| 183 | +++ doc/mmm_monitor.texi 2010-03-09 10:25:26 +0000 | |||
| 184 | @@ -108,7 +108,7 @@ | |||
| 185 | 108 | @end itemize | 108 | @end itemize |
| 186 | 109 | 109 | ||
| 187 | 110 | @noindent | 110 | @noindent |
| 189 | 111 | If the network connection doesn't work during startup, mmm_mond will switch into passive mode (@pxref{Passive mode}). | 111 | If the network connection doesn't work during startup, mmm_mond will delay startup until it's available again. |
| 190 | 112 | 112 | ||
| 191 | 113 | 113 | ||
| 192 | 114 | @node Flapping | 114 | @node Flapping |
| 193 | @@ -130,20 +130,22 @@ | |||
| 194 | 130 | If @var{auto_set_online} is > 0, flapping hosts will automatically be set to @code{ONLINE} | 130 | If @var{auto_set_online} is > 0, flapping hosts will automatically be set to @code{ONLINE} |
| 195 | 131 | after @var{flap_duration} seconds. | 131 | after @var{flap_duration} seconds. |
| 196 | 132 | 132 | ||
| 211 | 133 | @node Passive mode | 133 | @node Modes |
| 212 | 134 | @section Passive mode | 134 | @section Modes |
| 213 | 135 | @cindex passive mode | 135 | @cindex Modes |
| 214 | 136 | 136 | ||
| 215 | 137 | entered if no network connection during startup | 137 | @subsection Active mode |
| 216 | 138 | entered if discrepancies are detected during startup | 138 | The monitor will remove roles from failed hosts and assign them to other hosts automatically. |
| 217 | 139 | entered with set_passive | 139 | @subsection Manual mode |
| 218 | 140 | 140 | The monitor will only distribute @code{balanced} roles across the hosts, but will not remove roles from failed hosts automatically. You can remove roles from failed hosts manually with @code{move_role}. | |
| 219 | 141 | roles can be changed (unclean) with set_ip | 141 | @subsection Wait mode |
| 220 | 142 | changed to active with set_active | 142 | Like @code{MANUAL} mode, but mode will be changed into @code{ACTIVE} mode when both master hosts are @code{ONLINE} or after @code{wait_for_other_master} seconds have elapsed. |
| 221 | 143 | 143 | @subsection Passive mode | |
| 222 | 144 | roles get never changed automatically | 144 | In passive mode the monitor doesn't change roles, update the status file, or send anything to agents. |
| 223 | 145 | nothing is send to agents | 145 | In passive mode you can modify roles (unclean) with @code{set_ip} - the changes won't be applied until the monitor is switched to @code{ACTIVE} or @code{MANUAL} mode with @code{set_active} or @code{set_manual}. |
| 224 | 146 | status file won't be updated | 146 | Passive mode will be entered if conflicting roles are detected during startup. You should then analyze the situation, fix the role information (if needed) and switch into @code{ACTIVE} or @code{MANUAL} mode. |
| 225 | 147 | It can also be entered manually with @code{set_passive}. | ||
| 226 | 148 | |||
| 227 | 147 | 149 | ||
| 228 | 148 | @node Startup | 150 | @node Startup |
| 229 | 149 | @section Startup | 151 | @section Startup |
| 230 | @@ -152,6 +154,7 @@ | |||
| 231 | 152 | @itemize | 154 | @itemize |
| 232 | 153 | 155 | ||
| 233 | 154 | @item Initial network check | 156 | @item Initial network check |
| 234 | 157 | @item If network is down startup will be delayed until it's reachable again. | ||
| 235 | 155 | @item Initial host checks | 158 | @item Initial host checks |
| 236 | 156 | @item reads status information from ... | 159 | @item reads status information from ... |
| 237 | 157 | @itemize @minus | 160 | @itemize @minus |
| 238 | @@ -159,7 +162,7 @@ | |||
| 239 | 159 | @item agents (agent info) | 162 | @item agents (agent info) |
| 240 | 160 | @item hosts (system info) | 163 | @item hosts (system info) |
| 241 | 161 | @end itemize | 164 | @end itemize |
| 243 | 162 | @item If status information doesn't match or network is down @code{PASSIVE} mode will be entered. | 165 | and tries to figure out the cluster status. |
| 244 | 163 | @end itemize | 166 | @end itemize |
| 245 | 164 | 167 | ||
| 246 | 165 | @node Role transition | 168 | @node Role transition |
| 247 | 166 | 169 | ||
| 248 | === modified file 'lib/Agent/Agent.pm' | |||
| 249 | --- lib/Agent/Agent.pm 2009-10-30 07:19:35 +0000 | |||
| 250 | +++ lib/Agent/Agent.pm 2010-03-09 10:25:26 +0000 | |||
| 251 | @@ -4,7 +4,9 @@ | |||
| 252 | 4 | use warnings FATAL => 'all'; | 4 | use warnings FATAL => 'all'; |
| 253 | 5 | use English qw(EVAL_ERROR); | 5 | use English qw(EVAL_ERROR); |
| 254 | 6 | use Algorithm::Diff; | 6 | use Algorithm::Diff; |
| 255 | 7 | use DBI; | ||
| 256 | 7 | use Class::Struct; | 8 | use Class::Struct; |
| 257 | 9 | use Errno qw(EINTR); | ||
| 258 | 8 | use Log::Log4perl qw(:easy); | 10 | use Log::Log4perl qw(:easy); |
| 259 | 9 | use MMM::Common::Role; | 11 | use MMM::Common::Role; |
| 260 | 10 | use MMM::Common::Socket; | 12 | use MMM::Common::Socket; |
| 261 | @@ -81,6 +83,7 @@ | |||
| 262 | 81 | DEBUG "Received Command $cmd"; | 83 | DEBUG "Received Command $cmd"; |
| 263 | 82 | my ($cmd_name, $version, $host, @params) = split('\|', $cmd, -1); | 84 | my ($cmd_name, $version, $host, @params) = split('\|', $cmd, -1); |
| 264 | 83 | 85 | ||
| 265 | 86 | return "ERROR: Invalid command '$cmd'!" unless (defined($host)); | ||
| 266 | 84 | return "ERROR: Invalid hostname in command ($host)! My name is '" . $self->name . "'" if ($host ne $self->name); | 87 | return "ERROR: Invalid hostname in command ($host)! My name is '" . $self->name . "'" if ($host ne $self->name); |
| 267 | 85 | 88 | ||
| 268 | 86 | if ($version > main::MMM_PROTOCOL_VERSION) { | 89 | if ($version > main::MMM_PROTOCOL_VERSION) { |
| 269 | @@ -114,7 +117,23 @@ | |||
| 270 | 114 | sub cmd_get_system_status($) { | 117 | sub cmd_get_system_status($) { |
| 271 | 115 | my $self = shift; | 118 | my $self = shift; |
| 272 | 116 | 119 | ||
| 274 | 117 | # TODO maybe determine and send master info if we are a slave host. | 120 | # determine master info |
| 275 | 121 | my $dsn = sprintf("DBI:mysql:host=%s;port=%s;mysql_connect_timeout=3", $self->ip, $self->mysql_port); | ||
| 276 | 122 | my $eintr = EINTR; | ||
| 277 | 123 | my $master_ip = ''; | ||
| 278 | 124 | |||
| 279 | 125 | my $dbh; | ||
| 280 | 126 | CONNECT: { | ||
| 281 | 127 | DEBUG "Connecting to mysql"; | ||
| 282 | 128 | $dbh = DBI->connect($dsn, $self->mysql_user, $self->mysql_password, { PrintError => 0 }); | ||
| 283 | 129 | unless ($dbh) { | ||
| 284 | 130 | redo CONNECT if ($DBI::err == 2003 && $DBI::errstr =~ /\($eintr\)/); | ||
| 285 | 131 | WARN "Couldn't connect to mysql. Can't determine current master host." . $DBI::err . " " . $DBI::errstr; | ||
| 286 | 132 | } | ||
| 287 | 133 | } | ||
| 288 | 134 | |||
| 289 | 135 | my $slave_status = $dbh->selectrow_hashref('SHOW SLAVE STATUS'); | ||
| 290 | 136 | $master_ip = $slave_status->{Master_Host} if (defined($slave_status)); | ||
| 291 | 118 | 137 | ||
| 292 | 119 | my @roles; | 138 | my @roles; |
| 293 | 120 | foreach my $role (keys(%{$main::config->{role}})) { | 139 | foreach my $role (keys(%{$main::config->{role}})) { |
| 294 | @@ -133,7 +152,7 @@ | |||
| 295 | 133 | return "ERROR: Could not check if MySQL is writable: $res" if ($ret == 255); | 152 | return "ERROR: Could not check if MySQL is writable: $res" if ($ret == 255); |
| 296 | 134 | my $writable = ($ret == 1); | 153 | my $writable = ($ret == 1); |
| 297 | 135 | 154 | ||
| 299 | 136 | my $answer = join('|', ($writable, join(',', @roles))); | 155 | my $answer = join('|', ($writable, join(',', @roles), $master_ip)); |
| 300 | 137 | return "OK: Returning status!|$answer"; | 156 | return "OK: Returning status!|$answer"; |
| 301 | 138 | } | 157 | } |
| 302 | 139 | 158 | ||
| 303 | 140 | 159 | ||
| 304 | === modified file 'lib/Common/Config.pm' | |||
| 305 | --- lib/Common/Config.pm 2010-02-03 09:06:11 +0000 | |||
| 306 | +++ lib/Common/Config.pm 2010-03-09 10:25:26 +0000 | |||
| 307 | @@ -37,7 +37,10 @@ | |||
| 308 | 37 | 'flap_duration' => { 'default' => 60 * 60 }, | 37 | 'flap_duration' => { 'default' => 60 * 60 }, |
| 309 | 38 | 'flap_count' => { 'default' => 3 }, | 38 | 'flap_count' => { 'default' => 3 }, |
| 310 | 39 | 'auto_set_online' => { 'default' => 0 }, | 39 | 'auto_set_online' => { 'default' => 0 }, |
| 312 | 40 | 'kill_host_bin' => { 'default' => 'kill_host' } | 40 | 'kill_host_bin' => { 'default' => 'kill_host' }, |
| 313 | 41 | 'careful_startup' => { 'default' => 1, 'boolean' => 1 }, | ||
| 314 | 42 | 'mode' => { 'default' => 'active', 'values' => ['passive', 'active', 'manual', 'wait'] }, | ||
| 315 | 43 | 'wait_for_other_master' => { 'default' => 120 } | ||
| 316 | 41 | } | 44 | } |
| 317 | 42 | }, | 45 | }, |
| 318 | 43 | 'socket' => { 'create_if_empty' => ['AGENT', 'CONTROL', 'MONITOR'], 'section' => { | 46 | 'socket' => { 'create_if_empty' => ['AGENT', 'CONTROL', 'MONITOR'], 'section' => { |
| 319 | 44 | 47 | ||
| 320 | === modified file 'lib/Monitor/Checker.pm' | |||
| 321 | --- lib/Monitor/Checker.pm 2010-02-08 15:06:09 +0000 | |||
| 322 | +++ lib/Monitor/Checker.pm 2010-03-09 10:25:26 +0000 | |||
| 323 | @@ -184,7 +184,7 @@ | |||
| 324 | 184 | my $self = shift; | 184 | my $self = shift; |
| 325 | 185 | my $name = $self->{name}; | 185 | my $name = $self->{name}; |
| 326 | 186 | 186 | ||
| 328 | 187 | DEBUG "Pinging checker '$name'..."; | 187 | # DEBUG "Pinging checker '$name'..."; |
| 329 | 188 | 188 | ||
| 330 | 189 | my $reader = $self->{reader}; | 189 | my $reader = $self->{reader}; |
| 331 | 190 | my $writer = $self->{writer}; | 190 | my $writer = $self->{writer}; |
| 332 | @@ -202,7 +202,7 @@ | |||
| 333 | 202 | return 0; | 202 | return 0; |
| 334 | 203 | } | 203 | } |
| 335 | 204 | 204 | ||
| 337 | 205 | DEBUG "Checker '$name' is OK ($recv_res)"; | 205 | # DEBUG "Checker '$name' is OK ($recv_res)"; |
| 338 | 206 | return 1; | 206 | return 1; |
| 339 | 207 | } | 207 | } |
| 340 | 208 | 208 | ||
| 341 | 209 | 209 | ||
| 342 | === modified file 'lib/Monitor/Commands.pm' | |||
| 343 | --- lib/Monitor/Commands.pm 2010-03-03 00:34:21 +0000 | |||
| 344 | +++ lib/Monitor/Commands.pm 2010-03-09 10:25:26 +0000 | |||
| 345 | @@ -61,7 +61,7 @@ | |||
| 346 | 61 | my $roles = MMM::Monitor::Roles->instance(); | 61 | my $roles = MMM::Monitor::Roles->instance(); |
| 347 | 62 | 62 | ||
| 348 | 63 | my $ret = ''; | 63 | my $ret = ''; |
| 350 | 64 | if ($monitor->passive) { | 64 | if ($monitor->is_passive) { |
| 351 | 65 | $ret .= "--- Monitor is in PASSIVE MODE ---\n"; | 65 | $ret .= "--- Monitor is in PASSIVE MODE ---\n"; |
| 352 | 66 | $ret .= sprintf("Cause: %s\n", $monitor->passive_info); | 66 | $ret .= sprintf("Cause: %s\n", $monitor->passive_info); |
| 353 | 67 | $ret =~ s/^/# /mg; | 67 | $ret =~ s/^/# /mg; |
| 354 | @@ -193,7 +193,7 @@ | |||
| 355 | 193 | 193 | ||
| 356 | 194 | FATAL "Admin changed state of '$host' from $host_state to ADMIN_OFFLINE"; | 194 | FATAL "Admin changed state of '$host' from $host_state to ADMIN_OFFLINE"; |
| 357 | 195 | $agents->set_state($host, 'ADMIN_OFFLINE'); | 195 | $agents->set_state($host, 'ADMIN_OFFLINE'); |
| 359 | 196 | MMM::Monitor::Roles->instance()->clear_host_roles($host); | 196 | MMM::Monitor::Roles->instance()->clear_roles($host); |
| 360 | 197 | MMM::Monitor::Monitor->instance()->send_agent_status($host); | 197 | MMM::Monitor::Monitor->instance()->send_agent_status($host); |
| 361 | 198 | 198 | ||
| 362 | 199 | return "OK: State of '$host' changed to ADMIN_OFFLINE. Now you can wait some time and check all roles!"; | 199 | return "OK: State of '$host' changed to ADMIN_OFFLINE. Now you can wait some time and check all roles!"; |
| 363 | @@ -203,7 +203,7 @@ | |||
| 364 | 203 | my $ip = shift; | 203 | my $ip = shift; |
| 365 | 204 | my $host = shift; | 204 | my $host = shift; |
| 366 | 205 | 205 | ||
| 368 | 206 | return "ERROR: This command is only allowed in passive mode" unless (MMM::Monitor::Monitor->instance()->passive); | 206 | return "ERROR: This command is only allowed in passive mode" unless (MMM::Monitor::Monitor->instance()->is_passive); |
| 369 | 207 | 207 | ||
| 370 | 208 | my $agents = MMM::Monitor::Agents->instance(); | 208 | my $agents = MMM::Monitor::Agents->instance(); |
| 371 | 209 | my $roles = MMM::Monitor::Roles->instance(); | 209 | my $roles = MMM::Monitor::Roles->instance(); |
| 372 | @@ -239,14 +239,19 @@ | |||
| 373 | 239 | my $role = shift; | 239 | my $role = shift; |
| 374 | 240 | my $host = shift; | 240 | my $host = shift; |
| 375 | 241 | 241 | ||
| 377 | 242 | return "ERROR: This command is only allowed in active mode" if (MMM::Monitor::Monitor->instance()->passive); | 242 | my $monitor = MMM::Monitor::Monitor->instance(); |
| 378 | 243 | return "ERROR: This command is not allowed in passive mode" if ($monitor->is_passive); | ||
| 379 | 243 | 244 | ||
| 380 | 244 | my $agents = MMM::Monitor::Agents->instance(); | 245 | my $agents = MMM::Monitor::Agents->instance(); |
| 381 | 245 | my $roles = MMM::Monitor::Roles->instance(); | 246 | my $roles = MMM::Monitor::Roles->instance(); |
| 382 | 246 | 247 | ||
| 383 | 247 | return "ERROR: Unknown role name '$role'!" unless ($roles->exists($role)); | 248 | return "ERROR: Unknown role name '$role'!" unless ($roles->exists($role)); |
| 384 | 248 | return "ERROR: Unknown host name '$host'!" unless ($agents->exists($host)); | 249 | return "ERROR: Unknown host name '$host'!" unless ($agents->exists($host)); |
| 386 | 249 | return "ERROR: move_role may be used for exclusive roles only!" unless ($roles->is_exclusive($role)); | 250 | |
| 387 | 251 | unless ($roles->is_exclusive($role)) { | ||
| 388 | 252 | $roles->clear_balanced_role($host, $role); | ||
| 389 | 253 | return "OK: Balanced role $role has been removed from host '$host'. Now you can wait some time and check new roles info!"; | ||
| 390 | 254 | } | ||
| 391 | 250 | 255 | ||
| 392 | 251 | my $host_state = $agents->state($host); | 256 | my $host_state = $agents->state($host); |
| 393 | 252 | return "ERROR: Can't move role to host with state $host_state." unless ($host_state eq 'ONLINE'); | 257 | return "ERROR: Can't move role to host with state $host_state." unless ($host_state eq 'ONLINE'); |
| 394 | @@ -261,7 +266,9 @@ | |||
| 395 | 261 | my $agent = MMM::Monitor::Agents->instance()->get($host); | 266 | my $agent = MMM::Monitor::Agents->instance()->get($host); |
| 396 | 262 | return "ERROR: Can't reach agent daemon on '$host'! Can't move roles there!" unless ($agent->cmd_ping()); | 267 | return "ERROR: Can't reach agent daemon on '$host'! Can't move roles there!" unless ($agent->cmd_ping()); |
| 397 | 263 | 268 | ||
| 399 | 264 | return "ERROR: Role '$role' is assigned to preferred host '$old_owner'. Can't move it!" if ($roles->assigned_to_preferred_host($role)); | 269 | if ($monitor->is_active && $roles->assigned_to_preferred_host($role)) { |
| 400 | 270 | return "ERROR: Role '$role' is assigned to preferred host '$old_owner'. Can't move it!"; | ||
| 401 | 271 | } | ||
| 402 | 265 | 272 | ||
| 403 | 266 | my $ip = $roles->get_exclusive_role_ip($role); | 273 | my $ip = $roles->get_exclusive_role_ip($role); |
| 404 | 267 | return "Error: Role $role has no IP." unless ($ip); | 274 | return "Error: Role $role has no IP." unless ($ip); |
| 405 | @@ -272,13 +279,13 @@ | |||
| 406 | 272 | $roles->set_role($role, $ip, $host); | 279 | $roles->set_role($role, $ip, $host); |
| 407 | 273 | 280 | ||
| 408 | 274 | # Notify old host (if is_active_master_role($role) this will make the host non writable) | 281 | # Notify old host (if is_active_master_role($role) this will make the host non writable) |
| 410 | 275 | MMM::Monitor::Monitor->instance()->send_agent_status($old_owner); | 282 | $monitor->send_agent_status($old_owner); |
| 411 | 276 | 283 | ||
| 412 | 277 | # Notify slaves (this will make them switch the master) | 284 | # Notify slaves (this will make them switch the master) |
| 414 | 278 | MMM::Monitor::Monitor->instance()->notify_slaves($host) if ($roles->is_active_master_role($role)); | 285 | $monitor->notify_slaves($host) if ($roles->is_active_master_role($role)); |
| 415 | 279 | 286 | ||
| 416 | 280 | # Notify new host (if is_active_master_role($role) this will make the host writable) | 287 | # Notify new host (if is_active_master_role($role) this will make the host writable) |
| 418 | 281 | MMM::Monitor::Monitor->instance()->send_agent_status($host); | 288 | $monitor->send_agent_status($host); |
| 419 | 282 | 289 | ||
| 420 | 283 | return "OK: Role '$role' has been moved from '$old_owner' to '$host'. Now you can wait some time and check new roles info!"; | 290 | return "OK: Role '$role' has been moved from '$old_owner' to '$host'. Now you can wait some time and check new roles info!"; |
| 421 | 284 | 291 | ||
| 422 | @@ -288,7 +295,8 @@ | |||
| 423 | 288 | my $role = shift; | 295 | my $role = shift; |
| 424 | 289 | my $host = shift; | 296 | my $host = shift; |
| 425 | 290 | 297 | ||
| 427 | 291 | return "ERROR: This command is only allowed in active mode" if (MMM::Monitor::Monitor->instance()->passive); | 298 | my $monitor = MMM::Monitor::Monitor->instance(); |
| 428 | 299 | return "ERROR: This command is not allowed in passive mode" if (MMM::Monitor::Monitor->instance()->is_passive); | ||
| 429 | 292 | 300 | ||
| 430 | 293 | my $agents = MMM::Monitor::Agents->instance(); | 301 | my $agents = MMM::Monitor::Agents->instance(); |
| 431 | 294 | my $roles = MMM::Monitor::Roles->instance(); | 302 | my $roles = MMM::Monitor::Roles->instance(); |
| 432 | @@ -328,12 +336,12 @@ | |||
| 433 | 328 | if (!$checks->rep_threads($old_owner)) { | 336 | if (!$checks->rep_threads($old_owner)) { |
| 434 | 329 | FATAL "State of host '$old_owner' changed from ONLINE to REPLICATION_FAIL (because of move_role --force)"; | 337 | FATAL "State of host '$old_owner' changed from ONLINE to REPLICATION_FAIL (because of move_role --force)"; |
| 435 | 330 | $old_agent->state('REPLICATION_FAIL'); | 338 | $old_agent->state('REPLICATION_FAIL'); |
| 437 | 331 | $roles->clear_host_roles($old_owner); | 339 | $roles->clear_roles($old_owner) if ($monitor->is_active); |
| 438 | 332 | } | 340 | } |
| 439 | 333 | elsif (!$checks->rep_backlog($old_owner)) { | 341 | elsif (!$checks->rep_backlog($old_owner)) { |
| 440 | 334 | FATAL "State of host '$old_owner' changed from ONLINE to REPLICATION_BACKLOG (because of move_role --force)"; | 342 | FATAL "State of host '$old_owner' changed from ONLINE to REPLICATION_BACKLOG (because of move_role --force)"; |
| 441 | 335 | $old_agent->state('REPLICATION_BACKLOG'); | 343 | $old_agent->state('REPLICATION_BACKLOG'); |
| 443 | 336 | $roles->clear_host_roles($old_owner); | 344 | $roles->clear_roles($old_owner) if ($monitor->is_active); |
| 444 | 337 | } | 345 | } |
| 445 | 338 | 346 | ||
| 446 | 339 | # Notify old host (this will make the host non writable) | 347 | # Notify old host (this will make the host non writable) |
| 447 | @@ -352,13 +360,13 @@ | |||
| 448 | 352 | 360 | ||
| 449 | 353 | =item mode | 361 | =item mode |
| 450 | 354 | 362 | ||
| 452 | 355 | Get information about current mode (active or passive) | 363 | Get information about current mode (active, manual or passive) |
| 453 | 356 | 364 | ||
| 454 | 357 | =cut | 365 | =cut |
| 455 | 358 | 366 | ||
| 456 | 359 | sub mode() { | 367 | sub mode() { |
| 459 | 360 | return 'PASSIVE' if (MMM::Monitor::Monitor->instance()->passive); | 368 | my $monitor = MMM::Monitor::Monitor->instance(); |
| 460 | 361 | return 'ACTIVE'; | 369 | return $monitor->get_mode_string(); |
| 461 | 362 | } | 370 | } |
| 462 | 363 | 371 | ||
| 463 | 364 | 372 | ||
| 464 | @@ -369,26 +377,69 @@ | |||
| 465 | 369 | =cut | 377 | =cut |
| 466 | 370 | 378 | ||
| 467 | 371 | sub set_active() { | 379 | sub set_active() { |
| 484 | 372 | return 'OK: Already in active mode.' unless (MMM::Monitor::Monitor->instance()->passive); | 380 | my $monitor = MMM::Monitor::Monitor->instance(); |
| 485 | 373 | 381 | ||
| 486 | 374 | 382 | return 'OK: Already in active mode.' if ($monitor->is_active); | |
| 487 | 375 | # Send status to agents | 383 | |
| 488 | 376 | MMM::Monitor::Monitor->instance()->send_status_to_agents(); | 384 | my $old_mode = $monitor->get_mode_string(); |
| 489 | 377 | 385 | INFO "Admin changed mode from '$old_mode' to 'ACTIVE'"; | |
| 490 | 378 | # Clear 'bad' roles | 386 | |
| 491 | 379 | my $agents = MMM::Monitor::Agents->instance(); | 387 | if ($monitor->is_passive) { |
| 492 | 380 | foreach my $host (keys(%{$main::config->{host}})) { | 388 | $monitor->set_active(); # so that we can send status to agents |
| 493 | 381 | my $agent = $agents->get($host); | 389 | $monitor->cleanup_and_send_status(); |
| 494 | 382 | $agent->cmd_clear_bad_roles(); # TODO check result | 390 | $monitor->passive_info(''); |
| 495 | 383 | } | 391 | } |
| 496 | 384 | 392 | elsif ($monitor->is_manual) { | |
| 497 | 385 | 393 | # remove all roles from hosts which are not ONLINE | |
| 498 | 386 | MMM::Monitor::Monitor->instance()->passive(0); | 394 | my $roles = MMM::Monitor::Roles->instance(); |
| 499 | 387 | MMM::Monitor::Monitor->instance()->passive_info(''); | 395 | my $agents = MMM::Monitor::Agents->instance(); |
| 500 | 396 | my $checks = MMM::Monitor::ChecksStatus->instance(); | ||
| 501 | 397 | foreach my $host (keys(%{$main::config->{host}})) { | ||
| 502 | 398 | my $host_state = $agents->state($host); | ||
| 503 | 399 | next if ($host_state eq 'ONLINE' || $roles->get_host_roles($host) == 0); | ||
| 504 | 400 | my $agent = $agents->get($host); | ||
| 505 | 401 | $roles->clear_roles($host); | ||
| 506 | 402 | my $ret = $monitor->send_agent_status($host); | ||
| 507 | 403 | # next if ($host_state eq 'REPLICATION_FAIL'); | ||
| 508 | 404 | # next if ($host_state eq 'REPLICATION_BACKLOG'); | ||
| 509 | 405 | # NOTE host_state should never be ADMIN_OFFLINE at this point | ||
| 510 | 406 | if (!$ret) { | ||
| 511 | 407 | ERROR sprintf("Can't send offline status notification to '%s' - killing it!", $host); | ||
| 512 | 408 | $monitor->_kill_host($host, $checks->ping($host)); | ||
| 513 | 409 | } | ||
| 514 | 410 | } | ||
| 515 | 411 | } | ||
| 516 | 412 | |||
| 517 | 413 | $monitor->set_active(); | ||
| 518 | 388 | return 'OK: Switched into active mode.'; | 414 | return 'OK: Switched into active mode.'; |
| 519 | 389 | } | 415 | } |
| 520 | 390 | 416 | ||
| 521 | 391 | 417 | ||
| 522 | 418 | =item set_manual | ||
| 523 | 419 | |||
| 524 | 420 | Switch to manual mode. | ||
| 525 | 421 | |||
| 526 | 422 | =cut | ||
| 527 | 423 | |||
| 528 | 424 | sub set_manual() { | ||
| 529 | 425 | my $monitor = MMM::Monitor::Monitor->instance(); | ||
| 530 | 426 | |||
| 531 | 427 | return 'OK: Already in manual mode.' if ($monitor->is_manual); | ||
| 532 | 428 | |||
| 533 | 429 | my $old_mode = $monitor->get_mode_string(); | ||
| 534 | 430 | INFO "Admin changed mode from '$old_mode' to 'MANUAL'"; | ||
| 535 | 431 | |||
| 536 | 432 | if ($monitor->is_passive) { | ||
| 537 | 433 | $monitor->set_manual(); # so that we can send status to agents | ||
| 538 | 434 | $monitor->cleanup_and_send_status(); | ||
| 539 | 435 | $monitor->passive_info(''); | ||
| 540 | 436 | } | ||
| 541 | 437 | |||
| 542 | 438 | $monitor->set_manual(); | ||
| 543 | 439 | return 'OK: Switched into manual mode.'; | ||
| 544 | 440 | } | ||
| 545 | 441 | |||
| 546 | 442 | |||
| 547 | 392 | =item set_passive | 443 | =item set_passive |
| 548 | 393 | 444 | ||
| 549 | 394 | Switch to passive mode. | 445 | Switch to passive mode. |
| 550 | @@ -396,10 +447,15 @@ | |||
| 551 | 396 | =cut | 447 | =cut |
| 552 | 397 | 448 | ||
| 553 | 398 | sub set_passive() { | 449 | sub set_passive() { |
| 558 | 399 | return 'OK: Already in passive mode.' if (MMM::Monitor::Monitor->instance()->passive); | 450 | my $monitor = MMM::Monitor::Monitor->instance(); |
| 559 | 400 | 451 | ||
| 560 | 401 | MMM::Monitor::Monitor->instance()->passive(1); | 452 | return 'OK: Already in passive mode.' if ($monitor->is_passive); |
| 561 | 402 | MMM::Monitor::Monitor->instance()->passive_info('Admin switched to passive mode.'); | 453 | |
| 562 | 454 | my $old_mode = $monitor->get_mode_string(); | ||
| 563 | 455 | INFO "Admin changed mode from '$old_mode' to 'PASSIVE'"; | ||
| 564 | 456 | |||
| 565 | 457 | $monitor->set_passive(); | ||
| 566 | 458 | $monitor->passive_info('Admin switched to passive mode.'); | ||
| 567 | 403 | return 'OK: Switched into passive mode.'; | 459 | return 'OK: Switched into passive mode.'; |
| 568 | 404 | } | 460 | } |
| 569 | 405 | 461 | ||
| 570 | @@ -413,6 +469,7 @@ | |||
| 571 | 413 | set_offline <host> - set host <host> offline | 469 | set_offline <host> - set host <host> offline |
| 572 | 414 | mode - print current mode. | 470 | mode - print current mode. |
| 573 | 415 | set_active - switch into active mode. | 471 | set_active - switch into active mode. |
| 574 | 472 | set_manual - switch into manual mode. | ||
| 575 | 416 | set_passive - switch into passive mode. | 473 | set_passive - switch into passive mode. |
| 576 | 417 | move_role [--force] <role> <host> - move exclusive role <role> to host <host> | 474 | move_role [--force] <role> <host> - move exclusive role <role> to host <host> |
| 577 | 418 | (Only use --force if you know what you are doing!) | 475 | (Only use --force if you know what you are doing!) |
| 578 | 419 | 476 | ||
| 579 | === modified file 'lib/Monitor/Monitor.pm' | |||
| 580 | --- lib/Monitor/Monitor.pm 2010-02-11 01:05:09 +0000 | |||
| 581 | +++ lib/Monitor/Monitor.pm 2010-03-09 10:25:26 +0000 | |||
| 582 | @@ -19,6 +19,7 @@ | |||
| 583 | 19 | use MMM::Monitor::NetworkChecker; | 19 | use MMM::Monitor::NetworkChecker; |
| 584 | 20 | use MMM::Monitor::Role; | 20 | use MMM::Monitor::Role; |
| 585 | 21 | use MMM::Monitor::Roles; | 21 | use MMM::Monitor::Roles; |
| 586 | 22 | use MMM::Monitor::StartupStatus; | ||
| 587 | 22 | 23 | ||
| 588 | 23 | =head1 NAME | 24 | =head1 NAME |
| 589 | 24 | 25 | ||
| 590 | @@ -28,6 +29,11 @@ | |||
| 591 | 28 | 29 | ||
| 592 | 29 | our $VERSION = '0.01'; | 30 | our $VERSION = '0.01'; |
| 593 | 30 | 31 | ||
| 594 | 32 | use constant MMM_MONITOR_MODE_PASSIVE => 0; | ||
| 595 | 33 | use constant MMM_MONITOR_MODE_ACTIVE => 1; | ||
| 596 | 34 | use constant MMM_MONITOR_MODE_MANUAL => 2; | ||
| 597 | 35 | use constant MMM_MONITOR_MODE_WAIT => 3; | ||
| 598 | 36 | |||
| 599 | 31 | use Class::Struct; | 37 | use Class::Struct; |
| 600 | 32 | 38 | ||
| 601 | 33 | sub instance() { | 39 | sub instance() { |
| 602 | @@ -40,12 +46,13 @@ | |||
| 603 | 40 | command_queue => 'Thread::Queue', | 46 | command_queue => 'Thread::Queue', |
| 604 | 41 | result_queue => 'Thread::Queue', | 47 | result_queue => 'Thread::Queue', |
| 605 | 42 | roles => 'MMM::Monitor::Roles', | 48 | roles => 'MMM::Monitor::Roles', |
| 607 | 43 | passive => '$', | 49 | mode => '$', |
| 608 | 44 | passive_info => '$', | 50 | passive_info => '$', |
| 609 | 45 | kill_host_bin => '$' | 51 | kill_host_bin => '$' |
| 610 | 46 | }; | 52 | }; |
| 611 | 47 | 53 | ||
| 612 | 48 | 54 | ||
| 613 | 55 | |||
| 614 | 49 | =head1 FUNCTIONS | 56 | =head1 FUNCTIONS |
| 615 | 50 | 57 | ||
| 616 | 51 | =over 4 | 58 | =over 4 |
| 617 | @@ -59,6 +66,24 @@ | |||
| 618 | 59 | sub init($) { | 66 | sub init($) { |
| 619 | 60 | my $self = shift; | 67 | my $self = shift; |
| 620 | 61 | 68 | ||
| 621 | 69 | #___________________________________________________________________________ | ||
| 622 | 70 | # | ||
| 623 | 71 | # Wait until network connection is available | ||
| 624 | 72 | #___________________________________________________________________________ | ||
| 625 | 73 | |||
| 626 | 74 | INFO "Waiting for network connection..."; | ||
| 627 | 75 | unless (MMM::Monitor::NetworkChecker->wait_for_network()) { | ||
| 628 | 76 | INFO "Received shutdown request while waiting for network connection."; | ||
| 629 | 77 | return 0; | ||
| 630 | 78 | } | ||
| 631 | 79 | INFO "Network connection is available."; | ||
| 632 | 80 | |||
| 633 | 81 | |||
| 634 | 82 | #___________________________________________________________________________ | ||
| 635 | 83 | # | ||
| 636 | 84 | # Create thread queues and other stuff... | ||
| 637 | 85 | #___________________________________________________________________________ | ||
| 638 | 86 | |||
| 639 | 62 | my $agents = MMM::Monitor::Agents->instance(); | 87 | my $agents = MMM::Monitor::Agents->instance(); |
| 640 | 63 | 88 | ||
| 641 | 64 | $self->checker_queue(new Thread::Queue::); | 89 | $self->checker_queue(new Thread::Queue::); |
| 642 | @@ -68,6 +93,23 @@ | |||
| 643 | 68 | $self->roles(MMM::Monitor::Roles->instance()); | 93 | $self->roles(MMM::Monitor::Roles->instance()); |
| 644 | 69 | $self->passive_info(''); | 94 | $self->passive_info(''); |
| 645 | 70 | 95 | ||
| 646 | 96 | if ($main::config->{monitor}->{mode} eq 'active') { | ||
| 647 | 97 | $self->mode(MMM_MONITOR_MODE_ACTIVE); | ||
| 648 | 98 | } | ||
| 649 | 99 | elsif ($main::config->{monitor}->{mode} eq 'manual') { | ||
| 650 | 100 | $self->mode(MMM_MONITOR_MODE_MANUAL); | ||
| 651 | 101 | } | ||
| 652 | 102 | elsif ($main::config->{monitor}->{mode} eq 'wait') { | ||
| 653 | 103 | $self->mode(MMM_MONITOR_MODE_WAIT); | ||
| 654 | 104 | } | ||
| 655 | 105 | elsif ($main::config->{monitor}->{mode} eq 'passive') { | ||
| 656 | 106 | $self->mode(MMM_MONITOR_MODE_PASSIVE); | ||
| 657 | 107 | $self->passive_info('Configured to start up in passive mode.'); | ||
| 658 | 108 | } | ||
| 659 | 109 | else { | ||
| 660 | 110 | LOGDIE "Something very, very strange just happend - dieing..." | ||
| 661 | 111 | } | ||
| 662 | 112 | |||
| 663 | 71 | 113 | ||
| 664 | 72 | #___________________________________________________________________________ | 114 | #___________________________________________________________________________ |
| 665 | 73 | # | 115 | # |
| 666 | @@ -89,14 +131,6 @@ | |||
| 667 | 89 | 131 | ||
| 668 | 90 | my $checks = $self->checks_status; | 132 | my $checks = $self->checks_status; |
| 669 | 91 | 133 | ||
| 670 | 92 | #___________________________________________________________________________ | ||
| 671 | 93 | # | ||
| 672 | 94 | # Go into passive mode if we have no network connection at startup | ||
| 673 | 95 | #___________________________________________________________________________ | ||
| 674 | 96 | |||
| 675 | 97 | $self->passive(!$main::have_net); | ||
| 676 | 98 | $self->passive_info('No network connection during startup.') unless ($main::have_net); | ||
| 677 | 99 | |||
| 678 | 100 | 134 | ||
| 679 | 101 | #___________________________________________________________________________ | 135 | #___________________________________________________________________________ |
| 680 | 102 | # | 136 | # |
| 681 | @@ -108,21 +142,21 @@ | |||
| 682 | 108 | 142 | ||
| 683 | 109 | #___________________________________________________________________________ | 143 | #___________________________________________________________________________ |
| 684 | 110 | # | 144 | # |
| 686 | 111 | # Figure out current status. Go into passive mode if there are discrepancies | 145 | # Fetch stored status, agent status and system status |
| 687 | 112 | #___________________________________________________________________________ | 146 | #___________________________________________________________________________ |
| 688 | 113 | 147 | ||
| 694 | 114 | $agents->load_status(); | 148 | $agents->load_status(); # load stored status |
| 695 | 115 | 149 | ||
| 696 | 116 | my $system_status = {}; | 150 | |
| 697 | 117 | my $agent_status = {}; | 151 | my $startup_status = new MMM::Monitor::StartupStatus; |
| 698 | 118 | my $status = 1; | 152 | |
| 699 | 119 | my $res; | 153 | my $res; |
| 700 | 120 | 154 | ||
| 701 | 121 | foreach my $host (keys(%{$main::config->{host}})) { | 155 | foreach my $host (keys(%{$main::config->{host}})) { |
| 702 | 122 | 156 | ||
| 703 | 123 | my $agent = $agents->get($host); | 157 | my $agent = $agents->get($host); |
| 704 | 124 | my $host_status = 1; | ||
| 705 | 125 | 158 | ||
| 706 | 159 | $startup_status->set_stored_status($host, $agent->state, $agent->roles); | ||
| 707 | 126 | 160 | ||
| 708 | 127 | #_______________________________________________________________________ | 161 | #_______________________________________________________________________ |
| 709 | 128 | # | 162 | # |
| 710 | @@ -132,28 +166,23 @@ | |||
| 711 | 132 | $res = $agent->cmd_get_agent_status(2); | 166 | $res = $agent->cmd_get_agent_status(2); |
| 712 | 133 | 167 | ||
| 713 | 134 | if ($res =~ /^OK/) { | 168 | if ($res =~ /^OK/) { |
| 714 | 135 | |||
| 715 | 136 | my ($msg, $state, $roles_str, $master) = split('\|', $res); | 169 | my ($msg, $state, $roles_str, $master) = split('\|', $res); |
| 716 | 137 | my @roles_str_arr = sort(split(/\,/, $roles_str)); | 170 | my @roles_str_arr = sort(split(/\,/, $roles_str)); |
| 717 | 138 | my @roles; | 171 | my @roles; |
| 718 | 139 | 172 | ||
| 719 | 140 | foreach my $role_str (@roles_str_arr) { | 173 | foreach my $role_str (@roles_str_arr) { |
| 720 | 141 | my $role = MMM::Monitor::Role->from_string($role_str); | 174 | my $role = MMM::Monitor::Role->from_string($role_str); |
| 724 | 142 | if (defined($role)) { | 175 | push(@roles, $role) if (defined($role)); |
| 722 | 143 | push @roles, $role; | ||
| 723 | 144 | } | ||
| 725 | 145 | } | 176 | } |
| 726 | 146 | 177 | ||
| 728 | 147 | $agent_status->{$host} = { state => $state, roles => \@roles, master => $master }; | 178 | $startup_status->set_agent_status($host, $state, \@roles, $master); |
| 729 | 148 | } | 179 | } |
| 730 | 149 | elsif ($agent->state ne 'ADMIN_OFFLINE') { | 180 | elsif ($agent->state ne 'ADMIN_OFFLINE') { |
| 731 | 150 | if ($checks->ping($host) && $checks->mysql($host) && !$agent->agent_down()) { | 181 | if ($checks->ping($host) && $checks->mysql($host) && !$agent->agent_down()) { |
| 732 | 151 | ERROR "Can't reach agent on host '$host'"; | 182 | ERROR "Can't reach agent on host '$host'"; |
| 733 | 152 | $agent->agent_down(1); | 183 | $agent->agent_down(1); |
| 734 | 153 | } | 184 | } |
| 738 | 154 | ERROR "Switching to passive mode: The status of the agent on host '$host' could not be determined (answer was: $res)."; | 185 | ERROR "The status of the agent on host '$host' could not be determined (answer was: $res)."; |
| 736 | 155 | $status = 0; | ||
| 737 | 156 | $host_status = 0; | ||
| 739 | 157 | } | 186 | } |
| 740 | 158 | 187 | ||
| 741 | 159 | 188 | ||
| 742 | @@ -163,180 +192,61 @@ | |||
| 743 | 163 | #_______________________________________________________________________ | 192 | #_______________________________________________________________________ |
| 744 | 164 | 193 | ||
| 745 | 165 | $res = $agent->cmd_get_system_status(2); | 194 | $res = $agent->cmd_get_system_status(2); |
| 746 | 195 | |||
| 747 | 166 | if ($res =~ /^OK/) { | 196 | if ($res =~ /^OK/) { |
| 749 | 167 | my ($msg, $writable, $roles_str) = split('\|', $res); | 197 | my ($msg, $writable, $roles_str, $master_ip) = split('\|', $res); |
| 750 | 168 | my @roles_str_arr = sort(split(/\,/, $roles_str)); | 198 | my @roles_str_arr = sort(split(/\,/, $roles_str)); |
| 751 | 169 | my @roles; | 199 | my @roles; |
| 752 | 200 | |||
| 753 | 170 | foreach my $role_str (@roles_str_arr) { | 201 | foreach my $role_str (@roles_str_arr) { |
| 754 | 171 | my $role = MMM::Monitor::Role->from_string($role_str); | 202 | my $role = MMM::Monitor::Role->from_string($role_str); |
| 757 | 172 | if (defined($role)) { | 203 | push(@roles, $role) if (defined($role)); |
| 758 | 173 | push @roles, $role; | 204 | } |
| 759 | 205 | |||
| 760 | 206 | my $master = ''; | ||
| 761 | 207 | if (defined($master_ip)) { | ||
| 762 | 208 | foreach my $a_host (keys(%{$main::config->{host}})) { | ||
| 763 | 209 | $master = $a_host if ($main::config->{host}->{$a_host}->{ip} eq $master_ip); | ||
| 764 | 174 | } | 210 | } |
| 765 | 175 | } | 211 | } |
| 770 | 176 | $system_status->{$host} = { | 212 | $startup_status->set_system_status($host, $writable, \@roles, $master); |
| 767 | 177 | writable => $writable, | ||
| 768 | 178 | roles => \@roles | ||
| 769 | 179 | }; | ||
| 771 | 180 | } | 213 | } |
| 772 | 181 | elsif ($agent->state ne 'ADMIN_OFFLINE') { | 214 | elsif ($agent->state ne 'ADMIN_OFFLINE') { |
| 773 | 182 | if ($checks->ping($host) && $checks->mysql($host) && !$agent->agent_down()) { | 215 | if ($checks->ping($host) && $checks->mysql($host) && !$agent->agent_down()) { |
| 774 | 183 | ERROR "Can't reach agent on host '$host'"; | 216 | ERROR "Can't reach agent on host '$host'"; |
| 775 | 184 | $agent->agent_down(1); | 217 | $agent->agent_down(1); |
| 776 | 185 | } | 218 | } |
| 907 | 186 | ERROR "Switching to passive mode: The status of the system '$host' could not be determined (answer was: $res)."; | 219 | ERROR "The status of the system '$host' could not be determined (answer was: $res)."; |
| 908 | 187 | $status = 0; | 220 | } |
| 909 | 188 | $host_status = 0; | 221 | } |
| 910 | 189 | 222 | ||
| 911 | 190 | } | 223 | my $conflict = $startup_status->determine_status(); |
| 912 | 191 | 224 | ||
| 913 | 192 | 225 | DEBUG "STATE INFO\n", Data::Dumper->Dump([$startup_status], ['Startup status']); | |
| 914 | 193 | #_______________________________________________________________________ | 226 | INFO $startup_status->to_string(); |
| 915 | 194 | # | 227 | |
| 916 | 195 | # Skip comparison, if we coult not fetch AGENT/SYSTEM status | 228 | foreach my $host (keys(%{$startup_status->{result}})) { |
| 787 | 196 | #_______________________________________________________________________ | ||
| 788 | 197 | |||
| 789 | 198 | next unless (defined($agent_status->{$host})); | ||
| 790 | 199 | next unless (defined($system_status->{$host})); | ||
| 791 | 200 | |||
| 792 | 201 | |||
| 793 | 202 | #_______________________________________________________________________ | ||
| 794 | 203 | # | ||
| 795 | 204 | # Compare agent and system status ... | ||
| 796 | 205 | #_______________________________________________________________________ | ||
| 797 | 206 | |||
| 798 | 207 | if ($agent_status->{$host}->{state} ne 'UNKNOWN' && $agent_status->{$host}->{state} ne $agent->state) { | ||
| 799 | 208 | ERROR "Switching to passive mode: Agent state '", $agent_status->{$host}->{state}, "' differs from stored one '", $agent->state, "' for host '$host'."; | ||
| 800 | 209 | $status = 0; | ||
| 801 | 210 | $host_status = 0; | ||
| 802 | 211 | next; | ||
| 803 | 212 | } | ||
| 804 | 213 | |||
| 805 | 214 | |||
| 806 | 215 | #_______________________________________________________________________ | ||
| 807 | 216 | # | ||
| 808 | 217 | # ... determine if roles differ | ||
| 809 | 218 | #_______________________________________________________________________ | ||
| 810 | 219 | |||
| 811 | 220 | my $changes = 0; | ||
| 812 | 221 | my $diff = new Algorithm::Diff:: ( | ||
| 813 | 222 | $system_status->{$host}->{roles}, | ||
| 814 | 223 | $agent->roles, | ||
| 815 | 224 | { keyGen => \&MMM::Common::Role::to_string } | ||
| 816 | 225 | ); | ||
| 817 | 226 | |||
| 818 | 227 | while ($diff->Next) { | ||
| 819 | 228 | next if ($diff->Same); | ||
| 820 | 229 | |||
| 821 | 230 | ERROR sprintf( | ||
| 822 | 231 | "Switching to passive mode: Roles of host '$host' [%s] differ from stored ones [%s]", | ||
| 823 | 232 | join(', ', @{$system_status->{$host}->{roles}}), | ||
| 824 | 233 | join(', ', @{$agent->roles}) | ||
| 825 | 234 | ); | ||
| 826 | 235 | $status = 0; | ||
| 827 | 236 | $host_status = 0; | ||
| 828 | 237 | last; | ||
| 829 | 238 | } | ||
| 830 | 239 | |||
| 831 | 240 | next unless ($host_status); | ||
| 832 | 241 | foreach my $role (@{$agent->roles}) { | ||
| 833 | 242 | next unless ($self->roles->is_active_master_role($role->name)); | ||
| 834 | 243 | next if ($system_status->{$host}->{writable}); | ||
| 835 | 244 | WARN "Active master $host was not writable at monitor startup. (Don't mind, the host will be made writable soon)" | ||
| 836 | 245 | } | ||
| 837 | 246 | |||
| 838 | 247 | } | ||
| 839 | 248 | |||
| 840 | 249 | DEBUG "STATE INFO\n", Data::Dumper->Dump([$agents, $agent_status, $system_status], ['Stored status', 'Agent status', 'System status']); | ||
| 841 | 250 | |||
| 842 | 251 | |||
| 843 | 252 | #___________________________________________________________________________ | ||
| 844 | 253 | # | ||
| 845 | 254 | # Maybe switch into passive mode? | ||
| 846 | 255 | #___________________________________________________________________________ | ||
| 847 | 256 | |||
| 848 | 257 | unless ($status) { | ||
| 849 | 258 | # Enter PASSIVE MODE | ||
| 850 | 259 | $self->passive(1); | ||
| 851 | 260 | my $agent_status_str = ''; | ||
| 852 | 261 | foreach my $host (sort(keys(%{$agent_status}))) { | ||
| 853 | 262 | $agent_status_str .= sprintf( | ||
| 854 | 263 | " %s %s. Roles: %s. Master: %s\n", | ||
| 855 | 264 | $host, | ||
| 856 | 265 | $agent_status->{$host}->{state}, | ||
| 857 | 266 | scalar(@{$agent_status->{$host}->{roles}}) > 0 ? join(', ', sort(@{$agent_status->{$host}->{roles}})) : 'none', | ||
| 858 | 267 | $agent_status->{$host}->{master} ? $agent_status->{$host}->{master} : '?' | ||
| 859 | 268 | ); | ||
| 860 | 269 | } | ||
| 861 | 270 | my $system_status_str = ''; | ||
| 862 | 271 | foreach my $host (sort(keys(%{$system_status}))) { | ||
| 863 | 272 | $system_status_str .= sprintf( | ||
| 864 | 273 | " %s %s. Roles: %s\n", | ||
| 865 | 274 | $host, | ||
| 866 | 275 | $system_status->{$host}->{writable} ? 'writable' : 'readonly', | ||
| 867 | 276 | scalar(@{$system_status->{$host}->{roles}}) > 0 ? join(', ', sort(@{$system_status->{$host}->{roles}})) : 'none' | ||
| 868 | 277 | ); | ||
| 869 | 278 | } | ||
| 870 | 279 | my $status_str = sprintf("\nStored status:\n%s\nAgent status:\n%s\nSystem status:\n%s", $agents->get_status_info(), $agent_status_str, $system_status_str); | ||
| 871 | 280 | $self->passive_info("Discrepancies between stored status, agent status and system status during startup.\n" . $status_str); | ||
| 872 | 281 | FATAL "Switching to passive mode now. See output of 'mmm_control show' for details."; | ||
| 873 | 282 | INFO $status_str; | ||
| 874 | 283 | |||
| 875 | 284 | foreach my $host (keys(%{$main::config->{host}})) { | ||
| 876 | 285 | my $agent = $agents->get($host); | ||
| 877 | 286 | |||
| 878 | 287 | # Set all unknown hosts to AWAITING_RECOVERY | ||
| 879 | 288 | $agent->state('AWAITING_RECOVERY') if ($agent->state eq 'UNKNOWN'); | ||
| 880 | 289 | |||
| 881 | 290 | next unless ($system_status->{$host}); | ||
| 882 | 291 | next unless (scalar(@{$system_status->{$host}->{roles}})); | ||
| 883 | 292 | # Set status restored from agent systems | ||
| 884 | 293 | $agent->state('ONLINE'); | ||
| 885 | 294 | foreach my $role (@{$system_status->{$host}->{roles}}) { | ||
| 886 | 295 | next unless ($self->roles->exists_ip($role->name, $role->ip)); | ||
| 887 | 296 | next unless ($self->roles->can_handle($role->name, $host)); | ||
| 888 | 297 | $self->roles->set_role($role->name, $role->ip, $host); | ||
| 889 | 298 | } | ||
| 890 | 299 | } | ||
| 891 | 300 | |||
| 892 | 301 | # propagate roles to agent objects | ||
| 893 | 302 | foreach my $host (keys(%{$main::config->{host}})) { | ||
| 894 | 303 | my $agent = $agents->get($host); | ||
| 895 | 304 | my @roles = sort($self->roles->get_host_roles($host)); | ||
| 896 | 305 | $agent->roles(\@roles); | ||
| 897 | 306 | } | ||
| 898 | 307 | |||
| 899 | 308 | WARN "Monitor started in passive mode."; | ||
| 900 | 309 | |||
| 901 | 310 | return; | ||
| 902 | 311 | } | ||
| 903 | 312 | |||
| 904 | 313 | # Stay in ACTIVE MODE | ||
| 905 | 314 | # Everything is okay, apply roles from status file. | ||
| 906 | 315 | foreach my $host (keys(%{$main::config->{host}})) { | ||
| 917 | 316 | my $agent = $agents->get($host); | 229 | my $agent = $agents->get($host); |
| 935 | 317 | 230 | $agent->state($startup_status->{result}->{$host}->{state}); | |
| 936 | 318 | # Set new hosts to AWAITING_RECOVERY | 231 | foreach my $role (@{$startup_status->{result}->{$host}->{roles}}) { |
| 920 | 319 | if ($agent->state eq 'UNKNOWN') { | ||
| 921 | 320 | WARN "Detected new host '$host': Setting its initial state to 'AWAITING_RECOVERY'. Use 'mmm_control set_online $host' to switch it online."; | ||
| 922 | 321 | $agent->state('AWAITING_RECOVERY'); | ||
| 923 | 322 | } | ||
| 924 | 323 | |||
| 925 | 324 | # Apply roles loaded from status file | ||
| 926 | 325 | foreach my $role (@{$agent->roles}) { | ||
| 927 | 326 | unless ($self->roles->exists_ip($role->name, $role->ip)) { | ||
| 928 | 327 | WARN "Detected change in role definitions: Role '$role' was removed."; | ||
| 929 | 328 | next; | ||
| 930 | 329 | } | ||
| 931 | 330 | unless ($self->roles->can_handle($role->name, $host)) { | ||
| 932 | 331 | WARN "Detected change in role definitions: Host '$host' can't handle role '$role' anymore."; | ||
| 933 | 332 | next; | ||
| 934 | 333 | } | ||
| 937 | 334 | $self->roles->set_role($role->name, $role->ip, $host); | 232 | $self->roles->set_role($role->name, $role->ip, $host); |
| 938 | 335 | } | 233 | } |
| 939 | 336 | } | 234 | } |
| 940 | 337 | 235 | ||
| 943 | 338 | INFO "Monitor started in active mode." unless ($self->passive); | 236 | if ($conflict && $main::config->{monitor}->{careful_startup}) { |
| 944 | 339 | WARN "Monitor started in passive mode." if ($self->passive); | 237 | $self->set_passive(); |
| 945 | 238 | $self->passive_info("Conflicting roles during startup:\n\n" . $startup_status->to_string()); | ||
| 946 | 239 | } | ||
| 947 | 240 | elsif (!$self->is_passive) { | ||
| 948 | 241 | $self->cleanup_and_send_status(); | ||
| 949 | 242 | } | ||
| 950 | 243 | |||
| 951 | 244 | INFO "Monitor started in active mode." if ($self->mode == MMM_MONITOR_MODE_ACTIVE); | ||
| 952 | 245 | INFO "Monitor started in manual mode." if ($self->mode == MMM_MONITOR_MODE_MANUAL); | ||
| 953 | 246 | INFO "Monitor started in wait mode." if ($self->mode == MMM_MONITOR_MODE_WAIT); | ||
| 954 | 247 | INFO "Monitor started in passive mode." if ($self->mode == MMM_MONITOR_MODE_PASSIVE); | ||
| 955 | 248 | |||
| 956 | 249 | return 1; | ||
| 957 | 340 | } | 250 | } |
| 958 | 341 | 251 | ||
| 959 | 342 | sub check_master_configuration($) { | 252 | sub check_master_configuration($) { |
| 960 | @@ -507,7 +417,7 @@ | |||
| 961 | 507 | 417 | ||
| 962 | 508 | foreach my $host (keys(%{$main::config->{host}})) { | 418 | foreach my $host (keys(%{$main::config->{host}})) { |
| 963 | 509 | 419 | ||
| 965 | 510 | $agents->save_status() unless ($self->passive); | 420 | $agents->save_status() unless ($self->is_passive); |
| 966 | 511 | 421 | ||
| 967 | 512 | my $agent = $agents->get($host); | 422 | my $agent = $agents->get($host); |
| 968 | 513 | my $state = $agent->state; | 423 | my $state = $agent->state; |
| 969 | @@ -539,7 +449,8 @@ | |||
| 970 | 539 | unless ($ping && $mysql) { | 449 | unless ($ping && $mysql) { |
| 971 | 540 | FATAL sprintf("State of host '%s' changed from %s to HARD_OFFLINE (ping: %s, mysql: %s)", $host, $state, ($ping? 'OK' : 'not OK'), ($mysql? 'OK' : 'not OK')); | 450 | FATAL sprintf("State of host '%s' changed from %s to HARD_OFFLINE (ping: %s, mysql: %s)", $host, $state, ($ping? 'OK' : 'not OK'), ($mysql? 'OK' : 'not OK')); |
| 972 | 541 | $agent->state('HARD_OFFLINE'); | 451 | $agent->state('HARD_OFFLINE'); |
| 974 | 542 | $self->roles->clear_host_roles($host); | 452 | next if ($self->is_manual); |
| 975 | 453 | $self->roles->clear_roles($host); | ||
| 976 | 543 | if (!$self->send_agent_status($host)) { | 454 | if (!$self->send_agent_status($host)) { |
| 977 | 544 | ERROR sprintf("Can't send offline status notification to '%s' - killing it!", $host); | 455 | ERROR sprintf("Can't send offline status notification to '%s' - killing it!", $host); |
| 978 | 545 | $self->_kill_host($host, $checks->ping($host)); | 456 | $self->_kill_host($host, $checks->ping($host)); |
| 979 | @@ -557,8 +468,12 @@ | |||
| 980 | 557 | if ($ping && $mysql && !$rep_threads && $peer_state eq 'ONLINE' && $checks->ping($peer) && $checks->mysql($peer)) { | 468 | if ($ping && $mysql && !$rep_threads && $peer_state eq 'ONLINE' && $checks->ping($peer) && $checks->mysql($peer)) { |
| 981 | 558 | FATAL "State of host '$host' changed from $state to REPLICATION_FAIL"; | 469 | FATAL "State of host '$host' changed from $state to REPLICATION_FAIL"; |
| 982 | 559 | $agent->state('REPLICATION_FAIL'); | 470 | $agent->state('REPLICATION_FAIL'); |
| 985 | 560 | $self->roles->clear_host_roles($host); | 471 | next if ($self->is_manual); |
| 986 | 561 | $self->send_agent_status($host); | 472 | $self->roles->clear_roles($host); |
| 987 | 473 | if (!$self->send_agent_status($host)) { | ||
| 988 | 474 | ERROR sprintf("Can't send offline status notification to '%s' - killing it!", $host); | ||
| 989 | 475 | $self->_kill_host($host, $checks->ping($host)); | ||
| 990 | 476 | } | ||
| 991 | 562 | next; | 477 | next; |
| 992 | 563 | } | 478 | } |
| 993 | 564 | 479 | ||
| 994 | @@ -566,8 +481,12 @@ | |||
| 995 | 566 | if ($ping && $mysql && !$rep_backlog && $rep_threads && $peer_state eq 'ONLINE' && $checks->ping($peer) && $checks->mysql($peer)) { | 481 | if ($ping && $mysql && !$rep_backlog && $rep_threads && $peer_state eq 'ONLINE' && $checks->ping($peer) && $checks->mysql($peer)) { |
| 996 | 567 | FATAL "State of host '$host' changed from $state to REPLICATION_DELAY"; | 482 | FATAL "State of host '$host' changed from $state to REPLICATION_DELAY"; |
| 997 | 568 | $agent->state('REPLICATION_DELAY'); | 483 | $agent->state('REPLICATION_DELAY'); |
| 1000 | 569 | $self->roles->clear_host_roles($host); | 484 | next if ($self->is_manual); |
| 1001 | 570 | $self->send_agent_status($host); | 485 | $self->roles->clear_roles($host); |
| 1002 | 486 | if (!$self->send_agent_status($host)) { | ||
| 1003 | 487 | ERROR sprintf("Can't send offline status notification to '%s' - killing it!", $host); | ||
| 1004 | 488 | $self->_kill_host($host, $checks->ping($host)); | ||
| 1005 | 489 | } | ||
| 1006 | 571 | next; | 490 | next; |
| 1007 | 572 | } | 491 | } |
| 1008 | 573 | next; | 492 | next; |
| 1009 | @@ -711,7 +630,47 @@ | |||
| 1010 | 711 | next; | 630 | next; |
| 1011 | 712 | } | 631 | } |
| 1012 | 713 | } | 632 | } |
| 1014 | 714 | $agents->save_status() unless ($self->passive); | 633 | |
| 1015 | 634 | if ($self->mode == MMM_MONITOR_MODE_WAIT) { | ||
| 1016 | 635 | my $master_one = $self->roles->get_first_master(); | ||
| 1017 | 636 | my $master_two = $self->roles->get_second_master(); | ||
| 1018 | 637 | my $state_one = $agents->state($master_one); | ||
| 1019 | 638 | my $state_two = $agents->state($master_two); | ||
| 1020 | 639 | |||
| 1021 | 640 | if ($state_one eq 'ONLINE' && $state_two eq 'ONLINE') { | ||
| 1022 | 641 | INFO "Nodes $master_one and $master_two are ONLINE, switching from mode 'WAIT' to 'ACTIVE'."; | ||
| 1023 | 642 | $self->set_active(); | ||
| 1024 | 643 | } | ||
| 1025 | 644 | elsif ($main::config->{monitor}->{wait_for_other_master} > 0 && ($state_one eq 'ONLINE' || $state_two eq 'ONLINE')) { | ||
| 1026 | 645 | my $living_master = $state_one eq 'ONLINE' ? $master_one : $master_two; | ||
| 1027 | 646 | my $dead_master = $state_one eq 'ONLINE' ? $master_two : $master_one; | ||
| 1028 | 647 | |||
| 1029 | 648 | if ($main::config->{monitor}->{wait_for_other_master} <= time() - $agents->online_since($living_master)) { | ||
| 1030 | 649 | $self->set_active(); | ||
| 1031 | 650 | WARN sprintf("Master $dead_master did not come online for %d(wait_for_other_master) seconds. Switching from mode 'WAIT' to 'ACTIVE'", $main::config->{monitor}->{wait_for_other_master}); | ||
| 1032 | 651 | } | ||
| 1033 | 652 | |||
| 1034 | 653 | } | ||
| 1035 | 654 | if ($self->is_active) { | ||
| 1036 | 655 | # cleanup | ||
| 1037 | 656 | foreach my $host (keys(%{$main::config->{host}})) { | ||
| 1038 | 657 | my $host_state = $agents->state($host); | ||
| 1039 | 658 | next if ($host_state eq 'ONLINE' || $self->roles->get_host_roles($host) == 0); | ||
| 1040 | 659 | my $agent = $agents->get($host); | ||
| 1041 | 660 | $self->roles->clear_roles($host); | ||
| 1042 | 661 | my $ret = $self->send_agent_status($host); | ||
| 1043 | 662 | # next if ($host_state eq 'REPLICATION_FAIL'); | ||
| 1044 | 663 | # next if ($host_state eq 'REPLICATION_BACKLOG'); | ||
| 1045 | 664 | # NOTE host_state should never be ADMIN_OFFLINE at this point | ||
| 1046 | 665 | if (!$ret) { | ||
| 1047 | 666 | ERROR sprintf("Can't send offline status notification to '%s' - killing it!", $host); | ||
| 1048 | 667 | $self->_kill_host($host, $checks->ping($host)); | ||
| 1049 | 668 | } | ||
| 1050 | 669 | } | ||
| 1051 | 670 | } | ||
| 1052 | 671 | } | ||
| 1053 | 672 | |||
| 1054 | 673 | $agents->save_status() unless ($self->is_passive); | ||
| 1055 | 715 | } | 674 | } |
| 1056 | 716 | 675 | ||
| 1057 | 717 | 676 | ||
| 1058 | @@ -725,7 +684,7 @@ | |||
| 1059 | 725 | my $self = shift; | 684 | my $self = shift; |
| 1060 | 726 | 685 | ||
| 1061 | 727 | # Never change roles if we are in PASSIVE mode | 686 | # Never change roles if we are in PASSIVE mode |
| 1063 | 728 | return if ($self->passive); | 687 | return if ($self->is_passive); |
| 1064 | 729 | 688 | ||
| 1065 | 730 | my $old_active_master = $self->roles->get_active_master(); | 689 | my $old_active_master = $self->roles->get_active_master(); |
| 1066 | 731 | 690 | ||
| 1067 | @@ -734,7 +693,7 @@ | |||
| 1068 | 734 | $self->roles->process_orphans('balanced'); | 693 | $self->roles->process_orphans('balanced'); |
| 1069 | 735 | 694 | ||
| 1070 | 736 | # obey preferences | 695 | # obey preferences |
| 1072 | 737 | $self->roles->obey_preferences(); | 696 | $self->roles->obey_preferences() if ($self->is_active); |
| 1073 | 738 | 697 | ||
| 1074 | 739 | # Balance roles | 698 | # Balance roles |
| 1075 | 740 | $self->roles->balance(); | 699 | $self->roles->balance(); |
| 1076 | @@ -749,6 +708,46 @@ | |||
| 1077 | 749 | } | 708 | } |
| 1078 | 750 | 709 | ||
| 1079 | 751 | 710 | ||
=item cleanup_and_send_status()

Push the current status to every agent and have each agent drop roles it
should not hold. Hosts are notified in a fixed order: the passive master
first, then every host that is not a master, and the active master last.

=cut

sub cleanup_and_send_status($) {
	my ($self) = @_;

	my $agent_pool   = MMM::Monitor::Agents->instance();
	my $role_table   = MMM::Monitor::Roles->instance();

	my $writer_host  = $role_table->get_active_master();
	my $standby_host = $role_table->get_passive_master();

	# Send the status to one host, then ask its agent to clear bad roles.
	my $notify = sub {
		my ($target) = @_;
		$self->send_agent_status($target);
		return $agent_pool->get($target)->cmd_clear_bad_roles(); # TODO check result
	};

	# Passive master goes first
	$notify->($standby_host) if ($standby_host ne '');

	# Then every host that is not a master
	foreach my $candidate (keys(%{$main::config->{host}})) {
		$notify->($candidate) unless ($self->roles->is_master($candidate));
	}

	# Active master is handled last
	$notify->($writer_host) if ($writer_host ne '');
}
| 1118 | 749 | |||
| 1119 | 750 | |||
| 1120 | 752 | =item send_status_to_agents | 751 | =item send_status_to_agents |
| 1121 | 753 | 752 | ||
| 1122 | 754 | Send status information to all agents. | 753 | Send status information to all agents. |
| 1123 | @@ -797,7 +796,7 @@ | |||
| 1124 | 797 | 796 | ||
| 1125 | 798 | # Never send anything to agents if we are in PASSIVE mode | 797 | # Never send anything to agents if we are in PASSIVE mode |
| 1126 | 799 | # Never send anything to agents if we have no network connection | 798 | # Never send anything to agents if we have no network connection |
| 1128 | 800 | return if ($self->passive || !$main::have_net); | 799 | return if ($self->is_passive || !$main::have_net); |
| 1129 | 801 | 800 | ||
| 1130 | 802 | # Determine active master if it was not passed | 801 | # Determine active master if it was not passed |
| 1131 | 803 | $master = $self->roles->get_active_master() unless (defined($master)); | 802 | $master = $self->roles->get_active_master() unless (defined($master)); |
| 1132 | @@ -903,6 +902,7 @@ | |||
| 1133 | 903 | elsif ($command eq 'mode' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::mode(); } | 902 | elsif ($command eq 'mode' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::mode(); } |
| 1134 | 904 | elsif ($command eq 'set_active' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::set_active(); } | 903 | elsif ($command eq 'set_active' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::set_active(); } |
| 1135 | 905 | elsif ($command eq 'set_passive' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::set_passive(); } | 904 | elsif ($command eq 'set_passive' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::set_passive(); } |
| 1136 | 905 | elsif ($command eq 'set_manual' && $arg_cnt == 0) { $res = MMM::Monitor::Commands::set_manual(); } | ||
| 1137 | 906 | elsif ($command eq 'set_online' && $arg_cnt == 1) { $res = MMM::Monitor::Commands::set_online ($args[0]); } | 906 | elsif ($command eq 'set_online' && $arg_cnt == 1) { $res = MMM::Monitor::Commands::set_online ($args[0]); } |
| 1138 | 907 | elsif ($command eq 'set_offline' && $arg_cnt == 1) { $res = MMM::Monitor::Commands::set_offline($args[0]); } | 907 | elsif ($command eq 'set_offline' && $arg_cnt == 1) { $res = MMM::Monitor::Commands::set_offline($args[0]); } |
| 1139 | 908 | elsif ($command eq 'move_role' && $arg_cnt == 2) { $res = MMM::Monitor::Commands::move_role($args[0], $args[1]); } | 908 | elsif ($command eq 'move_role' && $arg_cnt == 2) { $res = MMM::Monitor::Commands::move_role($args[0], $args[1]); } |
| 1140 | @@ -917,5 +917,93 @@ | |||
| 1141 | 917 | } | 917 | } |
| 1142 | 918 | } | 918 | } |
| 1143 | 919 | 919 | ||
| 1144 | 920 | |||
=item is_active()

Returns true when the monitor's current mode is MMM_MONITOR_MODE_ACTIVE.

=cut

sub is_active($$) {
	my ($self) = @_;
	my $in_active_mode = ($self->mode == MMM_MONITOR_MODE_ACTIVE);
	return $in_active_mode;
}
| 1155 | 931 | |||
| 1156 | 932 | |||
=item is_manual()

Returns true when the monitor's current mode is MMM_MONITOR_MODE_MANUAL or
MMM_MONITOR_MODE_WAIT (wait mode is treated like manual mode here).

=cut

sub is_manual($$) {
	my ($self) = @_;
	return 1 if ($self->mode == MMM_MONITOR_MODE_MANUAL);
	return ($self->mode == MMM_MONITOR_MODE_WAIT);
}
| 1167 | 943 | |||
| 1168 | 944 | |||
=item is_passive()

Returns true when the monitor's current mode is MMM_MONITOR_MODE_PASSIVE.

=cut

sub is_passive($$) {
	my ($self) = @_;
	my $in_passive_mode = ($self->mode == MMM_MONITOR_MODE_PASSIVE);
	return $in_passive_mode;
}
| 1179 | 955 | |||
| 1180 | 956 | |||
=item set_active()

Switch the monitor to MMM_MONITOR_MODE_ACTIVE.

=cut

sub set_active($$) {
	my ($self) = @_;
	return $self->mode(MMM_MONITOR_MODE_ACTIVE);
}
| 1191 | 967 | |||
| 1192 | 968 | |||
=item set_manual()

Switch the monitor to MMM_MONITOR_MODE_MANUAL.

=cut

sub set_manual($$) {
	my ($self) = @_;
	return $self->mode(MMM_MONITOR_MODE_MANUAL);
}
| 1203 | 979 | |||
| 1204 | 980 | |||
=item set_passive()

Switch the monitor to MMM_MONITOR_MODE_PASSIVE.

=cut

sub set_passive($$) {
	my ($self) = @_;
	return $self->mode(MMM_MONITOR_MODE_PASSIVE);
}
| 1215 | 991 | |||
| 1216 | 992 | |||
=item get_mode_string()

Returns the name of the current mode: 'ACTIVE', 'MANUAL', 'WAIT' or
'PASSIVE'. Any other mode value yields 'UNKNOWN'.

=cut

sub get_mode_string($) {
	my ($self) = @_;

	# Checked top to bottom; the first matching mode wins.
	my @mode_names = (
		[ MMM_MONITOR_MODE_ACTIVE,  'ACTIVE'  ],
		[ MMM_MONITOR_MODE_MANUAL,  'MANUAL'  ],
		[ MMM_MONITOR_MODE_WAIT,    'WAIT'    ],
		[ MMM_MONITOR_MODE_PASSIVE, 'PASSIVE' ],
	);

	foreach my $entry (@mode_names) {
		my ($mode_id, $label) = @{$entry};
		return $label if ($self->mode == $mode_id);
	}

	return 'UNKNOWN'; # should never happen
}
| 1231 | 1007 | |||
| 1232 | 920 | 1; | 1008 | 1; |
| 1233 | 921 | 1009 | ||
| 1234 | 922 | 1010 | ||
| 1235 | === modified file 'lib/Monitor/NetworkChecker.pm' | |||
| 1236 | --- lib/Monitor/NetworkChecker.pm 2009-02-10 08:18:57 +0000 | |||
| 1237 | +++ lib/Monitor/NetworkChecker.pm 2010-03-09 10:25:26 +0000 | |||
| 1238 | @@ -54,29 +54,32 @@ | |||
| 1239 | 54 | $checker->shutdown(); | 54 | $checker->shutdown(); |
| 1240 | 55 | } | 55 | } |
| 1241 | 56 | 56 | ||
| 1243 | 57 | sub initial_check() { | 57 | sub wait_for_network() { |
| 1244 | 58 | my @ips = @{$main::config->{monitor}->{ping_ips}}; | 58 | my @ips = @{$main::config->{monitor}->{ping_ips}}; |
| 1245 | 59 | my $state = 0; | ||
| 1246 | 60 | 59 | ||
| 1247 | 61 | # Create checker | 60 | # Create checker |
| 1248 | 62 | my $checker = new MMM::Monitor::Checker::('ping_ip'); | 61 | my $checker = new MMM::Monitor::Checker::('ping_ip'); |
| 1249 | 63 | 62 | ||
| 1260 | 64 | # Ping all ips | 63 | while (!$main::shutdown) { |
| 1261 | 65 | foreach my $ip (@ips) { | 64 | # Ping all ips |
| 1262 | 66 | # Ping checker | 65 | foreach my $ip (@ips) { |
| 1263 | 67 | $checker->spawn() unless $checker->ping(); | 66 | last if ($main::shutdown); |
| 1264 | 68 | 67 | # Ping checker | |
| 1265 | 69 | my $res = $checker->check($ip); | 68 | $checker->spawn() unless $checker->ping(); |
| 1266 | 70 | if ($res =~ /^OK/) { | 69 | |
| 1267 | 71 | DEBUG "IP '$ip' is reachable: $res"; | 70 | my $res = $checker->check($ip); |
| 1268 | 72 | $state = 1; | 71 | if ($res =~ /^OK/) { |
| 1269 | 73 | last; | 72 | DEBUG "IP '$ip' is reachable: $res"; |
| 1270 | 73 | $checker->shutdown(); | ||
| 1271 | 74 | return 1; | ||
| 1272 | 75 | } | ||
| 1273 | 74 | } | 76 | } |
| 1275 | 75 | DEBUG "IP '$ip' is not reachable: $res"; | 77 | |
| 1276 | 78 | # Sleep a while before checking every ip again | ||
| 1277 | 79 | sleep($main::config->{monitor}->{ping_interval}); | ||
| 1278 | 76 | } | 80 | } |
| 1279 | 77 | $checker->shutdown(); | 81 | $checker->shutdown(); |
| 1282 | 78 | 82 | return 0; | |
| 1281 | 79 | return $state; | ||
| 1283 | 80 | } | 83 | } |
| 1284 | 81 | 84 | ||
| 1285 | 82 | 1; | 85 | 1; |
| 1286 | 83 | 86 | ||
| 1287 | === modified file 'lib/Monitor/Roles.pm' | |||
| 1288 | --- lib/Monitor/Roles.pm 2009-10-29 15:27:32 +0000 | |||
| 1289 | +++ lib/Monitor/Roles.pm 2010-03-09 10:25:26 +0000 | |||
| 1290 | @@ -112,6 +112,29 @@ | |||
| 1291 | 112 | } | 112 | } |
| 1292 | 113 | 113 | ||
| 1293 | 114 | 114 | ||
=item host_has_roles($host)

Returns 1 if at least one role IP is currently assigned to host $host,
otherwise 0. An undefined $host always yields 0.

=cut

sub host_has_roles($$) {
	my ($self, $host) = @_;

	return 0 if (!defined($host));

	# Walk every IP of every role and stop at the first one owned by $host.
	foreach my $role_info (values(%$self)) {
		foreach my $ip_info (values(%{$role_info->{ips}})) {
			return 1 if ($ip_info->{assigned_to} eq $host);
		}
	}

	return 0;
}
| 1315 | 136 | |||
| 1316 | 137 | |||
| 1317 | 115 | =item count_host_roles($host) | 138 | =item count_host_roles($host) |
| 1318 | 116 | 139 | ||
| 1319 | 117 | Count all roles assigned to host $host | 140 | Count all roles assigned to host $host |
| 1320 | @@ -155,6 +178,74 @@ | |||
| 1321 | 155 | } | 178 | } |
| 1322 | 156 | 179 | ||
| 1323 | 157 | 180 | ||
=item get_passive_master

Returns the first host listed for the active master role that is not the
current active master. Returns '' if the role is not known, if there is no
active master, or if no other host is listed.

=cut

sub get_passive_master($) {
	my ($self) = @_;

	my $master_role    = $self->{$main::config->{active_master_role}};
	my $current_master = $self->get_active_master();

	return '' if (!$master_role || !$current_master);

	foreach my $candidate (@{ $master_role->{hosts} }) {
		next if ($candidate eq $current_master);
		return $candidate;
	}

	return '';
}
| 1343 | 200 | |||
| 1344 | 201 | |||
=item get_first_master

Returns the first host listed for the active master role, or '' if the role
is not known or has no (true) first host entry.

=cut

sub get_first_master($) {
	my ($self) = @_;

	my $master_role = $self->{$main::config->{active_master_role}};
	return '' if (!$master_role);

	my $first_host = $master_role->{hosts}[0];
	return $first_host ? $first_host : '';
}
| 1359 | 216 | |||
| 1360 | 217 | |||
=item get_second_master

Returns the second host listed for the active master role, or '' if the role
is not known or has no (true) second host entry.

=cut

sub get_second_master($) {
	my ($self) = @_;

	my $master_role = $self->{$main::config->{active_master_role}};
	return '' if (!$master_role);

	my $second_host = $master_role->{hosts}[1];
	return $second_host ? $second_host : '';
}
| 1375 | 232 | |||
| 1376 | 233 | |||
=item get_master_hosts

Get the hosts which can handle the active master-role.

Returns the array reference stored under C<hosts> for the role named by
C<active_master_role> in the configuration, or '' if that role is not known
to this object.

=cut

sub get_master_hosts($) {
	my $self = shift;

	my $role = $self->{$main::config->{active_master_role}};
	return '' unless $role;

	# $role already is the role's info hash. Using it as a key into $self
	# (as in $self->{$role}) would look up the stringified reference, always
	# yield undef and autovivify a bogus entry among the roles.
	return $role->{hosts};
}
| 1390 | 247 | |||
| 1391 | 248 | |||
| 1392 | 158 | =item get_exclusive_role_owner($role) | 249 | =item get_exclusive_role_owner($role) |
| 1393 | 159 | 250 | ||
| 1394 | 160 | Get the host which has the exclusive role $role assigned | 251 | Get the host which has the exclusive role $role assigned |
| 1395 | @@ -211,13 +302,13 @@ | |||
| 1396 | 211 | } | 302 | } |
| 1397 | 212 | 303 | ||
| 1398 | 213 | 304 | ||
| 1400 | 214 | =item clear_host_roles($host) | 305 | =item clear_roles($host) |
| 1401 | 215 | 306 | ||
| 1402 | 216 | Remove all roles from host $host. | 307 | Remove all roles from host $host. |
| 1403 | 217 | 308 | ||
| 1404 | 218 | =cut | 309 | =cut |
| 1405 | 219 | 310 | ||
| 1407 | 220 | sub clear_host_roles($$) { | 311 | sub clear_roles($$) { |
| 1408 | 221 | my $self = shift; | 312 | my $self = shift; |
| 1409 | 222 | my $host = shift; | 313 | my $host = shift; |
| 1410 | 223 | 314 | ||
| 1411 | @@ -238,6 +329,34 @@ | |||
| 1412 | 238 | } | 329 | } |
| 1413 | 239 | 330 | ||
| 1414 | 240 | 331 | ||
=item clear_balanced_role($host, $role)

Remove balanced role $role from host $host.

Every IP of $role that is currently assigned to $host is unassigned.
Returns the number of IPs that were removed; returns 0 if $role is not known
or is not a balanced role.

=cut

sub clear_balanced_role($$$) {
	my $self = shift;
	my $host = shift;
	my $role = shift;

	INFO "Removing balanced role $role from host '$host':";

	my $role_info = $self->{$role};
	return 0 unless $role_info;

	# This check is not inside a loop, so it must leave the sub with
	# 'return'. A 'next' here would either die or affect a loop in the caller.
	return 0 unless ($role_info->{mode} eq 'balanced');

	my $cnt = 0;
	foreach my $ip (keys(%{$role_info->{ips}})) {
		my $ip_info = $role_info->{ips}->{$ip};
		next unless ($ip_info->{assigned_to} eq $host);
		$cnt++;
		INFO " Removed role '$role($ip)' from host '$host'";
		$ip_info->{assigned_to} = '';
	}
	return $cnt;
}
| 1441 | 358 | |||
| 1442 | 359 | |||
| 1443 | 241 | =item find_eligible_host($role) | 360 | =item find_eligible_host($role) |
| 1444 | 242 | 361 | ||
| 1445 | 243 | find host which can take over the role $role | 362 | find host which can take over the role $role |
| 1446 | @@ -562,6 +681,21 @@ | |||
| 1447 | 562 | } | 681 | } |
| 1448 | 563 | 682 | ||
| 1449 | 564 | 683 | ||
=item is_master($host)

Check if host $host is one of the hosts listed for the active master role
(the role named by C<active_master_role> in the configuration).

Returns the number of matching entries in the role's host list, which is 0
(false) if $host is not listed or the role is not known. The result is a
single scalar in every calling context.

=cut

sub is_master($$) {
	my $self = shift;
	my $host = shift;

	my $role = $self->{$main::config->{active_master_role}};
	return 0 unless defined($role);

	# Force scalar context: a bare grep would return an empty list in list
	# context for "not a master", unlike the explicit 0 returned above.
	return scalar(grep { $_ eq $host } @{$role->{hosts}});
}
| 1463 | 697 | |||
| 1464 | 698 | |||
| 1465 | 565 | =item is_active_master_role($role) | 699 | =item is_active_master_role($role) |
| 1466 | 566 | 700 | ||
| 1467 | 567 | Check whether $role is the active master role. | 701 | Check whether $role is the active master role. |
| 1468 | 568 | 702 | ||
| 1469 | === added file 'lib/Monitor/StartupStatus.pm' | |||
| 1470 | --- lib/Monitor/StartupStatus.pm 1970-01-01 00:00:00 +0000 | |||
| 1471 | +++ lib/Monitor/StartupStatus.pm 2010-03-09 10:25:26 +0000 | |||
| 1472 | @@ -0,0 +1,298 @@ | |||
| 1473 | 1 | package MMM::Monitor::StartupStatus; | ||
| 1474 | 2 | |||
| 1475 | 3 | use strict; | ||
| 1476 | 4 | use warnings FATAL => 'all'; | ||
| 1477 | 5 | use List::Util qw(max); | ||
| 1478 | 6 | use Log::Log4perl qw(:easy); | ||
| 1479 | 7 | use MMM::Common::Role; | ||
| 1480 | 8 | use MMM::Monitor::Role; | ||
| 1481 | 9 | use MMM::Monitor::Roles; | ||
| 1482 | 10 | |||
| 1483 | 11 | our $VERSION = '0.01'; | ||
| 1484 | 12 | |||
| 1485 | 13 | =head1 NAME | ||
| 1486 | 14 | |||
| 1487 | 15 | MMM::Monitor::StartupStatus - holds information about agent/system/stored status during startup | ||
| 1488 | 16 | |||
| 1489 | 17 | =cut | ||
| 1490 | 18 | |||
# Constructor: returns a new object of class $class with empty 'roles',
# 'hosts' and 'result' hashes.
sub new($) {
	my ($class) = @_;

	my %state = (
		roles  => {},
		hosts  => {},
		result => {},
	);

	return bless \%state, $class;
}
| 1501 | 29 | |||
| 1502 | 30 | |||
| 1503 | 31 | =head1 FUNCTIONS | ||
| 1504 | 32 | |||
| 1505 | 33 | =over 4 | ||
| 1506 | 34 | |||
=item set_agent_status($host, $state, $roles, $master)

Record what the agent on $host reported: its state, its master and the roles
(array reference $roles) it believes it holds. Roles whose IP no longer
exists, or which $host can no longer handle, are skipped with a warning.

=cut

sub set_agent_status($$\@$) {
	my ($self, $host, $state, $roles, $master) = @_;

	# Autovivification creates the per-host entry if it does not exist yet.
	$self->{hosts}{$host}{agent} = {
		state  => $state,
		master => $master,
	};

	ROLE: foreach my $role (@{$roles}) {
		if (!MMM::Monitor::Roles->instance()->exists_ip($role->name, $role->ip)) {
			WARN "Detected change in role definitions: Role '$role' was removed.";
			next ROLE;
		}
		if (!MMM::Monitor::Roles->instance()->can_handle($role->name, $host)) {
			WARN "Detected change in role definitions: Host '$host' can't handle role '$role' anymore.";
			next ROLE;
		}

		# Mark the role as reported by the agent of this host.
		my $role_str = $role->to_string();
		$self->{roles}{$role_str}{$host}{agent} = 1;
	}
}
| 1540 | 68 | |||
| 1541 | 69 | |||
=item set_stored_status($host, $state, $roles)

Record the stored status of $host: its state and the roles (array reference
$roles) stored for it. Roles whose IP no longer exists, or which $host can no
longer handle, are skipped with a warning.

=cut

sub set_stored_status($$\@$) {
	my ($self, $host, $state, $roles) = @_;

	# Autovivification creates the per-host entry if it does not exist yet.
	$self->{hosts}{$host}{stored} = {
		state => $state,
	};

	ROLE: foreach my $role (@{$roles}) {
		if (!MMM::Monitor::Roles->instance()->exists_ip($role->name, $role->ip)) {
			WARN "Detected change in role definitions: Role '$role' was removed.";
			next ROLE;
		}
		if (!MMM::Monitor::Roles->instance()->can_handle($role->name, $host)) {
			WARN "Detected change in role definitions: Host '$host' can't handle role '$role' anymore.";
			next ROLE;
		}

		# Mark the role as present in the stored status of this host.
		my $role_str = $role->to_string();
		$self->{roles}{$role_str}{$host}{stored} = 1;
	}
}
| 1573 | 101 | |||
| 1574 | 102 | |||
=item set_system_status($host, $writable, $roles, $master)

Record the system status of $host: whether it is writable, its master and
the roles (array reference $roles) found on it. Roles whose IP no longer
exists, or which $host can no longer handle, are skipped with a warning.

=cut

sub set_system_status($$\@$) {
	my ($self, $host, $writable, $roles, $master) = @_;

	# Autovivification creates the per-host entry if it does not exist yet.
	$self->{hosts}{$host}{system} = {
		writable => $writable,
		master   => $master,
	};

	ROLE: foreach my $role (@{$roles}) {
		if (!MMM::Monitor::Roles->instance()->exists_ip($role->name, $role->ip)) {
			WARN "Detected change in role definitions: Role '$role' was removed.";
			next ROLE;
		}
		if (!MMM::Monitor::Roles->instance()->can_handle($role->name, $host)) {
			WARN "Detected change in role definitions: Host '$host' can't handle role '$role' anymore.";
			next ROLE;
		}

		# Mark the role as found in the system status of this host.
		my $role_str = $role->to_string();
		$self->{roles}{$role_str}{$host}{system} = 1;
	}
}
| 1608 | 136 | |||
=item determine_status()

Combine the recorded stored, system and agent information into one result
per configured host. Fills C<< $self->{result}->{$host} >> with a C<state>
and a list of C<roles>.

Each role is given to the host that collects the most votes; a host only
wins with more than zero votes, and on a tie the host seen first keeps the
role (hash iteration order).

Returns 1 if the active master role was found in the system status of more
than one host, otherwise 0.

=cut

sub determine_status() {
	my $self = shift;
	my $roles = MMM::Monitor::Roles->instance();

	my $is_manual = MMM::Monitor::Monitor->instance()->is_manual();

	# Set to 1 when the active master role is seen on more than one host
	my $conflict = 0;

	foreach my $host (keys(%{$main::config->{host}})) {

		# Figure out host state

		my $stored_state = 'UNKNOWN';
		my $agent_state = 'UNKNOWN';
		my $state;

		$stored_state = $self->{hosts}->{$host}->{stored}->{state} if (defined($self->{hosts}->{$host}->{stored}->{state}));
		$agent_state = $self->{hosts}->{$host}->{agent}->{state} if (defined($self->{hosts}->{$host}->{agent}->{state} ));

		# Checked in this order, the first state found in either source wins.
		# Anything else (including 'UNKNOWN' from both) becomes AWAITING_RECOVERY.
		if ( $stored_state eq 'ADMIN_OFFLINE' || $agent_state eq 'ADMIN_OFFLINE' ) { $state = 'ADMIN_OFFLINE'; }
		elsif ($stored_state eq 'HARD_OFFLINE' || $agent_state eq 'HARD_OFFLINE' ) { $state = 'HARD_OFFLINE'; }
		elsif ($stored_state eq 'REPLICATION_FAIL' || $agent_state eq 'REPLICATION_FAIL' ) { $state = 'REPLICATION_FAIL'; }
		elsif ($stored_state eq 'REPLICATION_DELAY' || $agent_state eq 'REPLICATION_DELAY') { $state = 'REPLICATION_DELAY'; }
		elsif ($stored_state eq 'ONLINE' || $agent_state eq 'ONLINE' ) { $state = 'ONLINE'; }
		else { $state = 'AWAITING_RECOVERY'; }

		$self->{result}->{$host} = { state => $state, roles => [] };
	}

	foreach my $role_str (keys(%{$self->{roles}})) {
		my $role = MMM::Monitor::Role->from_string($role_str);
		next unless(defined($role));

		if ($roles->is_active_master_role($role->name)) {
			# active master role
			my $max = 0;
			my $target = undef;
			my $system_cnt = 0;
			foreach my $host (keys(%{$self->{roles}->{$role_str}})) {
				my $votes = 0;
				my $info = $self->{roles}->{$role_str}->{$host};
				my $host_info = $self->{hosts}->{$host};

				# host is writable
				$votes += 4 if (defined($host_info->{system}->{writable}) && $host_info->{system}->{writable});

				# IP is configured
				if (defined($info->{system})) {
					$votes += 2;
					$system_cnt++;
				}

				# Role is present in the stored status / was reported by the agent
				$votes += 1 if (defined($info->{stored}));
				$votes += 1 if (defined($info->{agent}));

				# One extra vote for every non-master host whose system status names this host as its master
				foreach my $slave_host (keys(%{$self->{hosts}})) {
					my $slave_info = $self->{hosts}->{$slave_host};
					next if MMM::Monitor::Roles->instance()->is_master($slave_host);
					$votes++ if (defined($slave_info->{system}->{master}) && $slave_info->{system}->{master} eq $host);
				}


				# ADMIN_OFFLINE hosts never get the role; HARD_OFFLINE hosts only when is_manual is true
				my $state = $self->{result}->{$host}->{state};
				$votes = 0 if ($state eq 'ADMIN_OFFLINE');
				$votes = 0 if ($state eq 'HARD_OFFLINE' && !$is_manual);

				# Strict comparison: a host with zero votes can never become the target
				if ($votes > $max) {
					$target = $host;
					$max = $votes;
				}
			}
			if ($system_cnt > 1) {
				WARN "Role '$role_str' was configured on $system_cnt hosts during monitor startup.";
				$conflict = 1;
			}
			if (defined($target)) {
				push (@{$self->{result}->{$target}->{roles}}, $role);
				my $state = $self->{result}->{$target}->{state};
				# When is_manual is true, only REPLICATION_FAIL/REPLICATION_DELAY are changed to ONLINE
				$self->{result}->{$target}->{state} = 'ONLINE' if (!$is_manual || $state eq 'REPLICATION_FAIL' || $state eq 'REPLICATION_DELAY');
			}
			next;
		}

		# Handle non-writer roles
		my $max = 0;
		my $target = undef;
		my $system_cnt = 0;
		foreach my $host (keys(%{$self->{roles}->{$role_str}})) {
			my $votes = 0;
			my $info = $self->{roles}->{$role_str}->{$host};

			# IP is configured
			if (defined($info->{system})) {
				$votes += 4;
				$system_cnt++;
			}

			# Role is present in the stored status / was reported by the agent
			$votes += 2 if (defined($info->{stored}));
			$votes += 1 if (defined($info->{agent}));


			# ADMIN_OFFLINE hosts never get the role. Unless is_manual is true,
			# only ONLINE and AWAITING_RECOVERY hosts may get it.
			my $state = $self->{result}->{$host}->{state};
			if ($state eq 'ADMIN_OFFLINE' || (!$is_manual && $state ne 'ONLINE' && $state ne 'AWAITING_RECOVERY')) {
				$votes = 0;
			}
			if ($votes > $max) {
				$target = $host;
				$max = $votes;
			}
		}
		# Unlike the active master role, duplicates here are only logged and do not set $conflict
		if ($system_cnt > 1) {
			WARN "Role '$role_str' was configured on $system_cnt hosts during monitor startup.";
		}
		if (defined($target)) {
			push (@{$self->{result}->{$target}->{roles}}, $role);
			$self->{result}->{$target}->{state} = 'ONLINE' if ($self->{result}->{$target}->{state} eq 'AWAITING_RECOVERY');
		}
	}
	return $conflict;
}
| 1729 | 257 | |||
| 1730 | 258 | |||
# Render the collected startup information as text: one table of
# role/host pairs showing where each was seen (stored, system, agent), and
# one table of hosts with master, writable flag and stored/agent state.
# Values that were never recorded are shown as '?' (or '-' in the role table).
sub to_string($) {
	my ($self) = @_;

	# Column widths start at the header width and grow to the longest configured name
	my $role_len = max(4, map { length($_) } keys(%{$main::config->{role}})); # "Role"
	my $host_len = max(6, map { length($_) } keys(%{$main::config->{host}})); # "Master"
	$role_len += 17; # "(999.999.999.999)"

	my @chunks = ("Startup status:\n", "\nRoles:\n");

	my $role_fmt = " %-*s %-*s %-6s %-6s %-5s\n";
	push(@chunks, sprintf($role_fmt, $role_len, 'Role', $host_len, 'Host', 'Stored', 'System', 'Agent'));
	foreach my $role (keys(%{$self->{roles}})) {
		foreach my $host (keys(%{$self->{roles}->{$role}})) {
			my $info = $self->{roles}->{$role}->{$host};
			my $in_stored = defined($info->{stored}) ? 'Yes' : '-';
			my $in_system = defined($info->{system}) ? 'Yes' : '-';
			my $in_agent  = defined($info->{agent})  ? 'Yes' : '-';
			push(@chunks, sprintf($role_fmt, $role_len, $role, $host_len, $host, $in_stored, $in_system, $in_agent));
		}
	}

	push(@chunks, "\nHosts:\n");

	my $host_fmt = " %-*s %-*s %-8s %-16s %-16s\n";
	push(@chunks, sprintf($host_fmt, $host_len, 'Host', $host_len, 'Master', 'Writable', 'Stored state', 'Agent state'));
	foreach my $host (keys(%{$self->{hosts}})) {
		my $info = $self->{hosts}->{$host};
		my $is_master = MMM::Monitor::Roles->instance()->is_master($host);

		# Master hosts show '-' in the master column
		my $master_col;
		if ($is_master) {
			$master_col = '-';
		}
		else {
			$master_col = defined($info->{system}->{master}) ? $info->{system}->{master} : '?';
		}

		my $writable_col = '?';
		if (defined($info->{system}->{writable})) {
			$writable_col = $info->{system}->{writable} ? 'Yes' : 'No';
		}

		my $stored_col = defined($info->{stored}->{state}) ? $info->{stored}->{state} : '?';
		my $agent_col  = defined($info->{agent}->{state})  ? $info->{agent}->{state}  : '?';

		push(@chunks, sprintf($host_fmt, $host_len, $host, $host_len, $master_col, $writable_col, $stored_col, $agent_col));
	}

	return join('', @chunks);
}
| 1769 | 297 | |||
| 1770 | 298 | 1; | ||
| 1771 | 0 | 299 | ||
| 1772 | === modified file 'lib/Monitor/t/Roles.t' | |||
| 1773 | --- lib/Monitor/t/Roles.t 2009-02-05 08:43:52 +0000 | |||
| 1774 | +++ lib/Monitor/t/Roles.t 2010-03-09 10:25:26 +0000 | |||
| 1775 | @@ -55,7 +55,7 @@ | |||
| 1776 | 55 | $roles->assign($role_writer, 'db1'); | 55 | $roles->assign($role_writer, 'db1'); |
| 1777 | 56 | is($roles->get_active_master(), 'db1', 'Active master after assigning writer role'); | 56 | is($roles->get_active_master(), 'db1', 'Active master after assigning writer role'); |
| 1778 | 57 | 57 | ||
| 1780 | 58 | $roles->clear_host_roles($roles->get_active_master()); | 58 | $roles->clear_roles($roles->get_active_master()); |
| 1781 | 59 | is($roles->get_active_master(), '', 'No active master with active master host cleared'); | 59 | is($roles->get_active_master(), '', 'No active master with active master host cleared'); |
| 1782 | 60 | 60 | ||
| 1783 | 61 | $roles->assign($role_writer, 'db2'); | 61 | $roles->assign($role_writer, 'db2'); |
| 1784 | @@ -84,7 +84,7 @@ | |||
| 1785 | 84 | is($roles->count_host_roles('db2'), 2, 'balance roles (role count db2)'); | 84 | is($roles->count_host_roles('db2'), 2, 'balance roles (role count db2)'); |
| 1786 | 85 | 85 | ||
| 1787 | 86 | $agents->{db2}->state('HARD_OFFLINE'); | 86 | $agents->{db2}->state('HARD_OFFLINE'); |
| 1789 | 87 | $roles->clear_host_roles('db2'); | 87 | $roles->clear_roles('db2'); |
| 1790 | 88 | $roles->process_orphans('exclusive'); | 88 | $roles->process_orphans('exclusive'); |
| 1791 | 89 | $roles->process_orphans('balanced'); | 89 | $roles->process_orphans('balanced'); |
| 1792 | 90 | is($roles->count_host_roles('db1'), 4, 'process orphans assigns all orphaned roles'); | 90 | is($roles->count_host_roles('db1'), 4, 'process orphans assigns all orphaned roles'); |
| 1793 | 91 | 91 | ||
| 1794 | === modified file 'sbin/mmm_mond' | |||
| 1795 | --- sbin/mmm_mond 2010-02-11 02:23:38 +0000 | |||
| 1796 | +++ sbin/mmm_mond 2010-03-09 10:25:26 +0000 | |||
| 1797 | @@ -72,11 +72,6 @@ | |||
| 1798 | 72 | 72 | ||
| 1799 | 73 | our $monitor = new MMM::Monitor::Monitor::(); | 73 | our $monitor = new MMM::Monitor::Monitor::(); |
| 1800 | 74 | 74 | ||
| 1801 | 75 | if (!MMM::Monitor::NetworkChecker->initial_check()) { | ||
| 1802 | 76 | LOGDIE "None of the 'ping_ips' could be reached during startup. Network seems to be down - mmm_mond will shutdown now."; | ||
| 1803 | 77 | } | ||
| 1804 | 78 | |||
| 1805 | 79 | |||
| 1806 | 80 | my $pidfilename = $config->{monitor}->{pid_path}; | 75 | my $pidfilename = $config->{monitor}->{pid_path}; |
| 1807 | 81 | my $pidfile = new MMM::Common::PidFile:: $pidfilename; | 76 | my $pidfile = new MMM::Common::PidFile:: $pidfilename; |
| 1808 | 82 | 77 | ||
| 1809 | @@ -106,9 +101,9 @@ | |||
| 1810 | 106 | $SIG{PIPE} = 'IGNORE'; | 101 | $SIG{PIPE} = 'IGNORE'; |
| 1811 | 107 | $SIG{CHLD} = \&ChildHandler; | 102 | $SIG{CHLD} = \&ChildHandler; |
| 1812 | 108 | 103 | ||
| 1816 | 109 | $monitor->init(); | 104 | if ($monitor->init()) { |
| 1817 | 110 | 105 | $monitor->main(); | |
| 1818 | 111 | $monitor->main(); | 106 | } |
| 1819 | 112 | 107 | ||
| 1820 | 113 | INFO 'END'; | 108 | INFO 'END'; |
| 1821 | 114 | exit(0); | 109 | exit(0); |