Merge lp:~stub/charms/precise/postgresql/fix-races into lp:charms/postgresql
Proposed by: Stuart Bishop
Status: Merged
Approved by: Mark Mims
Approved revision: 75
Merged at revision: 63
Proposed branch: lp:~stub/charms/precise/postgresql/fix-races
Merge into: lp:charms/postgresql
Prerequisite: lp:~stub/charms/precise/postgresql/cleanups
Diff against target: 650 lines (+247/-131), 3 files modified: config.yaml (+7/-0), hooks/hooks.py (+208/-109), test.py (+32/-22)
To merge this branch: bzr merge lp:~stub/charms/precise/postgresql/fix-races
Related bugs: none
Reviewer: Mark Mims (community): Approve
Review via email: mp+181740@code.launchpad.net
Commit message
Description of the change
The new local provider is quite fast, and exposes race conditions in the PostgreSQL charm.
This branch reworks the replication peer relationship so that the test suite runs more reliably. For example, master election has been rewritten to cope with situations such as this: when a new service of three units is created, units 1 and 2 may already have elected a master between themselves before unit 0 joins the relation, so the old assumption that the lowest numbered unit in a new replication peer relation is always the master no longer holds.
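The reworked election boils down to a short decision order. The following is a rough sketch only, not the charm code itself; helper names such as replication_peers(), peer_state(), local_unit() and unit_sorted() are stand-ins for the charm's actual helpers (see the Preview Diff below for the real implementation):

    def elect_master():
        # 1. If any peer already claims to be the master, believe it.
        for peer in replication_peers():
            if peer_state(peer) == 'master':
                return peer
        # 2. If replication is already set up among the peers but no master
        #    is visible yet, defer the decision; only the units already
        #    participating hold the data that must be preserved.
        if any(peer_state(peer) != 'standalone' for peer in replication_peers()):
            return None  # try again on a later relation hook
        # 3. Brand new peer group: the lowest numbered unit, including
        #    ourselves, wins the election.
        return unit_sorted(replication_peers() + [local_unit()])[0]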
- 75. By Stuart Bishop: Merged cleanups into fix-races.
Revision history for this message
Mark Mims (mark-mims):
review: Approve
Preview Diff
=== modified file 'config.yaml'
--- config.yaml 2013-05-27 14:37:19 +0000
+++ config.yaml 2013-08-23 09:40:16 +0000
@@ -313,3 +313,10 @@
         type: string
         description: |
             Extra archives to add with add-apt-repository(1).
+    advisory_lock_restart_key:
+        default: 765
+        type: int
+        description: |
+            An advisory lock key used internally by the charm. You do not need
+            to change it unless it happens to conflict with an advisory lock key
+            being used by your applications.

=== modified file 'hooks/hooks.py'
--- hooks/hooks.py 2013-08-23 09:40:15 +0000
+++ hooks/hooks.py 2013-08-23 09:40:16 +0000
@@ -20,7 +20,7 @@
 
 from charmhelpers.core import hookenv, host
 from charmhelpers.core.hookenv import (
-    CRITICAL, ERROR, WARNING, INFO, DEBUG, log,
+    CRITICAL, ERROR, WARNING, INFO, DEBUG,
     )
 
 hooks = hookenv.Hooks()
@@ -28,16 +28,24 @@
 # jinja2 may not be importable until the install hook has installed the
 # required packages.
 def Template(*args, **kw):
+    """jinja2.Template with deferred jinja2 import"""
     from jinja2 import Template
     return Template(*args, **kw)
 
 
 def log(msg, lvl=INFO):
-    # Per Bug #1208787, log messages sent via juju-log are being lost.
-    # Spit messages out to a log file to work around the problem.
+    '''Log a message.
+
+    Per Bug #1208787, log messages sent via juju-log are being lost.
+    Spit messages out to a log file to work around the problem.
+    It is also rather nice to have the log messages we explicitly emit
+    in a separate log file, rather than just mashed up with all the
+    juju noise.
+    '''
     myname = hookenv.local_unit().replace('/', '-')
-    with open('/tmp/{}-debug.log'.format(myname), 'a') as f:
-        f.write('{}: {}\n'.format(lvl, msg))
+    ts = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
+    with open('/var/log/juju/{}-debug.log'.format(myname), 'a') as f:
+        f.write('{} {}: {}\n'.format(ts, lvl, msg))
     hookenv.log(msg, lvl)
 
 
@@ -49,6 +57,7 @@
         self.load()
 
     def load(self):
+        '''Load stored state from local disk.'''
        if os.path.exists(self._state_file):
            state = pickle.load(open(self._state_file, 'rb'))
        else:
@@ -58,6 +67,7 @@
         self.update(state)
 
     def save(self):
+        '''Store state to local disk.'''
         state = {}
         state.update(self)
         pickle.dump(state, open(self._state_file, 'wb'))
@@ -181,13 +191,13 @@
 
 
 def postgresql_autostart(enabled):
+    startup_file = os.path.join(postgresql_config_dir, 'start.conf')
     if enabled:
         log("Enabling PostgreSQL startup in {}".format(startup_file))
         mode = 'auto'
     else:
         log("Disabling PostgreSQL startup in {}".format(startup_file))
         mode = 'manual'
-    startup_file = os.path.join(postgresql_config_dir, 'start.conf')
     contents = Template(open("templates/start_conf.tmpl").read()).render(
         {'mode': mode})
     host.write_file(
@@ -209,6 +219,7 @@
 
 
 def postgresql_is_running():
+    '''Return true if PostgreSQL is running.'''
     # init script always return true (9.1), add extra check to make it useful
     status, output = commands.getstatusoutput("invoke-rc.d postgresql status")
     if status != 0:
@@ -219,72 +230,65 @@
 
 
 def postgresql_stop():
-    host.service_stop('postgresql')
-    return not postgresql_is_running()
+    '''Shutdown PostgreSQL.'''
+    success = host.service_stop('postgresql')
+    return not (success and postgresql_is_running())
 
 
 def postgresql_start():
-    host.service_start('postgresql')
-    return postgresql_is_running()
+    '''Start PostgreSQL if it is not already running.'''
+    success = host.service_start('postgresql')
+    return success and postgresql_is_running()
 
 
 def postgresql_restart():
+    '''Restart PostgreSQL, or start it if it is not already running.'''
     if postgresql_is_running():
-        # If the database is in backup mode, we don't want to restart
-        # PostgreSQL and abort the procedure. This may be another unit being
-        # cloned, or a filesystem level backup is being made. There is no
-        # timeout here, as backups can take hours or days. Instead, keep
-        # logging so admins know wtf is going on.
-        last_warning = time.time()
-        while postgresql_is_in_backup_mode():
-            if time.time() + 120 > last_warning:
-                log("In backup mode. PostgreSQL restart blocked.", WARNING)
-                log(
-                    "Run \"psql -U postgres -c 'SELECT pg_stop_backup()'\""
-                    "to cancel backup mode and forcefully unblock this hook.")
-                last_warning = time.time()
-            time.sleep(5)
-
-        return host.service_restart('postgresql')
+        with restart_lock(hookenv.local_unit(), True):
+            # 'service postgresql restart' fails; it only does a reload.
+            # success = host.service_restart('postgresql')
+            try:
+                run('pg_ctlcluster -force {version} {cluster_name} '
+                    'restart'.format(**config_data))
+                success = True
+            except subprocess.CalledProcessError as e:
+                success = False
     else:
-        return host.service_start('postgresql')
+        success = host.service_start('postgresql')
 
     # Store a copy of our known live configuration so
     # postgresql_reload_or_restart() can make good choices.
-    if 'saved_config' in local_state:
+    if success and 'saved_config' in local_state:
         local_state['live_config'] = local_state['saved_config']
         local_state.save()
 
-    return postgresql_is_running()
+    return success and postgresql_is_running()
 
 
 def postgresql_reload():
+    '''Make PostgreSQL reload its configuration.'''
     # reload returns a reliable exit status
     status, output = commands.getstatusoutput("invoke-rc.d postgresql reload")
     return (status == 0)
 
 
-def postgresql_reload_or_restart():
-    """Reload PostgreSQL configuration, restarting if necessary."""
-    # Pull in current values of settings that can only be changed on
-    # server restart.
+def requires_restart():
+    '''Check for configuration changes requiring a restart to take effect.'''
     if not postgresql_is_running():
-        return postgresql_restart()
+        return True
 
-    # Suck in the config last written to postgresql.conf.
     saved_config = local_state.get('saved_config', None)
     if not saved_config:
         # No record of postgresql.conf state, perhaps an upgrade.
         # Better restart.
-        return postgresql_restart()
+        return True
 
-    # Suck in our live config from last time we restarted.
     live_config = local_state.setdefault('live_config', {})
 
     # Pull in a list of PostgreSQL settings.
     cur = db_cursor()
     cur.execute("SELECT name, context FROM pg_settings")
-    requires_restart = False
+    restart = False
     for name, context in cur.fetchall():
         live_value = live_config.get(name, None)
         new_value = saved_config.get(name, None)
@@ -296,23 +300,27 @@
             if context == 'postmaster':
                 # A setting has changed that requires PostgreSQL to be
                 # restarted before it will take effect.
-                requires_restart = True
-
-    if requires_restart:
-        # A change has been requested that requires a restart.
-        log(
-            "Configuration change requires PostgreSQL restart. Restarting.",
+                restart = True
+    return restart
+
+
+def postgresql_reload_or_restart():
+    """Reload PostgreSQL configuration, restarting if necessary."""
+    if requires_restart():
+        log("Configuration change requires PostgreSQL restart. Restarting.",
             WARNING)
-        rc = postgresql_restart()
+        success = postgresql_restart()
+        if not success or requires_restart():
+            log("Configuration changes failed to apply", WARNING)
+            success = False
     else:
-        log("PostgreSQL reload, config changes taking effect.", DEBUG)
-        rc = postgresql_reload()  # No pending need to bounce, just reload.
+        success = host.service_reload('postgresql')
 
-    if rc == 0 and 'saved_config' in local_state:
-        local_state['live_config'] = local_state['saved_config']
+    if success:
+        local_state['saved_config'] = local_state['live_config']
         local_state.save()
 
-    return rc
+    return success
 
 
 def get_service_port(postgresql_config):
@@ -344,8 +352,6 @@
         config_data["shared_buffers"] = \
             "%sMB" % (int(int(total_ram) * 0.15),)
         # XXX: This is very messy - should probably be a subordinate charm
-        # file overlaps with __builtin__.file ... renaming to conf_file
-        # negronjl
         conf_file = open("/etc/sysctl.d/50-postgresql.conf", "w")
         conf_file.write("kernel.sem = 250 32000 100 1024\n")
         conf_file.write("kernel.shmall = %s\n" %
@@ -579,7 +585,7 @@
 
 
 def db_cursor(autocommit=False, db='template1', user='postgres',
-              host=None, timeout=120):
+              host=None, timeout=30):
     import psycopg2
     if host:
         conn_str = "dbname={} host={} user={}".format(db, host, user)
@@ -855,14 +861,16 @@
 
 @hooks.hook()
 def start():
-    if not postgresql_restart():
+    if not postgresql_reload_or_restart():
         raise SystemExit(1)
 
 
 @hooks.hook()
 def stop():
-    if not postgresql_stop():
-        raise SystemExit(1)
+    if postgresql_is_running():
+        with restart_lock(hookenv.local_unit(), True):
+            if not postgresql_stop():
+                raise SystemExit(1)
 
 
 def quote_identifier(identifier):
@@ -1163,7 +1171,7 @@
 def db_relation_broken():
     from psycopg2.extensions import AsIs
 
-    relid = os.environ['JUJU_RELATION_ID']
+    relid = hookenv.relation_id()
     if relid not in local_state['relations']['db']:
         # This was to be a hot standby, but it had not yet got as far as
         # receiving and handling credentials from the master.
@@ -1174,7 +1182,7 @@
     # we used from there. Instead, we have to persist this information
     # ourselves.
     relation = local_state['relations']['db'][relid]
-    unit_relation_data = relation[os.environ['JUJU_UNIT_NAME']]
+    unit_relation_data = relation[hookenv.local_unit()]
 
     if local_state['state'] in ('master', 'standalone'):
         user = unit_relation_data.get('user', None)
@@ -1303,27 +1311,75 @@
         log("I am already the master", DEBUG)
         return hookenv.local_unit()
 
+    if local_state['state'] == 'hot standby':
+        log("I am already following {}".format(
+            local_state['following']), DEBUG)
+        return local_state['following']
+
+    replication_relid = hookenv.relation_ids('replication')[0]
+    replication_units = hookenv.related_units(replication_relid)
+
+    if local_state['state'] == 'standalone':
+        log("I'm a standalone unit wanting to participate in replication")
+        existing_replication = False
+        for unit in replication_units:
+            # If another peer thinks it is the master, believe it.
+            remote_state = hookenv.relation_get(
+                'state', unit, replication_relid)
+            if remote_state == 'master':
+                log("{} thinks it is the master, believing it".format(
+                    unit), DEBUG)
+                return unit
+
+            # If we find a peer that isn't standalone, we know
+            # replication has already been setup at some point.
+            if remote_state != 'standalone':
+                existing_replication = True
+
+        # If we are joining a peer relation where replication has
+        # already been setup, but there is currently no master, wait
+        # until one of the remaining participating units has been
+        # promoted to master. Only they have the data we need to
+        # preserve.
+        if existing_replication:
+            log("Peers participating in replication need to elect a master",
+                DEBUG)
+            return None
+
+        # There are no peers claiming to be master, and there is no
+        # election in progress, so lowest numbered unit wins.
+        units = replication_units + [hookenv.local_unit()]
+        master = unit_sorted(units)[0]
+        if master == hookenv.local_unit():
+            log("I'm Master - lowest numbered unit in new peer group")
+            return master
+        else:
+            log("Waiting on {} to declare itself Master".format(master), DEBUG)
+            return None
+
     if local_state['state'] == 'failover':
         former_master = local_state['following']
         log("Failover from {}".format(former_master))
 
         units_not_in_failover = set()
-        for relid in hookenv.relation_ids('replication'):
-            for unit in hookenv.related_units(relid):
-                if unit == former_master:
-                    log("Found dying master {}".format(unit), DEBUG)
-                    continue
-
-                relation = hookenv.relation_get(unit=unit, rid=relid)
-
-                if relation['state'] == 'master':
-                    log(
-                        "{} says it already won the election".format(unit),
-                        INFO)
-                    return unit
-
-                if relation['state'] != 'failover':
-                    units_not_in_failover.add(unit)
+        candidates = set()
+        for unit in replication_units:
+            if unit == former_master:
+                log("Found dying master {}".format(unit), DEBUG)
+                continue
+
+            relation = hookenv.relation_get(unit=unit, rid=replication_relid)
+
+            if relation['state'] == 'master':
+                log("{} says it already won the election".format(unit),
+                    INFO)
+                return unit
+
+            if relation['state'] == 'failover':
+                candidates.add(unit)
+
+            elif relation['state'] != 'standalone':
+                units_not_in_failover.add(unit)
 
         if units_not_in_failover:
             log("{} unaware of impending election. Deferring result.".format(
@@ -1333,35 +1389,24 @@
         log("Election in progress")
         winner = None
         winning_offset = -1
-        for relid in hookenv.relation_ids('replication'):
-            candidates = set(hookenv.related_units(relid))
-            candidates.add(hookenv.local_unit())
-            candidates.discard(former_master)
-            # Sort the unit lists so we get consistent results in a tie
-            # and lowest unit number wins.
-            for unit in unit_sorted(candidates):
-                relation = hookenv.relation_get(unit=unit, rid=relid)
-                if int(relation['wal_received_offset']) > winning_offset:
-                    winner = unit
-                    winning_offset = int(relation['wal_received_offset'])
+        candidates.add(hookenv.local_unit())
+        # Sort the unit lists so we get consistent results in a tie
+        # and lowest unit number wins.
+        for unit in unit_sorted(candidates):
+            relation = hookenv.relation_get(unit=unit, rid=replication_relid)
+            if int(relation['wal_received_offset']) > winning_offset:
+                winner = unit
+                winning_offset = int(relation['wal_received_offset'])
 
         # All remaining hot standbys are in failover mode and have
        # reported their wal_received_offset. We can declare victory.
-        log("{} won the election as is the new master".format(winner))
-        return winner
-
-    # Maybe another peer thinks it is the master?
-    for relid in hookenv.relation_ids('replication'):
-        for unit in hookenv.related_units(relid):
-            if hookenv.relation_get('state', unit, relid) == 'master':
-                return unit
-
-    # New peer group. Lowest numbered unit will be the master.
-    for relid in hookenv.relation_ids('replication'):
-        units = hookenv.related_units(relid) + [hookenv.local_unit()]
-        master = unit_sorted(units)[0]
-        log("New peer group. {} is elected master".format(master))
-        return master
+        if winner == hookenv.local_unit():
+            log("I won the election, announcing myself winner")
+            return winner
+        else:
+            log("Waiting for {} to announce its victory".format(winner),
+                DEBUG)
+            return None
 
 
 @hooks.hook('replication-relation-joined', 'replication-relation-changed')
@@ -1419,10 +1464,7 @@
         log("Fresh unit. I will clone {} and become a hot standby".format(
             master))
 
-        # Before we start destroying anything, ensure that the
-        # master is contactable.
         master_ip = hookenv.relation_get('private-address', master)
-        wait_for_db(db='postgres', user='juju_replication', host=master_ip)
 
         clone_database(master, master_ip)
 
@@ -1592,8 +1634,55 @@
         os.chdir(org_dir)
 
 
+@contextmanager
+def restart_lock(unit, exclusive):
+    '''Aquire the database restart lock on the given unit.
+
+    A database needing a restart should grab an exclusive lock before
+    doing so. To block a remote database from doing a restart, grab a shared
+    lock.
+    '''
+    import psycopg2
+    key = long(config_data['advisory_lock_restart_key'])
+    if exclusive:
+        lock_function = 'pg_advisory_lock'
+    else:
+        lock_function = 'pg_advisory_lock_shared'
+    q = 'SELECT {}({})'.format(lock_function, key)
+
+    # We will get an exception if the database is rebooted while waiting
+    # for a shared lock. If the connection is killed, we retry a few
+    # times to cope.
+    num_retries = 3
+
+    for count in range(0, num_retries):
+        try:
+            if unit == hookenv.local_unit():
+                cur = db_cursor(autocommit=True)
+            else:
+                host = hookenv.relation_get('private-address', unit)
+                cur = db_cursor(
+                    autocommit=True, db='postgres',
+                    user='juju_replication', host=host)
+            cur.execute(q)
+            break
+        except psycopg2.Error:
+            if count == num_retries - 1:
+                raise
+
+    try:
+        yield
+    finally:
+        # Close our connection, swallowing any exceptions as the database
+        # may be being rebooted now we have released our lock.
+        try:
+            del cur
+        except psycopg2.Error:
+            pass
+
+
 def clone_database(master_unit, master_host):
-    with pgpass():
+    with restart_lock(master_unit, False):
         postgresql_stop()
         log("Cloning master {}".format(master_unit))
 
@@ -1607,9 +1696,10 @@
         shutil.rmtree(postgresql_cluster_dir)
 
         try:
-            # Change directory the postgres user can read.
-            with switch_cwd('/tmp'):
-                # Run the sudo command.
+            # Change directory the postgres user can read, and need
+            # .pgpass too.
+            with switch_cwd('/tmp'), pgpass():
+                # Clone the master with pg_basebackup.
                 output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                 log(output, DEBUG)
                 # Debian by default expects SSL certificates in the datadir.
@@ -1626,8 +1716,8 @@
             # can retry hooks again. Even assuming the charm is
             # functioning correctly, the clone may still fail
            # due to eg. lack of disk space.
-            log("Clone failed, db cluster destroyed", ERROR)
             log(x.output, ERROR)
+            log("Clone failed, local db destroyed", ERROR)
             if os.path.exists(postgresql_cluster_dir):
                 shutil.rmtree(postgresql_cluster_dir)
             if os.path.exists(postgresql_config_dir):
@@ -1652,6 +1742,15 @@
         os.path.join(postgresql_cluster_dir, 'backup_label'))
 
 
+def pg_basebackup_is_running():
+    cur = db_cursor(autocommit=True)
+    cur.execute("""
+        SELECT count(*) FROM pg_stat_activity
+        WHERE usename='juju_replication' AND application_name='pg_basebackup'
+        """)
+    return cur.fetchone()[0] > 0
+
+
 def postgresql_wal_received_offset():
     """How much WAL we have.
 
@@ -1694,7 +1793,7 @@
     try:
         nagios_uid = getpwnam('nagios').pw_uid
         nagios_gid = getgrnam('nagios').gr_gid
-    except:
+    except Exception:
         hookenv.log("Nagios user not set up.", hookenv.DEBUG)
         return
 

=== modified file 'test.py'
--- test.py 2013-08-23 09:40:15 +0000
+++ test.py 2013-08-23 09:40:16 +0000
@@ -74,12 +74,12 @@
         return None
 
     def deploy(self, charm, name=None, num_units=1):
-        # The first time we deploy a charm in the test run, it needs to
-        # deploy with --update to ensure we are testing the desired
-        # revision of the charm. Subsequent deploys we do not use
-        # --update to avoid overhead and needless incrementing of the
+        # The first time we deploy a local: charm in the test run, it
+        # needs to deploy with --update to ensure we are testing the
+        # desired revision of the charm. Subsequent deploys we do not
+        # use --update to avoid overhead and needless incrementing of the
         # revision number.
-        if charm.startswith('cs:') or charm in self._deployed_charms:
+        if not charm.startswith('local:') or charm in self._deployed_charms:
             cmd = ['deploy']
         else:
             cmd = ['deploy', '-u']
@@ -102,7 +102,7 @@
         self.status = self.get_result(['status'])
         return self.status
 
-    def wait_until_ready(self):
+    def wait_until_ready(self, extra=45):
         ready = False
         while not ready:
             self.refresh_status()
@@ -128,7 +128,7 @@
         # enough that our system is probably stable. This means we have
         # extremely slow and flaky tests, but that is possibly better
         # than no tests.
-        time.sleep(45)
+        time.sleep(extra)
 
     def setUp(self):
         DEBUG("JujuFixture.setUp()")
@@ -156,7 +156,7 @@
         # Per Bug #1190250 (WONTFIX), we need to wait for dying services
         # to die before we can continue.
         if found_services:
-            self.wait_until_ready()
+            self.wait_until_ready(0)
 
         # We shouldn't reuse machines, as we have no guarantee they are
         # still in a usable state, so tear them down too. Per
@@ -305,15 +305,18 @@
         self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
         self.juju.wait_until_ready()
 
-        # On a freshly setup service, lowest numbered unit is always the
-        # master.
-        units = unit_sorted(
-            self.juju.status['services']['postgresql']['units'].keys())
-        master_unit, standby_unit_1, standby_unit_2 = units
-
-        self.assertIs(True, self.is_master(master_unit))
-        self.assertIs(False, self.is_master(standby_unit_1))
-        self.assertIs(False, self.is_master(standby_unit_2))
+        # Even on a freshly setup service, we have no idea which unit
+        # will become the master as we have no control over which two
+        # units join the peer relation first.
+        units = sorted((self.is_master(unit), unit)
+                       for unit in
+                       self.juju.status['services']['postgresql']['units'].keys())
+        self.assertFalse(units[0][0])
+        self.assertFalse(units[1][0])
+        self.assertTrue(units[2][0])
+        standby_unit_1 = units[0][1]
+        standby_unit_2 = units[1][1]
+        master_unit = units[2][1]
 
         self.sql('CREATE TABLE Token (x int)', master_unit)
 
@@ -390,11 +393,18 @@
         self.juju.do(['add-relation', 'postgresql:db-admin', 'psql:db-admin'])
         self.juju.wait_until_ready()
 
-        # On a freshly setup service, lowest numbered unit is always the
-        # master.
-        units = unit_sorted(
-            self.juju.status['services']['postgresql']['units'].keys())
-        master_unit, standby_unit_1, standby_unit_2 = units
+        # Even on a freshly setup service, we have no idea which unit
+        # will become the master as we have no control over which two
+        # units join the peer relation first.
+        units = sorted((self.is_master(unit, 'postgres'), unit)
+                       for unit in
+                       self.juju.status['services']['postgresql']['units'].keys())
+        self.assertFalse(units[0][0])
+        self.assertFalse(units[1][0])
+        self.assertTrue(units[2][0])
+        standby_unit_1 = units[0][1]
+        standby_unit_2 = units[1][1]
+        master_unit = units[2][1]
 
         # Shutdown PostgreSQL on standby_unit_1 and ensure
         # standby_unit_2 will have received more WAL information from