Merge lp:~stub/charms/precise/postgresql/fix-failover-tests into lp:charms/postgresql
Proposed by: Stuart Bishop
Status: Merged
Merged at revision: 94
Proposed branch: lp:~stub/charms/precise/postgresql/fix-failover-tests
Merge into: lp:charms/postgresql
Prerequisite: lp:~stub/charms/precise/postgresql/pgtune
Diff against target: 761 lines (+312/-172), 3 files modified: README.md (+16/-6), test.py (+279/-143), testing/jujufixture.py (+17/-23)
To merge this branch: bzr merge lp:~stub/charms/precise/postgresql/fix-failover-tests
Related bugs: none
Reviewer: Marco Ceppi (community), Approve
Review via email: mp+214316@code.launchpad.net
Commit message
Description of the change
Rework the tests and fix bugs to get the failover tests running reliably.

The PG 9.1 tests now complete, except for syslog, which fails due to Bug #1301361 (maybe others will have better luck).

The PG 9.2 and 9.3 tests are failing due to package signing issues with the PGDG archive, which I'll look into and deal with in another branch.
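For context, the heart of the rework is a retry pattern: instead of sleeping for a fixed period, the tests repeatedly re-run readiness checks that raise NotReady, until everything passes or a deadline expires. A minimal, illustrative sketch of the pattern follows; `check` and `retry_until_ready` are stand-in names for the real probes (confirm_psql_unit_ready and confirm_postgresql_unit_ready, driven by wait_until_ready in test.py):

    import time

    class NotReady(Exception):
        pass

    def retry_until_ready(check, timeout=180, interval=3):
        # Re-run `check` until it stops raising NotReady, or until the
        # deadline passes. The 180s timeout and 3s poll interval match
        # the values wait_until_ready uses in test.py.
        deadline = time.time() + timeout
        while True:
            try:
                return check()
            except NotReady:
                if time.time() > deadline:
                    raise  # Surface the last NotReady reason.
                time.sleep(interval)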
Preview Diff
=== modified file 'README.md'
--- README.md 2014-02-13 13:47:55 +0000
+++ README.md 2014-04-04 17:46:15 +0000
@@ -49,7 +49,9 @@

To deploy a new service containing a 'master' and two 'hot standbys'::

- juju deploy -n 3 postgresql pg-b
+ juju deploy -n 2 postgresql pg-b
+ [ ... wait until units are stable ... ]
+ juju add-unit pg-b

You can remove units as normal. If the master unit is removed, failover occurs
and the most up to date 'hot standby' is promoted to 'master'. The
@@ -72,12 +74,18 @@

## Known Limitations and Issues

-- Do not attempt to relate client charms to a PostgreSQL service containing
+⚠ Due to current [limitations][1] with juju, you cannot reliably
+create a service initially containing more than 2 units (eg. juju deploy
+-n 3 postgresql). Instead, you must first create a service with 2 units.
+Once the environment is stable and all the hooks have finished running,
+you may add more units.
+
+⚠ Do not attempt to relate client charms to a PostgreSQL service containing
multiple units unless you know the charm supports a replicated service.

-- You cannot host multiple units in a single juju container. This is
- problematic as some PostgreSQL features, such as tablespaces, use user
-specified absolute paths.
+⚠ To host multiple units on a single server, you must use an lxc
+container.
+

# Interacting with the Postgresql Service

@@ -136,7 +144,7 @@

- `programname`: the syslog 'programname' identifying this unit's
PostgreSQL logs.
-- `log_line_prefix`: the 'log_line_prefix' setting for the PostgreSQL
+- `log_line_prefix`: the `log_line_prefix` setting for the PostgreSQL
service.

@@ -242,3 +250,5 @@
- [PostgreSQL bug submission
guidelines](http://www.postgresql.org/docs/9.2/static/bug-reporting.html)
- [PostgreSQL Mailing List](http://www.postgresql.org/list/)
+
+ [1]: https://bugs.launchpad.net/charms/+source/postgresql/+bug/1258485

=== modified file 'test.py'
--- test.py 2014-04-04 17:46:15 +0000
+++ test.py 2014-04-04 17:46:15 +0000
@@ -28,6 +28,10 @@
PSQL_CHARM = 'cs:postgresql-psql'


+class NotReady(Exception):
+ pass
+
+
class PostgreSQLCharmBaseTestCase(object):

# Override these in subclasses to run these tests multiple times
@@ -59,7 +63,7 @@
if timeout > 0:
self.useFixture(fixtures.Timeout(timeout, gentle=True))

- def wait_until_ready(self):
+ def wait_until_ready(self, pg_units, relation=True):

# Per Bug #1200267, it is impossible to know when a juju
# environment is actually ready for testing. Instead, we do the
@@ -67,79 +71,171 @@
# is at this particular instant in the expected state, hoping
# that the system is stable enough to continue testing.

- class NotReady(Exception):
- pass
+ timeout = time.time() + 180
+ pg_units = frozenset(pg_units)

- timeout = time.time() + 300
+ # The list of PG units we expect to be related to the psql unit.
+ if relation:
+ rel_pg_units = frozenset(pg_units)
+ else:
+ rel_pg_units = frozenset()

while True:
try:
- self.juju.wait_until_ready(0)
- # Confirm the db and db-admin relations are all in a useful
- # state.
+ self.juju.wait_until_ready(0) # Also refreshes status
+
+ status_pg_units = set(self.juju.status[
+ 'services']['postgresql']['units'].keys())
+
+ if pg_units != status_pg_units:
+ # Confirm the PG units reported by 'juju status'
+ # match the list we expect.
+ raise NotReady('units not yet added/removed')
+
for psql_unit in self.juju.status['services']['psql']['units']:
- psql_rel_info = self.juju.relation_info(psql_unit)
- if not psql_rel_info:
- raise NotReady('No relations')
- for rel_name in psql_rel_info:
- for rel_id, rel_info in (
- psql_rel_info[rel_name].items()):
- num_pg_units = len([
- k for k in rel_info.keys()
- if k.startswith('postgresql/')])
- if num_pg_units == 0:
- raise NotReady(
- '{} has no postgres units'.format(rel_id))
- requested_db = rel_info['psql/0'].get(
- 'database', None)
- num_masters = 0
- for unit, unit_rel_info in rel_info.items():
- if not unit_rel_info:
- raise NotReady(
- '{} {} is not setup'.format(
- unit, rel_id))
- if not unit.startswith('postgresql/'):
- continue
- if 'user' not in unit_rel_info:
- raise NotReady(
- '{} has no user'.format(unit))
- if 'database' not in unit_rel_info:
- raise NotReady(
- '{} has no database'.format(unit))
- if requested_db and (unit_rel_info['database']
- != requested_db):
- raise NotReady(
- '{} not using requested db {}'.format(
- unit, requested_db))
- if 'state' not in unit_rel_info:
- raise NotReady(
- '{} has no state'.format(unit))
- state = unit_rel_info['state']
- if state == 'standalone':
- if num_pg_units > 1:
- raise NotReady(
- '{} is standalone'.format(unit))
- elif state == 'master':
- num_masters += 1
- elif state not in ('master', 'hot standby'):
- raise NotReady(
- '{} in {} state'.format(unit, state))
- allowed_units = unit_rel_info.get(
- 'allowed-units', '').split()
- if psql_unit not in allowed_units:
- raise NotReady(
- '{} not yet authorized by {} '
- '({})'.format(
- psql_unit, unit, allowed_units))
- if num_pg_units > 1 and num_masters != 1:
- raise NotReady(
- '{} masters'.format(num_masters))
+ self.confirm_psql_unit_ready(psql_unit, rel_pg_units)
+
+ for pg_unit in pg_units:
+ peers = [u for u in pg_units if u != pg_unit]
+ self.confirm_postgresql_unit_ready(pg_unit, peers)
+
return
except NotReady:
if time.time() > timeout:
raise
time.sleep(3)

+ def confirm_psql_unit_ready(self, psql_unit, pg_units):
+ # Confirm the db and db-admin relations are all in a useful
+ # state.
+ psql_rel_info = self.juju.relation_info(psql_unit)
+ if pg_units and not psql_rel_info:
+ raise NotReady('{} waiting for relations'.format(psql_unit))
+ elif not pg_units and psql_rel_info:
+ raise NotReady('{} waiting to drop relations'.format(psql_unit))
+ elif not pg_units and not psql_rel_info:
+ return
+
+ psql_service = psql_unit.split('/', 1)[0]
+
+ # The set of PostgreSQL units related to the psql unit. They
+ # might be related via several db or db-admin relations.
+ all_rel_pg_units = set()
+
+ for rel_name in psql_rel_info:
+ for rel_id, rel_info in psql_rel_info[rel_name].items():
+
+ # The database this relation has requested to use, if any.
+ requested_db = rel_info[psql_unit].get('database', None)
+
+ rel_pg_units = (
+ [u for u in rel_info if not u.startswith(psql_service)])
+ all_rel_pg_units = all_rel_pg_units.union(rel_pg_units)
+
+ num_masters = 0
+
+ for unit in rel_pg_units:
+ unit_rel_info = rel_info[unit]
+
+ # PG unit must be presenting the correct database.
+ if 'database' not in unit_rel_info:
+ raise NotReady(
+ '{} has no database'.format(unit))
+ if requested_db and (
+ unit_rel_info['database'] != requested_db):
+ raise NotReady(
+ '{} not using requested db {}'.format(
+ unit, requested_db))
+
+ # PG unit must be in a valid state.
+ state = unit_rel_info.get('state', None)
+ if not state:
+ raise NotReady(
+ '{} has no state'.format(unit))
+ elif state == 'standalone':
+ if len(rel_pg_units) > 1:
+ raise NotReady(
+ '{} is standalone'.format(unit))
+ elif state == 'master':
+ num_masters += 1
+ elif state != 'hot standby':
+ # Failover state or totally broken.
+ raise NotReady(
+ '{} in {} state'.format(unit, state))
+
+ # PG unit must have authorized this psql client.
+ allowed_units = unit_rel_info.get(
+ 'allowed-units', '').split()
+ if psql_unit not in allowed_units:
+ raise NotReady(
+ '{} not yet authorized by {} ({})'.format(
+ psql_unit, unit, allowed_units))
+
+ # We must not have multiple masters in this relation.
+ if len(rel_pg_units) > 1 and num_masters != 1:
+ raise NotReady(
+ '{} masters'.format(num_masters))
+
+ if pg_units != all_rel_pg_units:
+ raise NotReady(
+ 'Expected PG units {} != related units {}'.format(
+ pg_units, all_rel_pg_units))
+
+ def confirm_postgresql_unit_ready(self, pg_unit, peers=()):
+ pg_rel_info = self.juju.relation_info(pg_unit)
+ if not pg_rel_info:
+ raise NotReady('{} has no relations'.format(pg_unit))
+
+ try:
+ rep_rel_id = pg_rel_info['replication'].keys()[0]
+ actual_peers = set([
+ u for u in pg_rel_info['replication'][rep_rel_id].keys()
+ if u != pg_unit])
+ except (IndexError, KeyError):
+ if peers:
+ raise NotReady('Peer relation does not exist')
+ rep_rel_id = None
+ actual_peers = set()
+
+ if actual_peers != set(peers):
+ raise NotReady('Expecting {} peers, found {}'.format(
+ peers, actual_peers))
+
+ if not peers:
+ return
+
+ pg_rep_rel_info = pg_rel_info['replication'][rep_rel_id].get(
+ pg_unit, None)
+ if not pg_rep_rel_info:
+ raise NotReady('{} has not yet joined the peer relation'.format(
+ pg_unit))
+
+ state = pg_rep_rel_info.get('state', None)
+
+ if not state:
+ raise NotReady('{} has no state'.format(pg_unit))
+
+ if state == 'standalone' and peers:
+ raise NotReady('{} is standalone but has peers'.format(pg_unit))
+
+ if state not in ('standalone', 'master', 'hot standby'):
+ raise NotReady('{} reports failover in progress'.format(pg_unit))
+
+ num_masters = 1 if state in ('master', 'standalone') else 0
+
+ for peer in peers:
+ peer_rel_info = pg_rel_info['replication'][rep_rel_id][peer]
+ peer_state = peer_rel_info.get('state', None)
+ if not peer_state:
+ raise NotReady('{} has no peer state'.format(peer))
+ if peer_state == 'master':
+ num_masters += 1
+ elif peer_state != 'hot standby':
+ raise NotReady('Peer {} in state {}'.format(peer, peer_state))
+
+ if num_masters != 1:
+ raise NotReady('No masters seen from {}'.format(pg_unit))
+
def sql(self, sql, postgres_unit=None, psql_unit=None, dbname=None):
'''Run some SQL on postgres_unit from psql_unit.

@@ -191,40 +287,51 @@
local_port = s.getsockname()[1]
s.close()

- # Open the tunnel and wait for it to come up
+ # Open the tunnel and wait for it to come up. The new process
+ # group is to ensure we can reap all the ssh tunnels, as simply
+ # killing the 'juju ssh' process doesn't seem to be enough.
tunnel_cmd = [
- 'juju', 'ssh', psql_unit,
- '-N', '-L',
+ 'juju', 'ssh', psql_unit, '-N', '-L',
'{}:{}:{}'.format(local_port, rel_info['host'], rel_info['port'])]
tunnel_proc = subprocess.Popen(
- tunnel_cmd, stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ tunnel_cmd, stdin=subprocess.PIPE)
+ # Don't disable stdout, so we can see when there are SSH
+ # failures like bad host keys.
+ #stdout=open('/dev/null', 'ab'), stderr=subprocess.STDOUT)
+ tunnel_proc.stdin.close()

- timeout = time.time() + 30
- while True:
- try:
- socket.create_connection(('localhost', local_port)).close()
- break
- except socket.error:
+ try:
+ timeout = time.time() + 60
+ while True:
+ time.sleep(1)
assert tunnel_proc.poll() is None, 'Tunnel died {!r}'.format(
tunnel_proc.stdout)
- if time.time() > timeout:
- raise
- time.sleep(0.25)
+ try:
+ socket.create_connection(('localhost', local_port)).close()
+ break
+ except socket.error:
+ if time.time() > timeout:
+ # It's not going to work. Per Bug #802117, this
+ # is likely an invalid host key forcing
+ # tunnelling to be disabled.
+ raise

- # Execute the query
- con = psycopg2.connect(
- database=dbname, port=local_port, host='localhost',
- user=rel_info['user'], password=rel_info['password'])
- cur = con.cursor()
- cur.execute(sql)
- if cur.description is None:
- rv = None
- else:
- rv = cur.fetchall()
- con.commit()
- tunnel_proc.kill()
- return rv
+ # Execute the query
+ con = psycopg2.connect(
+ database=dbname, port=local_port, host='localhost',
+ user=rel_info['user'], password=rel_info['password'])
+ cur = con.cursor()
+ cur.execute(sql)
+ if cur.description is None:
+ rv = None
+ else:
+ rv = cur.fetchall()
+ con.commit()
+ con.close()
+ return rv
+ finally:
+ tunnel_proc.kill()
+ tunnel_proc.wait()

def pg_ctlcluster(self, unit, command):
cmd = [
@@ -237,21 +344,21 @@
self.juju.deploy(TEST_CHARM, 'postgresql', config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0'])

result = self.sql('SELECT TRUE')
self.assertEqual(result, [(True,)])

# Confirm that the relation tears down without errors.
self.juju.do(['destroy-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0'], relation=False)

def test_streaming_replication(self):
self.juju.deploy(
TEST_CHARM, 'postgresql', num_units=2, config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0', 'postgresql/1'])

# Confirm that the slave has successfully opened a streaming
# replication connection.
@@ -267,7 +374,7 @@
self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.do(['add-relation', 'postgresql:db-admin', 'psql:db-admin'])
self.juju.do(['expose', 'postgresql'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0'])

result = self.sql('SELECT TRUE', dbname='postgres')
self.assertEqual(result, [(True,)])
@@ -275,7 +382,7 @@
# Confirm that the relation tears down without errors.
self.juju.do([
'destroy-relation', 'postgresql:db-admin', 'psql:db-admin'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0'], relation=False)

def is_master(self, postgres_unit, dbname=None):
is_master = self.sql(
@@ -285,11 +392,16 @@

def test_failover(self):
"""Set up a multi-unit service and perform failovers."""
+ # Per Bug #1258485, creating a 3 unit service will often fail.
+ # Instead, create a 2 unit service, wait for it to be ready,
+ # then add a third unit.
+ self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.deploy(
- TEST_CHARM, 'postgresql', num_units=3, config=self.pg_config)
- self.juju.deploy(PSQL_CHARM, 'psql')
+ TEST_CHARM, 'postgresql', num_units=2, config=self.pg_config)
self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0', 'postgresql/1'])
+ self.juju.add_unit('postgresql')
+ self.wait_until_ready(['postgresql/0', 'postgresql/1', 'postgresql/2'])

# Even on a freshly setup service, we have no idea which unit
# will become the master as we have no control over which two
@@ -334,7 +446,7 @@

# Remove the master unit.
self.juju.do(['remove-unit', master_unit])
- self.wait_until_ready()
+ self.wait_until_ready([standby_unit_1, standby_unit_2])

# When we failover, the unit that has received the most WAL
# information from the old master (most in sync) is elected the
@@ -357,7 +469,7 @@

# Remove the master again, leaving a single unit.
self.juju.do(['remove-unit', master_unit])
- self.wait_until_ready()
+ self.wait_until_ready([standby_unit])

# Last unit is a working, standalone database.
self.is_master(standby_unit)
@@ -377,11 +489,16 @@

def test_failover_election(self):
"""Ensure master elected in a failover is the best choice"""
+ # Per Bug #1258485, creating a 3 unit service will often fail.
+ # Instead, create a 2 unit service, wait for it to be ready,
+ # then add a third unit.
self.juju.deploy(
- TEST_CHARM, 'postgresql', num_units=3, config=self.pg_config)
+ TEST_CHARM, 'postgresql', num_units=2, config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.do(['add-relation', 'postgresql:db-admin', 'psql:db-admin'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0', 'postgresql/1'])
+ self.juju.add_unit('postgresql')
+ self.wait_until_ready(['postgresql/0', 'postgresql/1', 'postgresql/2'])

# Even on a freshly setup service, we have no idea which unit
# will become the master as we have no control over which two
@@ -414,12 +531,12 @@

# Failover.
self.juju.do(['remove-unit', master_unit])
- self.wait_until_ready()
+ self.wait_until_ready([standby_unit_1, standby_unit_2])

# Fix replication.
self.sql(
"ALTER ROLE juju_replication REPLICATION",
- standby_unit_2, dbname='postgres')
+ 'master', dbname='postgres')

# Ensure the election went as predicted.
self.assertIs(True, self.is_master(standby_unit_2, 'postgres'))
@@ -437,7 +554,7 @@
self.juju.deploy(TEST_CHARM, 'postgresql', config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.do(['add-relation', 'postgresql:db-admin', 'psql:db-admin'])
- self.wait_until_ready()
+ self.wait_until_ready(['postgresql/0'])

# Determine the IP address that the unit will see.
unit = self.juju.status['services']['postgresql']['units'].keys()[0]
@@ -478,14 +595,22 @@
self.assertEquals(1, cur.fetchone()[0])

def test_explicit_database(self):
- self.juju.deploy(TEST_CHARM, 'postgresql', config=self.pg_config)
+ # Two units to ensure both masters and hot standbys
+ # present the correct credentials.
+ self.juju.deploy(
+ TEST_CHARM, 'postgresql', num_units=2, config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql')
self.juju.do(['set', 'psql', 'database=explicit'])
self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
-
- result = self.sql('SELECT current_database()')[0][0]
- self.assertEqual(result, 'explicit')
+
+ pg_units = ['postgresql/0', 'postgresql/1']
+ self.wait_until_ready(pg_units)
+
+ for unit in pg_units:
+ result = self.sql('SELECT current_database()', unit)[0][0]
+ self.assertEqual(
+ result, 'explicit',
+ '{} reports incorrect db {}'.format(unit, result))

def test_roles_granted(self):
# We use two units to confirm that there is no attempt to
@@ -494,7 +619,8 @@
TEST_CHARM, 'postgresql', num_units=2, config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql', config={'roles': 'role_a'})
self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
+ pg_units = ['postgresql/0', 'postgresql/1']
+ self.wait_until_ready(pg_units)

has_role_a = self.sql('''
SELECT pg_has_role(current_user, 'role_a', 'MEMBER')
@@ -502,13 +628,14 @@
self.assertTrue(has_role_a)

self.juju.do(['set', 'psql', 'roles=role_a,role_b'])
- self.wait_until_ready()
+ self.wait_until_ready(pg_units)

# Retry this for a while. Per Bug #1200267, we can't tell when
# the hooks have finished running and the role has been granted.
# We could make the PostgreSQL charm provide feedback on when
- # the role has actually been granted and wait for that, but we
- # don't want to complicate the interface any more than we must.
+ # the role has actually been granted and wait for that, but that
+ # is complex as hot standbys need to wait until the master has
+ # performed the grant and the grant has replicated.
timeout = time.time() + 60
while True:
try:
@@ -531,7 +658,8 @@
TEST_CHARM, 'postgresql', num_units=2, config=self.pg_config)
self.juju.deploy(PSQL_CHARM, 'psql', config={'roles': 'role_a,role_b'})
self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
- self.wait_until_ready()
+ pg_units = ['postgresql/0', 'postgresql/1']
+ self.wait_until_ready(pg_units)

has_role_a, has_role_b = self.sql('''
SELECT
@@ -542,37 +670,38 @@
self.assertTrue(has_role_b)

self.juju.do(['set', 'psql', 'roles=role_c'])
- self.wait_until_ready()
+ self.wait_until_ready(pg_units)

- # Per Bug #1200267, we have to sleep here and hope. We have no
- way of knowing how many of the three pending role changes have
+ # Per Bug #1200267, we have to retry a while here and hope.
+ # We have no way of knowing when the pending role changes have
# actually been applied.
- time.sleep(30)
-
- has_role_a, has_role_b, has_role_c = self.sql('''
- SELECT
- pg_has_role(current_user, 'role_a', 'MEMBER'),
- pg_has_role(current_user, 'role_b', 'MEMBER'),
- pg_has_role(current_user, 'role_c', 'MEMBER')
- ''')[0]
+ timeout = time.time() + 60
+ while time.time() < timeout:
+ has_role_a, has_role_b, has_role_c = self.sql('''
+ SELECT
+ pg_has_role(current_user, 'role_a', 'MEMBER'),
+ pg_has_role(current_user, 'role_b', 'MEMBER'),
+ pg_has_role(current_user, 'role_c', 'MEMBER')
+ ''')[0]
+ if has_role_c:
+ break
self.assertFalse(has_role_a)
self.assertFalse(has_role_b)
self.assertTrue(has_role_c)

self.juju.do(['unset', 'psql', 'roles'])
- self.wait_until_ready()
-
- # Per Bug #1200267, we have to sleep here and hope. We have no
- # way of knowing how many of the three pending role changes have
- # actually been applied.
- time.sleep(30)
-
- has_role_a, has_role_b, has_role_c = self.sql('''
- SELECT
- pg_has_role(current_user, 'role_a', 'MEMBER'),
- pg_has_role(current_user, 'role_b', 'MEMBER'),
- pg_has_role(current_user, 'role_c', 'MEMBER')
- ''')[0]
+ self.wait_until_ready(pg_units)
+
+ timeout = time.time() + 60
+ while True:
+ has_role_a, has_role_b, has_role_c = self.sql('''
+ SELECT
+ pg_has_role(current_user, 'role_a', 'MEMBER'),
+ pg_has_role(current_user, 'role_b', 'MEMBER'),
+ pg_has_role(current_user, 'role_c', 'MEMBER')
+ ''')[0]
+ if not has_role_c:
+ break
self.assertFalse(has_role_a)
self.assertFalse(has_role_b)
self.assertFalse(has_role_c)
@@ -588,7 +717,8 @@
self.juju.deploy('cs:rsyslog', 'rsyslog', num_units=2)
self.juju.do([
'add-relation', 'postgresql:syslog', 'rsyslog:aggregator'])
- self.wait_until_ready()
+ pg_units = ['postgresql/0', 'postgresql/1']
+ self.wait_until_ready(pg_units)

token = str(uuid.uuid1())

@@ -603,9 +733,15 @@
self.failUnless('hot standby {}'.format(token) in out)

# Confirm that the relation tears down correctly.
- self.juju.do([
- 'destroy-relation', 'postgresql:syslog', 'rsyslog:aggregator'])
- self.wait_until_ready()
+ self.juju.do(['destroy-service', 'rsyslog'])
+ timeout = time.time() + 60
+ while time.time() < timeout:
+ status = self.juju.refresh_status()
+ if 'rsyslog' not in status['services']:
+ break
+ self.assert_(
+ 'rsyslog' not in status['services'], 'rsyslog failed to die')
+ self.wait_until_ready(pg_units)


class PG91Tests(

=== modified file 'testing/jujufixture.py'
--- testing/jujufixture.py 2014-04-04 17:46:15 +0000
+++ testing/jujufixture.py 2014-04-04 17:46:15 +0000
@@ -20,8 +20,6 @@
def __init__(self, reuse_machines=False, do_teardown=True):
super(JujuFixture, self).__init__()

- self._deployed_charms = set()
-
self.reuse_machines = reuse_machines

# Optionally, don't teardown services and machines after running
@@ -45,16 +43,7 @@
return None

def deploy(self, charm, name=None, num_units=1, config=None):
- # The first time we deploy a local: charm in the test run, it
- # needs to deploy with --update to ensure we are testing the
- # desired revision of the charm. Subsequent deploys we do not
- # use --update to avoid overhead and needless incrementing of the
- # revision number.
- if not charm.startswith('local:') or charm in self._deployed_charms:
- cmd = ['deploy']
- else:
- cmd = ['deploy', '-u']
- self._deployed_charms.add(charm)
+ cmd = ['deploy']

if config:
config_path = os.path.join(
@@ -76,15 +65,12 @@
cmd.extend(['--to', str(self._free_machines.pop())])
self.do(cmd)
if num_units > 1:
- self.add_unit(charm, name, num_units - 1)
+ self.add_unit(name, num_units - 1)
else:
cmd.extend(['-n', str(num_units)])
self.do(cmd)

- def add_unit(self, charm, name=None, num_units=1):
- if name is None:
- name = charm.split(':', 1)[-1]
-
+ def add_unit(self, name, num_units=1):
num_units_spawned = 0
while self.reuse_machines and self._free_machines:
cmd = ['add-unit', '--to', str(self._free_machines.pop()), name]
@@ -108,9 +94,9 @@
and m.get('life', None) not in ('dead', 'dying')
and m.get('agent-state', 'pending') in ('started', 'ready'))
for service in self.status.get('services', {}).values():
- for unit in service.get('units', []):
+ for unit in service.get('units', {}).values():
if 'machine' in unit:
- self._free_machines.remove(int(unit['machine']))
+ self._free_machines.discard(int(unit['machine']))

return self.status

@@ -123,16 +109,24 @@
relation_names = []
for service_name, service_info in self.status['services'].items():
if service_name == unit.split('/')[0]:
- relation_names = service_info['relations'].keys()
+ relation_names = service_info.get('relations', {}).keys()
break

res = {}
juju_run_cmd = ['juju', 'run', '--unit', unit]
for rel_name in relation_names:
+ try:
+ relation_ids = run(
+ self, juju_run_cmd + [
+ 'relation-ids {}'.format(rel_name)]).split()
+ except subprocess.CalledProcessError:
+ # Per Bug #1298819, we can't ask the unit which relation
+ # names are active so we need to use the relation names
+ # reported by 'juju status'. This may cause us to
+ # request relation information that the unit is not yet
+ # aware of.
+ continue
res[rel_name] = {}
- relation_ids = run(
- self, juju_run_cmd + [
- 'relation-ids {}'.format(rel_name)]).split()
for rel_id in relation_ids:
res[rel_name][rel_id] = {}
relation_units = [unit] + run(
LGTM, +1