Merge lp:~hopem/charm-helpers/retry-crm-resource-check-if-not-running into lp:charm-helpers

Proposed by Edward Hope-Morley
Status: Merged
Merged at revision: 275
Proposed branch: lp:~hopem/charm-helpers/retry-crm-resource-check-if-not-running
Merge into: lp:charm-helpers
Diff against target: 173 lines (+90/-17)
3 files modified
charmhelpers/contrib/hahelpers/cluster.py (+27/-12)
charmhelpers/core/decorators.py (+41/-0)
tests/contrib/hahelpers/test_cluster_utils.py (+22/-5)
To merge this branch: bzr merge lp:~hopem/charm-helpers/retry-crm-resource-check-if-not-running
Reviewer Review Type Date Requested Status
Jorge Niedbalski (community) Approve
OpenStack Charmers Pending
James Page Pending
Review via email: mp+245023@code.launchpad.net

This proposal supersedes a proposal from 2014-12-17.

To post a comment you must log in.
Revision history for this message
Jorge Niedbalski (niedbalski) wrote : Posted in a previous version of this proposal

Nevertheless that corosync resource synchronization should be a pretty quick process, this workaround seems like a good addition for cases in which this is not happening quickly.

- Do you mind to move the nice retry_on_exception method to charmhelpers.core.decorators ?
- I found a minor python 3 issue trying to decode a string. (commented)

Other than that LGTM.

review: Needs Fixing
Revision history for this message
Jorge Niedbalski (niedbalski) wrote :

Thanks Edward, I made a small change for fixing the tests , checking if the current status is already a valid text_type, if not it cast to a text_type and encodes it on utf-8.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'charmhelpers/contrib/hahelpers/cluster.py'
2--- charmhelpers/contrib/hahelpers/cluster.py 2014-11-26 08:22:54 +0000
3+++ charmhelpers/contrib/hahelpers/cluster.py 2014-12-17 17:49:23 +0000
4@@ -13,6 +13,7 @@
5
6 import subprocess
7 import os
8+
9 from socket import gethostname as get_unit_hostname
10
11 import six
12@@ -28,12 +29,19 @@
13 WARNING,
14 unit_get,
15 )
16+from charmhelpers.core.decorators import (
17+ retry_on_exception,
18+)
19
20
21 class HAIncompleteConfig(Exception):
22 pass
23
24
25+class CRMResourceNotFound(Exception):
26+ pass
27+
28+
29 def is_elected_leader(resource):
30 """
31 Returns True if the charm executing this is the elected cluster leader.
32@@ -68,24 +76,31 @@
33 return False
34
35
36-def is_crm_leader(resource):
37+@retry_on_exception(5, base_delay=2, exc_type=CRMResourceNotFound)
38+def is_crm_leader(resource, retry=False):
39 """
40 Returns True if the charm calling this is the elected corosync leader,
41 as returned by calling the external "crm" command.
42+
43+ We allow this operation to be retried to avoid the possibility of getting a
44+ false negative. See LP #1396246 for more info.
45 """
46- cmd = [
47- "crm", "resource",
48- "show", resource
49- ]
50+ cmd = ['crm', 'resource', 'show', resource]
51 try:
52- status = subprocess.check_output(cmd).decode('UTF-8')
53+ status = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
54+ if not six.PY3:
55+ status = status.decode("utf-8")
56+
57 except subprocess.CalledProcessError:
58- return False
59- else:
60- if get_unit_hostname() in status:
61- return True
62- else:
63- return False
64+ status = None
65+
66+ if status and get_unit_hostname() in status:
67+ return True
68+
69+ if status and "resource %s is NOT running" % (resource) in status:
70+ raise CRMResourceNotFound("CRM resource %s not found" % (resource))
71+
72+ return False
73
74
75 def is_leader(resource):
76
77=== added file 'charmhelpers/core/decorators.py'
78--- charmhelpers/core/decorators.py 1970-01-01 00:00:00 +0000
79+++ charmhelpers/core/decorators.py 2014-12-17 17:49:23 +0000
80@@ -0,0 +1,41 @@
81+#
82+# Copyright 2014 Canonical Ltd.
83+#
84+# Authors:
85+# Edward Hope-Morley <opentastic@gmail.com>
86+#
87+
88+import time
89+
90+from charmhelpers.core.hookenv import (
91+ log,
92+ INFO,
93+)
94+
95+
96+def retry_on_exception(num_retries, base_delay=0, exc_type=Exception):
97+ """If the decorated function raises exception exc_type, allow num_retries
98+ retry attempts before raise the exception.
99+ """
100+ def _retry_on_exception_inner_1(f):
101+ def _retry_on_exception_inner_2(*args, **kwargs):
102+ retries = num_retries
103+ multiplier = 1
104+ while True:
105+ try:
106+ return f(*args, **kwargs)
107+ except exc_type:
108+ if not retries:
109+ raise
110+
111+ delay = base_delay * multiplier
112+ multiplier += 1
113+ log("Retrying '%s' %d more times (delay=%s)" %
114+ (f.__name__, retries, delay), level=INFO)
115+ retries -= 1
116+ if delay:
117+ time.sleep(delay)
118+
119+ return _retry_on_exception_inner_2
120+
121+ return _retry_on_exception_inner_1
122
123=== modified file 'tests/contrib/hahelpers/test_cluster_utils.py'
124--- tests/contrib/hahelpers/test_cluster_utils.py 2014-11-26 08:22:54 +0000
125+++ tests/contrib/hahelpers/test_cluster_utils.py 2014-12-17 17:49:23 +0000
126@@ -1,5 +1,5 @@
127
128-from mock import patch, MagicMock
129+from mock import patch, MagicMock, call
130
131 from subprocess import CalledProcessError
132 from testtools import TestCase
133@@ -48,19 +48,36 @@
134 check_output.return_value = crm
135 self.assertTrue(cluster_utils.is_crm_leader('vip'))
136
137+ @patch('charmhelpers.core.decorators.time')
138 @patch('subprocess.check_output')
139- def test_is_not_leader(self, check_output):
140+ def test_is_not_leader(self, check_output, mock_time):
141 '''It determines its unit is not leader'''
142 self.get_unit_hostname.return_value = 'node1'
143 crm = b'resource vip is running on: node2'
144 check_output.return_value = crm
145 self.assertFalse(cluster_utils.is_crm_leader('some_resource'))
146-
147- @patch('subprocess.check_output')
148- def test_is_crm_leader_no_cluster(self, check_output):
149+ self.assertFalse(mock_time.called)
150+
151+ @patch('charmhelpers.core.decorators.log')
152+ @patch('charmhelpers.core.decorators.time')
153+ @patch('subprocess.check_output')
154+ def test_is_not_leader_resource_not_exists(self, check_output, mock_time,
155+ mock_log):
156+ '''It determines its unit is not leader'''
157+ self.get_unit_hostname.return_value = 'node1'
158+ check_output.return_value = "resource vip is NOT running"
159+ self.assertRaises(cluster_utils.CRMResourceNotFound,
160+ cluster_utils.is_crm_leader, 'vip')
161+ mock_time.assert_has_calls([call.sleep(2), call.sleep(4),
162+ call.sleep(6)])
163+
164+ @patch('charmhelpers.core.decorators.time')
165+ @patch('subprocess.check_output')
166+ def test_is_crm_leader_no_cluster(self, check_output, mock_time):
167 '''It is not leader if there is no cluster up'''
168 check_output.side_effect = CalledProcessError(1, 'crm')
169 self.assertFalse(cluster_utils.is_crm_leader('vip'))
170+ self.assertFalse(mock_time.called)
171
172 def test_peer_units(self):
173 '''It lists all peer units for cluster relation'''

Subscribers

People subscribed via source and target branches