Merge bootstack-ops:plus65_retry into bootstack-ops:master

Proposed by Giuseppe Petralia
Status: Merged
Approved by: David O Neill
Approved revision: ad2d84ff1a67cf43e543877dc2af1145471e1872
Merged at revision: 58aaa04c3d7d1b2e8486ed007f41af74e3936542
Proposed branch: bootstack-ops:plus65_retry
Merge into: bootstack-ops:master
Diff against target: 167 lines (+56/-29)
1 file modified
bootstack-ops/cloud_report.py (+56/-29)
Reviewer | Review Type | Date Requested | Status
David O Neill (community) | | | Approve
Review via email: mp+365904@code.launchpad.net

Commit message

Add retry with increasing sleep on MAAS connection errors and juju status failures

To post a comment you must log in.
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

This merge proposal is being monitored by mergebot. Change the status to Approved to merge.

Revision history for this message
David O Neill (dmzoneill) :
review: Approve
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

Change successfully merged at revision 58aaa04c3d7d1b2e8486ed007f41af74e3936542

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/bootstack-ops/cloud_report.py b/bootstack-ops/cloud_report.py
2index df3fdc6..f13a5d3 100644
3--- a/bootstack-ops/cloud_report.py
4+++ b/bootstack-ops/cloud_report.py
5@@ -50,6 +50,7 @@ from gevent.pywsgi import WSGIServer
6 from maas.client.viscera.controllers import RackController, RegionController
7 from oauthlib import oauth1
8 import requests
9+from aiohttp.client_exceptions import ServerDisconnectedError
10
11
12 JUJU_DATA = os.environ.get('JUJU_DATA')
13@@ -65,6 +66,9 @@ API_PORT = os.environ.get("API_PORT", 5000)
14
15 UNKNOWN_VALUE = "unknown"
16
17+RETRY = 10
18+SLEEP = 2
19+
20
21 def get_timestamp():
22 timestamp = time.time()
23@@ -87,6 +91,15 @@ def url_join(base, uri):
24 return base + uri
25
26
27+def retry(call_to_retry, num=RETRY):
28+ for i in range(0, num):
29+ try:
30+ return call_to_retry()
31+ except Exception:
32+ time.sleep(SLEEP * i)
33+ print("Error: failed to retrieve machines from maas [{}/{}]".format(i+1,num))
34+
35+
36 class MaasV2(object):
37 def __init__(self):
38 self.client = None
39@@ -117,46 +130,57 @@ class MaasV2(object):
40 else:
41 self.maas_endpoint = MAAS_ENDPOINT
42
43- try:
44- if MAAS_PASSWORD:
45- self.client = login(
46- self.maas_endpoint,
47- username=MAAS_USERNAME, password=MAAS_PASSWORD,
48- )
49- elif MAAS_APIKEY:
50- self.client = connect(
51- self.maas_endpoint,
52- apikey=MAAS_APIKEY
53- )
54- except Exception as e:
55- print("Error connecting to MAAS endpoint: {}. {}".format(self.maas_endpoint, e))
56-
57- return self.client is not None
58+ for i in range(0, RETRY):
59+ try:
60+ if MAAS_PASSWORD:
61+ self.client = login(
62+ self.maas_endpoint,
63+ username=MAAS_USERNAME, password=MAAS_PASSWORD,
64+ )
65+ elif MAAS_APIKEY:
66+ self.client = connect(
67+ self.maas_endpoint,
68+ apikey=MAAS_APIKEY
69+ )
70+ return self.client is not None
71+ except Exception as e:
72+ print("Error connecting to MAAS endpoint: {}. {}".format(self.maas_endpoint, e))
73+ time.sleep(SLEEP * i)
74
75 def disconnect(self):
76 pass
77
78 def _list_machines(self):
79 try:
80- return self.client.machines.list()
81+ return retry(self.client.machines.list) or []
82 except Exception as e:
83 print("Error retrieving machines list from MAAS endpoint: {}. {}".format(self.maas_endpoint, e))
84 return []
85
86 def _list_rack_controllers(self):
87 try:
88- return self.client.rack_controllers.list()
89+ return retry(self.client.rack_controllers.list) or []
90 except Exception as e:
91 print("Error retrieving rack controllers list from MAAS endpoint: {}. {}".format(self.maas_endpoint, e))
92 return []
93
94 def _list_region_controllers(self):
95 try:
96- return self.client.region_controllers.list()
97+ return retry(self.client.region_controllers.list) or []
98 except Exception as e:
99 print("Error retrieving region controllers list from MAAS endpoint: {}. {}".format(self.maas_endpoint, e))
100 return []
101
102+ def _get_maas_version(self):
103+ version = "None"
104+ report_version = retry(self.client.version.get)
105+ if report_version:
106+ version = report_version.version
107+ return version
108+
109+ def _get_power_parameters(self, m):
110+ return retry(m.get_power_parameters) or {}
111+
112 def _serialize_machine(self, m):
113 """
114 Convert a Maas Machine into a dict.
115@@ -172,7 +196,7 @@ class MaasV2(object):
116 else:
117 status = m.status.name.lower()
118 power_type = m.power_type.lower()
119- power_address = m.get_power_parameters().get('power_address', UNKNOWN_VALUE)
120+ power_address = self._get_power_parameters(m).get('power_address', UNKNOWN_VALUE)
121
122 mac_addresses = [net_if.mac_address.lower() for net_if in m.interfaces]
123
124@@ -198,7 +222,7 @@ class MaasV2(object):
125 :return: json report
126 """
127
128- maas_version = self.client.version.get().version
129+ maas_version = self._get_maas_version()
130
131 machines = list(map(self._serialize_machine, self._list_machines())) + \
132 list(map(self._serialize_machine, self._list_rack_controllers())) + \
133@@ -274,8 +298,8 @@ class MaasV1(object):
134 signature_method=oauth1.SIGNATURE_PLAINTEXT)
135
136 url = url_join(self.maas_endpoint, uri)
137- uri, headers, body = client.sign(url)
138- response = requests.get(uri, headers=headers)
139+ uri, headers, body = retry(client.sign(url)) or [None, None, None]
140+ response = retry(requests.get(uri, headers=headers)) or ""
141 if response.status_code == 200:
142 return response.json()
143 except Exception as e:
144@@ -374,13 +398,16 @@ class Juju(object):
145 else:
146 cmd = ['snap run juju', 'status', '--format=yaml']
147
148- try:
149- output = subprocess.check_output(cmd, env=env)
150- parsed = yaml.safe_load(output)
151- machines = parsed['machines']
152- except subprocess.CalledProcessError as e:
153- print("Exception calling juju status command. {}".format(e))
154-
155+ for i in range(0, RETRY):
156+ try:
157+ output = subprocess.check_output(cmd, timeout=30, env=env)
158+ parsed = yaml.safe_load(output)
159+ machines = parsed['machines']
160+ i = RETRY + 1
161+ except Exception as e:
162+ print("Error: failed to retrieve machines from juju [{}/{}]\n{}".format(i+1,RETRY,e))
163+ time.sleep(SLEEP * i)
164+
165 return machines
166
167 def connect(self):

Subscribers

People subscribed via source and target branches

to all changes: