Merge lp:~hloeung/ubuntu-repository-cache/metadata-sync-to-peers-less-juju into lp:ubuntu-repository-cache

Proposed by Haw Loeung
Status: Merged
Approved by: Haw Loeung
Approved revision: 386
Merged at revision: 383
Proposed branch: lp:~hloeung/ubuntu-repository-cache/metadata-sync-to-peers-less-juju
Merge into: lp:ubuntu-repository-cache
Diff against target: 414 lines (+142/-83)
10 files modified
hooks/hooks.py (+24/-28)
lib/ubuntu_repository_cache/metadata_cleanup.py (+60/-0)
lib/ubuntu_repository_cache/metadata_sync.py (+1/-1)
lib/ubuntu_repository_cache/mirror.py (+10/-16)
lib/ubuntu_repository_cache/service.py (+33/-29)
templates/cron/ubuntu-repository-cache-config (+2/-1)
templates/cron/ubuntu-repository-cache-cron (+5/-1)
tests/110-multi_unit.simple (+2/-2)
tests/140-failover.simple (+4/-4)
tests/util.py (+1/-1)
To merge this branch: bzr merge lp:~hloeung/ubuntu-repository-cache/metadata-sync-to-peers-less-juju
Reviewer Review Type Date Requested Status
James Simpson Approve
Ubuntu Repository Cache Charmers, Canonical Pending
Review via email: mp+428686@code.launchpad.net

Commit message

Untie metadata sync from relying on Juju

To post a comment you must log in.
Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

This merge proposal is being monitored by mergebot. Change the status to Approved to merge.

384. By Haw Loeung

Fixed missing newline before EOF causing cron to fail to load our crontab

385. By Haw Loeung

Fixed here too

Revision history for this message
James Simpson (jsimpso) wrote :

LGTM, a couple of comments in-line but nothing worth blocking on!

review: Approve
Revision history for this message
Haw Loeung (hloeung) :
386. By Haw Loeung

Add missing metric; also rename ubuntu_repository_cache_metadata_sync_total_duration to ubuntu_repository_cache_metadata_sync_duration to make it consistent

Revision history for this message
🤖 Canonical IS Merge Bot (canonical-is-mergebot) wrote :

Change successfully merged at revision 383

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'hooks/hooks.py'
2--- hooks/hooks.py 2022-07-22 06:33:43 +0000
3+++ hooks/hooks.py 2022-08-24 00:09:04 +0000
4@@ -9,7 +9,6 @@
5 from charmhelpers.core import hookenv
6
7 from lib.ubuntu_repository_cache import (
8- metadata_sync,
9 mirror,
10 service,
11 util,
12@@ -51,9 +50,10 @@
13 metadata is ready."""
14
15 LOG('Notifying peers of metadata update')
16+ urc_config = parse_config()
17 # We can't use hookenv.config() here because the persistent config
18 # permissions are restricted to root:root 0600 - LP:1828658
19- apache_root = parse_config()['APACHE_ROOT']
20+ apache_root = urc_config['APACHE_ROOT']
21 apache_data = '/'.join((apache_root, 'data'))
22
23 meta_ver = sys.argv[1]
24@@ -62,42 +62,38 @@
25 return
26
27 link_next = '/'.join((apache_data, 'ubuntu_next'))
28-
29 meta_dir = os.path.realpath(link_next)
30
31+ peers = urc_config['PEERS'].split()
32+ LOG('Peers: {}'.format(' '.join(peers)))
33+
34 start_time = time.time()
35
36 # Push the metadata to peers
37- if hookenv.is_relation_made('cluster'):
38- dest = '/'.join((apache_data, meta_ver))
39- link_dir = '/'.join((apache_data, 'ubuntu_active'))
40- LOG('Pushing updates to peers', hookenv.DEBUG)
41- source = meta_dir + '/'
42- successful_peers = mirror.rsync_to_peers(source, dest, link_dir=link_dir)
43-
44- # Push metaversion_good canary to indicate a complete sync
45- canary = '_'.join((meta_dir, 'good'))
46- LOG('Pushing canary files to peers', hookenv.DEBUG)
47- mirror.rsync_to_peers(canary, apache_data, peers=successful_peers)
48-
49- # Update 'meta_version' relation setting
50- LOG('', hookenv.DEBUG)
51- ids = hookenv.relation_ids('cluster') or []
52- LOG('Updating meta_version for ids %s' % ids, hookenv.DEBUG)
53- for r_id in ids:
54- LOG('Setting meta_version=%s' % meta_ver, hookenv.DEBUG)
55- hookenv.relation_set(relation_id=r_id, meta_version=meta_ver)
56+ dest = '/'.join((apache_data, meta_ver))
57+ link_dir = '/'.join((apache_data, 'ubuntu_active'))
58+ LOG('Pushing updates to peers', hookenv.DEBUG)
59+ source = meta_dir + '/'
60+ successful_peers = mirror.rsync_to_peers(source, dest, link_dir=link_dir, peers=peers)
61+
62+ if sorted(successful_peers) != sorted(peers):
63+ LOG('Failed to sync latest metadata to all peers, aborting')
64+ return
65+
66+ # Push metaversion_good canary to indicate a complete sync
67+ canary = '_'.join((meta_dir, 'good'))
68+ LOG('Pushing canary files to peers', hookenv.DEBUG)
69+ mirror.rsync_to_peers(canary, apache_data, peers=successful_peers)
70+
71+ # Update ubuntu_active/ symlink
72+ LOG('Updating ubuntu_active symlink', hookenv.DEBUG)
73+ util.update_active_symlink(os.path.basename(meta_dir), apache_root)
74+ mirror.rsync_to_peers(link_dir, apache_data, peers=successful_peers)
75
76 duration = time.time() - start_time
77 metric_name = 'ubuntu_repository_cache_metadata_sync_peers_duration'
78 util.send_to_influx(util.render_influx(metric_name, '', duration))
79
80- util.update_active_symlink(os.path.basename(meta_dir), apache_root)
81- metadata_sync.clean_metadata_dir(apache_root, LOG)
82- # Sync doesn't change apache2 configs so doesn't require
83- # graceful. Just start it up if it isn't running.
84- service.start(apache2_start_only=True)
85-
86
87 if __name__ == "__main__":
88 # execute a hook based on the name the program is called by
89
90=== added file 'lib/ubuntu_repository_cache/metadata_cleanup.py'
91--- lib/ubuntu_repository_cache/metadata_cleanup.py 1970-01-01 00:00:00 +0000
92+++ lib/ubuntu_repository_cache/metadata_cleanup.py 2022-08-24 00:09:04 +0000
93@@ -0,0 +1,60 @@
94+#! /usr/bin/python3
95+
96+import os
97+import time
98+
99+from .metadata_sync import (
100+ clean_metadata_dir,
101+)
102+from .util import (
103+ FileSemaphore,
104+ SemaphoreExistsError,
105+ render_influx,
106+ send_to_influx,
107+)
108+
109+
110+# Log file and directory for the repository sync cron job
111+REPO_SYNC_DIR = '/var/log/ubuntu-repository-cache'
112+REPO_SYNC_LOG = os.path.join(REPO_SYNC_DIR, 'metadata-cleanup.log')
113+
114+
115+SEMAPHORE_FILE = '/tmp/metadata-cleanup-running'
116+
117+
118+def main(environment, log):
119+ apache_root = environment['APACHE_ROOT']
120+ clean_metadata_dir(apache_root, log)
121+
122+
123+if __name__ == "__main__":
124+
125+ # Use the logging infrastructure to print messages with timestamps to
126+ # stdout. This is a cheap way to get good timestamps. Ideally we should
127+ # move to leverage the logging infrastructure better. ~tribaal
128+ import logging
129+
130+ file_handler = logging.FileHandler(REPO_SYNC_LOG)
131+ file_handler.setLevel(logging.INFO)
132+
133+ basic_formatter = logging.Formatter('%(asctime)s - %(message)s')
134+ file_handler.setFormatter(basic_formatter)
135+
136+ cache_logger = logging.getLogger('ubuntu-repository-cache')
137+ cache_logger.setLevel(logging.INFO)
138+ cache_logger.addHandler(file_handler)
139+
140+ environment = os.environ
141+
142+ start_time = time.time()
143+ try:
144+ with FileSemaphore(SEMAPHORE_FILE):
145+ cache_logger.info("Metadata cleanup started from metadata_cleanup.py:__main__")
146+ main(environment, cache_logger.info)
147+ cache_logger.info("Metadata cleanup from __main__ ended.")
148+
149+ duration = time.time() - start_time
150+ metric_name = 'ubuntu_repository_cache_metadata_cleanup_duration'
151+ send_to_influx(render_influx(metric_name, '', duration))
152+ except SemaphoreExistsError:
153+ cache_logger.info("Can't acquire semaphore: {} already exists.".format(SEMAPHORE_FILE))
154
155=== modified file 'lib/ubuntu_repository_cache/metadata_sync.py'
156--- lib/ubuntu_repository_cache/metadata_sync.py 2022-08-22 04:17:09 +0000
157+++ lib/ubuntu_repository_cache/metadata_sync.py 2022-08-24 00:09:04 +0000
158@@ -621,7 +621,7 @@
159 cache_logger.info("Metadata sync from __main__ ended.")
160
161 duration = time.time() - start_time
162- metric_name = 'ubuntu_repository_cache_metadata_sync_total_duration'
163+ metric_name = 'ubuntu_repository_cache_metadata_sync_duration'
164 send_to_influx(render_influx(metric_name, 'upstream_host={}'.format(environment["SYNC_HOST"]), duration))
165 except SemaphoreExistsError:
166 cache_logger.info("Can't acquire semaphore: {} already exists.".format(SEMAPHORE_FILE))
167
168=== modified file 'lib/ubuntu_repository_cache/mirror.py'
169--- lib/ubuntu_repository_cache/mirror.py 2022-08-22 04:00:31 +0000
170+++ lib/ubuntu_repository_cache/mirror.py 2022-08-24 00:09:04 +0000
171@@ -11,7 +11,6 @@
172 CalledProcessError,
173 )
174
175-from charmhelpers.contrib import unison
176 from charmhelpers.contrib.hahelpers import cluster
177
178 from charmhelpers.core import (
179@@ -76,24 +75,19 @@
180 """
181 Use rsync to push a directory to all configured peers.
182
183- Peers are configured in the 'cluster' relationship using
184- charmhelpers.contrib.unison.ssh_authorized_peers(); this list can be
185- overridden with the peers parameter.
186-
187 A list of the peers whose transfers were successful will be
188 returned.
189 """
190
191- if peers is None:
192- peers = unison.collect_authed_hosts(peer_interface='cluster')
193-
194 successful_peers = []
195- for peer in peers:
196- rsync_dest = '{}@{}:{}'.format(user, peer, dest_dir)
197- LOG('Syncing {} to {}.'.format(source_dir, rsync_dest))
198+ for sync_peer in peers:
199+ (peer_unit, peer_addr) = sync_peer.split(':')
200+
201+ rsync_dest = '{}@{}:{}'.format(user, peer_addr, dest_dir)
202+ LOG('Syncing {} to {} ({}) {}.'.format(source_dir, peer_unit, peer_addr, rsync_dest))
203
204 metric_name = 'ubuntu_repository_cache_metadata_sync_peers_failures'
205- label = 'peer={}'.format(peer)
206+ label = 'peer={}'.format(peer_unit)
207 failures = 0
208
209 errmsgs = []
210@@ -109,7 +103,7 @@
211 # Save the exact exception and reuse when we need to
212 # reraise it after exhausting all available attempts.
213 exp = e
214- msg = 'Sync to {} failed'.format(peer)
215+ msg = 'Sync to {} ({}) failed'.format(peer_unit, peer_addr)
216 LOG(msg)
217 LOG(e.output.decode())
218 errmsgs.append("{}: {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), msg))
219@@ -118,13 +112,13 @@
220 sleep = random.randint(attempt, 10 * attempt)
221 time.sleep(sleep)
222 else:
223- successful_peers.append(peer)
224- LOG('Sync to {} complete.'.format(peer))
225+ successful_peers.append(sync_peer)
226+ LOG('Sync to {} ({}) complete.'.format(peer_unit, peer_addr))
227 break
228 else:
229 util.send_to_influx(util.render_influx(metric_name, label, max_retries))
230 print('\n'.join(errmsgs))
231- msg = "Failed after all available attempts ({}) with peer {}".format(max_retries, peer)
232+ msg = "Failed after all available attempts ({}) with peer {} ({})".format(max_retries, peer_unit, peer_addr)
233 LOG(msg)
234 print("{}: {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), msg))
235 sys.stdout.flush()
236
237=== modified file 'lib/ubuntu_repository_cache/service.py'
238--- lib/ubuntu_repository_cache/service.py 2022-08-22 04:17:09 +0000
239+++ lib/ubuntu_repository_cache/service.py 2022-08-24 00:09:04 +0000
240@@ -265,36 +265,40 @@
241
242 local_unit = hookenv.local_unit()
243
244- # Cron job for metadata update on leader
245- cron_filename = '/etc/cron.d/ubuntu-repository-cache_rsync'
246+ # Clean up old file
247+ old_cron_filename = '/etc/cron.d/ubuntu-repository-cache_rsync'
248+ if os.path.exists(old_cron_filename):
249+ os.unlink(old_cron_filename)
250+
251+ # Cron job for metadata update
252+ cron_filename = '/etc/cron.d/ubuntu-repository-cache'
253+ cron_context = {}
254+ if not unitdata.kv().get('rsync-minutes'):
255+ # Don't schedule an rsync within 15 minutes of startup
256+ minutes = (random.randint(0, 43) + time.gmtime().tm_min + 15) % 60
257+ unitdata.kv().set('rsync-minutes', minutes)
258+ cron_context['ApacheRoot'] = unitdata.kv().get('apache-root')
259+ cron_context['SyncHost'] = config['sync-host']
260+ cron_context['PathBase'] = config['path-base']
261+ cron_context['RsyncModule'] = config['rsync-module']
262+ cron_context['Minutes'] = unitdata.kv().get('rsync-minutes')
263+ cron_context['UnitId'] = hookenv.local_unit()
264+ cron_context['UnitPath'] = '-'.join(('unit', hookenv.local_unit().replace('/', '-')))
265+ cron_context['MirrorSeries'] = config['mirror-series'].strip()
266+ cron_context['ConfigPath'] = urc_config_path
267+ cron_context['CharmEnvPath'] = os.path.join(hookenv.charm_dir(), 'bin', 'charm-env')
268+ # Add Juju model name and AZ so it's easier to work out which region sent out cron emails.
269+ cron_context['JujuModelName'] = os.environ.get('JUJU_MODEL_NAME')
270+ cron_context['JujuAvailabilityZone'] = os.environ.get('JUJU_AVAILABILITY_ZONE')
271+ peers = []
272 if (config['leader_unit'] == '' and cluster.is_elected_leader(None)) or (local_unit == config['leader_unit']):
273- cron_context = {}
274- if not unitdata.kv().get('rsync-minutes'):
275- # Don't schedule an rsync within 15 minutes of startup
276- minutes = (random.randint(0, 43) + time.gmtime().tm_min + 15) % 60
277- unitdata.kv().set('rsync-minutes', minutes)
278- cron_context['ApacheRoot'] = unitdata.kv().get('apache-root')
279- cron_context['SyncHost'] = config['sync-host']
280- cron_context['PathBase'] = config['path-base']
281- cron_context['RsyncModule'] = config['rsync-module']
282- cron_context['Minutes'] = unitdata.kv().get('rsync-minutes')
283- cron_context['UnitId'] = hookenv.local_unit()
284- cron_context['UnitPath'] = '-'.join(('unit', hookenv.local_unit().replace('/', '-')))
285- cron_context['MirrorSeries'] = config['mirror-series'].strip()
286- cron_context['ConfigPath'] = urc_config_path
287- cron_context['CharmEnvPath'] = os.path.join(hookenv.charm_dir(), 'bin', 'charm-env')
288- # Add Juju model name and AZ so it's easier to work out which region sent out cron emails.
289- cron_context['JujuModelName'] = os.environ.get('JUJU_MODEL_NAME')
290- cron_context['JujuAvailabilityZone'] = os.environ.get('JUJU_AVAILABILITY_ZONE')
291- templating.render('cron/ubuntu-repository-cache_rsync.cron', cron_filename, cron_context)
292- templating.render('cron/ubuntu-repository-cache-config', urc_config_path, cron_context)
293- else:
294- try:
295- os.stat(cron_filename)
296- except OSError:
297- pass
298- else:
299- os.remove(cron_filename)
300+ for u in hookenv.iter_units_for_relation_name('cluster'):
301+ unit_addr = hookenv.ingress_address(rid=u.rid, unit=u.unit)
302+ peers.append('{}:{}'.format(u.unit, unit_addr))
303+ cron_context['Peers'] = ' '.join(peers)
304+
305+ templating.render('cron/ubuntu-repository-cache-cron', cron_filename, cron_context)
306+ templating.render('cron/ubuntu-repository-cache-config', urc_config_path, cron_context)
307
308
309 def update_checks():
310
311=== modified file 'templates/cron/ubuntu-repository-cache-config'
312--- templates/cron/ubuntu-repository-cache-config 2022-08-22 04:22:38 +0000
313+++ templates/cron/ubuntu-repository-cache-config 2022-08-24 00:09:04 +0000
314@@ -7,7 +7,8 @@
315 export MIRROR_SERIES="{{ MirrorSeries }}"
316 export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
317 export PATH_BASE={{ PathBase }}
318-export PYTHONPATH=/var/lib/juju/agents/{{ UnitPath }}/charm/lib
319+export PEERS="{{ Peers }}"
320+export PYTHONPATH=/var/lib/juju/agents/{{ UnitPath }}/charm
321 export RSYNC_MODULE={{ RsyncModule }}
322 export SYNC_HOST={{ SyncHost }}
323 export UNIT_PATH={{ UnitPath }}
324
325=== renamed file 'templates/cron/ubuntu-repository-cache_rsync.cron' => 'templates/cron/ubuntu-repository-cache-cron'
326--- templates/cron/ubuntu-repository-cache_rsync.cron 2022-08-08 23:08:37 +0000
327+++ templates/cron/ubuntu-repository-cache-cron 2022-08-24 00:09:04 +0000
328@@ -2,7 +2,11 @@
329 {%- if JujuAvailabilityZone %}
330 JUJU_AVAILABILITY_ZONE={{ JujuAvailabilityZone }}
331 {%- endif %}
332+{%- if Peers != '' %}
333 # This cronjob will make the leader sync its view of the metadata with upstream
334-# It will then trigger a juju-run to let its peers synchronise.
335 {{ Minutes }} * * * * root . {{ ConfigPath }} && run-one {{ CharmEnvPath }} --charm ubuntu-repository-cache python3 -m ubuntu_repository_cache.metadata_sync # {{ JujuModelName }} {% if JujuAvailabilityZone %}{{ JujuAvailabilityZone }}{% endif %}
336
337+{% else %}
338+{{ Minutes }} * * * * root . {{ ConfigPath }} && run-one {{ CharmEnvPath }} --charm ubuntu-repository-cache python3 -m ubuntu_repository_cache.metadata_cleanup # {{ JujuModelName }} {% if JujuAvailabilityZone %}{{ JujuAvailabilityZone }}{% endif %}
339+
340+{% endif %}
341
342=== modified file 'tests/110-multi_unit.simple'
343--- tests/110-multi_unit.simple 2017-04-06 21:02:01 +0000
344+++ tests/110-multi_unit.simple 2022-08-24 00:09:04 +0000
345@@ -108,14 +108,14 @@
346
347 # Test that leader has cron job for metadata rsync and peer does not
348 try:
349- content = leader.file('/etc/cron.d/ubuntu-repository-cache_rsync')
350+ content = leader.file('/etc/cron.d/ubuntu-repository-cache')
351 except:
352 msg = 'Leader {} missing rsync cron job'.format(unit_name(leader))
353 amulet.raise_status(amulet.FAIL, msg=msg)
354 log('PASS: Leader {} has the rsync cron job'.format(unit_name(leader)))
355
356 try:
357- peer.file('/etc/cron.d/ubuntu-repository-cache_rsync')
358+ peer.file('/etc/cron.d/ubuntu-repository-cache')
359 except IOError:
360 pass
361 else:
362
363=== modified file 'tests/140-failover.simple'
364--- tests/140-failover.simple 2017-04-06 21:02:01 +0000
365+++ tests/140-failover.simple 2022-08-24 00:09:04 +0000
366@@ -130,7 +130,7 @@
367
368 # Test that leader has cron job for metadata rsync and peer does not
369 try:
370- content = leader.file('/etc/cron.d/ubuntu-repository-cache_rsync')
371+ content = leader.file('/etc/cron.d/ubuntu-repository-cache')
372 except:
373 msg = 'Leader {} missing rsync cron job'.format(unit_name(leader))
374 amulet.raise_status(amulet.FAIL, msg=msg)
375@@ -138,7 +138,7 @@
376
377 for peer in peers:
378 try:
379- content = peer.file('/etc/cron.d/ubuntu-repository-cache_rsync')
380+ content = peer.file('/etc/cron.d/ubuntu-repository-cache')
381 except IOError:
382 pass
383 else:
384@@ -153,7 +153,7 @@
385
386 # Failover should cause only one peer unit to generate the cron job
387 # This also confirms that the new leader has the cron job
388-new_leader = wait_for_file(peers, '/etc/cron.d/ubuntu-repository-cache_rsync',
389+new_leader = wait_for_file(peers, '/etc/cron.d/ubuntu-repository-cache',
390 RSYNC_TIMEOUT, 'Neither peer become leader')
391 log('PASS: New leader {} has the rsync cron job'.format(unit_name(new_leader)))
392
393@@ -171,7 +171,7 @@
394 peers.remove(new_leader)
395 remaining_peer = peers[0]
396 try:
397- remaining_peer.file('/etc/cron.d/ubuntu-repository-cache_rsync')
398+ remaining_peer.file('/etc/cron.d/ubuntu-repository-cache')
399 except IOError:
400 pass
401 else:
402
403=== modified file 'tests/util.py'
404--- tests/util.py 2021-02-09 08:54:35 +0000
405+++ tests/util.py 2022-08-24 00:09:04 +0000
406@@ -12,7 +12,7 @@
407 SERVICE = 'ubuntu-repository-cache'
408 TIMEOUT = (40 + 60) * 60
409
410-CRON_FILE = '/etc/cron.d/ubuntu-repository-cache_rsync'
411+CRON_FILE = '/etc/cron.d/ubuntu-repository-cache'
412 LOG_DIR = '/var/log/ubuntu-repository-cache'
413 LOG_NAME = 'repository-sync-cron.log'
414 LOG_FILE = os.path.join(LOG_DIR, LOG_NAME)

Subscribers

People subscribed via source and target branches