Merge lp:~cjwatson/ddeb-retriever/lp-fetch into lp:ddeb-retriever

Proposed by Colin Watson on 2015-04-14
Status: Merged
Merged at revision: 153
Proposed branch: lp:~cjwatson/ddeb-retriever/lp-fetch
Merge into: lp:ddeb-retriever
Diff against target: 222 lines (+163/-2)
2 files modified
ddeb_retriever.py (+136/-1)
lpinfo.py (+27/-1)
To merge this branch: bzr merge lp:~cjwatson/ddeb-retriever/lp-fetch
Reviewer | Review Type | Date Requested | Status
Colin Watson | - | - | Approve on 2015-04-14
Review via email: mp+256192@code.launchpad.net

Commit Message

Fetch ddebs from the Launchpad librarian if possible.

Description of the Change

Fetch ddebs from the Launchpad librarian if possible.

To post a comment you must log in.
Colin Watson (cjwatson) wrote:

20:36 <pitti> cjwatson: I didn't look through it with a fine comb again, but this looks very similar to the changes I've reviewed some two weeks ago?
20:36 <cjwatson> pitti: Yes, it's just bug-fixes since then
20:36 <cjwatson> pitti: And the order_by_date=True stuff which was a result of LP reviews
20:36 <pitti> cjwatson: so if you feel good about it, please feel free to merge
20:37 <pitti> or rather, push, for cleaner history
20:38 <cjwatson> pitti: will do, thanks!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ddeb_retriever.py'
2--- ddeb_retriever.py 2014-08-07 09:03:46 +0000
3+++ ddeb_retriever.py 2015-04-14 19:16:49 +0000
4@@ -10,6 +10,9 @@
5 import tarfile
6 import argparse
7 import fcntl
8+import datetime
9+
10+import wadllib.iso_strptime
11
12 import lpinfo
13 import archive_tools
14@@ -61,6 +64,75 @@
15 return result
16
17
18+distroarchseries_links = {}
19+distroseries_links = {}
20+
21+def get_suite_for_publication(pub):
22+ '''Return the suite for a binary_package_publishing_history object.
23+
24+ We memoise the series objects, as otherwise this is very slow for lots
25+ of publications.
26+ '''
27+ # Don't be tempted to shorten this using setdefault; fetching
28+ # pub.distro_arch_series and das.distroseries is expensive.
29+ distroarchseries_link = pub.distro_arch_series_link
30+ if distroarchseries_link not in distroarchseries_links:
31+ distroarchseries_links[distroarchseries_link] = pub.distro_arch_series
32+ distroarchseries = distroarchseries_links[distroarchseries_link]
33+ distroseries_link = distroarchseries.distroseries_link
34+ if distroseries_link not in distroseries_links:
35+ distroseries_links[distroseries_link] = distroarchseries.distroseries
36+ distroseries = distroseries_links[distroseries_link]
37+ return lpinfo.make_suite(distroseries, pub.pocket)
38+
39+
40+def install_from_librarian(pub, url, ddeb_archive_root):
41+ '''Fetch ddeb from the librarian and install it into ddeb_archive_root
42+
43+ If the file already exists in the appropriate place in pool, it will not
44+ be downloaded again.
45+
46+ Return True if it was installed successfully or already present,
47+ otherwise False.
48+ '''
49+ ddeb = urllib.unquote(os.path.basename(url))
50+ try:
51+ (dbgsymname, version, arch) = ddeb.split('_')
52+ assert arch.endswith('.ddeb')
53+ (arch, _) = arch.split('.')
54+ assert dbgsymname.endswith('-dbgsym')
55+ except (ValueError, AssertionError):
56+ logging.warning('Ignoring non-ddeb file %s in %s', ddeb, pub)
57+ return False
58+
59+ source_pub = pub.build.getLatestSourcePublication()
60+ if source_pub is None:
61+ logging.warning('Ignoring %s which has no source publication', pub)
62+ return False
63+
64+ source_name = source_pub.source_package_name
65+ if source_name.startswith('lib'):
66+ prefix = source_name[:4]
67+ else:
68+ prefix = source_name[0]
69+
70+ destdir = os.path.join(
71+ ddeb_archive_root, 'pool', pub.component_name, prefix, source_name)
72+ dest = os.path.join(destdir, ddeb)
73+ if os.path.exists(dest):
74+ logging.debug('%s already exists, skipping', dest)
75+ else:
76+ logging.debug('Downloading %s into %s', ddeb, os.path.dirname(dest))
77+ try:
78+ os.makedirs(destdir)
79+ except OSError:
80+ pass
81+ urllib.urlretrieve(url, dest)
82+ logging.debug('Downloaded %s into %s', ddeb, os.path.dirname(dest))
83+
84+ return True
85+
86+
87 def get_ddeb_index(url):
88 '''Parse the ddeb index file at the given URL
89
90@@ -251,9 +323,39 @@
91 logging.error('Cannot acquire lock, another ddeb-retriever instance is already running')
92 return
93
94+ UTC = wadllib.iso_strptime.TimeZone('+00:00')
95+ lp_threshold_path = os.path.join(args.archive_root, '.lp-threshold')
96+ try:
97+ with open(lp_threshold_path) as lp_threshold_file:
98+ lp_threshold = datetime.datetime.fromtimestamp(
99+ int(lp_threshold_file.readline()), tz=UTC)
100+ except IOError:
101+ lp_threshold = None
102+
103 series_info = lpinfo.get_series(args.distribution)
104 logging.debug('series info: %s', series_info)
105
106+ if lp_threshold is not None:
107+ # Allow a grace period to cope with publications arriving out of
108+ # order during long transactions.
109+ real_threshold = lp_threshold - datetime.timedelta(hours=1)
110+ logging.info(
111+ 'Retrieving Launchpad publications since %s' % real_threshold)
112+ else:
113+ real_threshold = None
114+ logging.info('Retrieving all Launchpad publications')
115+ binary_pubs = lpinfo.get_binary_publications(args.distribution)
116+ debug_pubs = []
117+ latest_date_created = lp_threshold
118+ for pub in binary_pubs:
119+ if pub.date_created < real_threshold:
120+ break
121+ if (latest_date_created is None or
122+ pub.date_created > latest_date_created):
123+ latest_date_created = pub.date_created
124+ if pub.is_debug:
125+ debug_pubs.append(pub)
126+
127 # Download ddebs into temporary dir; all in parallel, as buildd Apaches are
128 # very slow
129 for date in args.date:
130@@ -275,6 +377,8 @@
131 queue_dir = os.path.join(args.archive_root, 'queue')
132 if os.path.isdir(queue_dir):
133 updated_pockets.update(os.listdir(queue_dir))
134+ for pub in debug_pubs:
135+ updated_pockets.add(get_suite_for_publication(pub))
136 logging.info('got downloaded/queued ddebs for pockets: %s', ' '.join(updated_pockets))
137
138 # complete the pocket list
139@@ -295,8 +399,39 @@
140 pockets_filter=updated_pockets,
141 strip_epoch=True)
142
143+ logging.info('Building ddeb map, this can take a while')
144+ ddeb_map = archive_tools.archive_map(series_info, [args.archive_root],
145+ suffix='')
146+
147+ # try to install ddebs from publishing history records
148+ installed_pockets = set()
149+ for pub in debug_pubs:
150+ already_present = False
151+ pub_installed = False
152+ if pub.binary_package_name in ddeb_map:
153+ existing = ddeb_map[pub.binary_package_name]
154+ for _, series_map in existing.items():
155+ if pub.binary_package_version in series_map:
156+ already_present = True
157+ break
158+ if already_present:
159+ # We already have this ddeb on disk, so just note that indexes
160+ # for this pocket need to be rebuilt.
161+ pub_installed = True
162+ else:
163+ for url in pub.binaryFileUrls():
164+ if install_from_librarian(pub, url, args.archive_root):
165+ pub_installed = True
166+ if pub_installed:
167+ installed_pockets.add(get_suite_for_publication(pub))
168+
169+ if latest_date_created is not None:
170+ epoch = datetime.datetime.fromtimestamp(0, tz=UTC)
171+ new_threshold = (latest_date_created - epoch).total_seconds()
172+ with open(lp_threshold_path, 'w') as lp_threshold_file:
173+ lp_threshold_file.write("%d\n" % new_threshold)
174+
175 # try to install downloaded ddebs
176- installed_pockets = set()
177 for pocket in updated_pockets:
178 d = os.path.join(download_dir, pocket)
179 if not os.path.isdir(d):
180
181=== modified file 'lpinfo.py'
182--- lpinfo.py 2014-07-25 06:50:12 +0000
183+++ lpinfo.py 2015-04-14 19:16:49 +0000
184@@ -10,7 +10,9 @@
185 global lp
186 if lp is None:
187 lp = Launchpad.login_anonymously(
188- 'ddeb-retriever', os.environ.get('LAUNCHPAD_INSTANCE', 'production'))
189+ 'ddeb-retriever',
190+ os.environ.get('LAUNCHPAD_INSTANCE', 'production'),
191+ version='devel')
192
193
194 def get_series(distribution):
195@@ -45,3 +47,27 @@
196 buildds.append(builder.name)
197
198 return buildds
199+
200+
201+def get_binary_publications(distribution, created_since_date=None):
202+ '''Get recently-published binaries for the given distribution'''
203+
204+ _get_lp()
205+ archive = lp.distributions[distribution].main_archive
206+ # It's important to omit the status filter here, even if we later decide
207+ # that we only care about the Published status (although at the moment
208+ # it seems reasonable to scan everything and let the garbage-collection
209+ # pass clean up superseded ddebs later). This is because the collection
210+ # may change as we're iterating over it. Without any filtering, this is
211+ # OK because entries can never be removed from the collection: the worst
212+ # case is that we encounter the same publication twice. With filtering
213+ # on mutable properties, it would be possible to lose entries between
214+ # two successive batches.
215+ return archive.getPublishedBinaries(order_by_date=True)
216+
217+
218+def make_suite(distroseries, pocket):
219+ if pocket == 'Release':
220+ return distroseries.name
221+ else:
222+ return '%s-%s' % (distroseries.name, pocket.lower())

Subscribers

People subscribed via source and target branches