Merge ubuntu-archive-tools:find-proposed-cluster into ubuntu-archive-tools:main

Proposed by Steve Langasek
Status: Merged
Merged at revision: 12065253df572110f433ffd0672cc716a6212f05
Proposed branch: ubuntu-archive-tools:find-proposed-cluster
Merge into: ubuntu-archive-tools:main
Diff against target: 162 lines (+144/-1)
2 files modified
find-proposed-cluster (+143/-0)
retry-autopkgtest-regressions (+1/-1)
Reviewer                Status
Łukasz Zemczak          Approve
Andy Whitcroft          Approve
Review via email: mp+434590@code.launchpad.net
Andy Whitcroft (apw) wrote:

Looks like a useful tool for the toolkit. A couple of comments inline.

+1

review: Approve
Steve Langasek (vorlon):
Łukasz Zemczak (sil2100) wrote:

Looking good, I think you can merge it as is. A few inline suggestions/mentions, but those are just nitpicks. Also, it might be nice to be a bit more verbose, e.g. mentioning what the output means in a bit more detail.

review: Approve

Preview Diff

diff --git a/find-proposed-cluster b/find-proposed-cluster
new file mode 100755
index 0000000..30c459a
--- /dev/null
+++ b/find-proposed-cluster
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+# Identify the biggest cluster of packages in britney indicating a transition
+# that should be worked on
+# Copyright (C) 2022 Canonical Ltd.
+# Author: Steve Langasek <steve.langasek@ubuntu.com>
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License, version 3.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+from datetime import datetime
+import dateutil.parser
+from dateutil.tz import tzutc
+import argparse
+import lzma
+import os
+import re
+import urllib.request
+import urllib.parse
+import yaml
+
+default_series = 'lunar'
+args = None
+
+
+def get_cache_dir():
+    cache_dir = os.environ.get('XDG_CACHE_HOME',
+                               os.path.expanduser(os.path.join('~', '.cache')))
+    uat_cache = os.path.join(cache_dir, 'ubuntu-archive-tools')
+    os.makedirs(uat_cache, exist_ok=True)
+    return uat_cache
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        'Find the biggest transition in update_excuses',
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('-s', '--series', default=default_series,
+                        help='Ubuntu series (default: %(default)s)')
+    parser.add_argument('--force-cached', action='store_true',
+                        help='Do not try to download files again, use cached '
+                        'version when it is present. This is useful '
+                        'when triggering various subsets of tests in a row.')
+    parser.add_argument('--limit', default=5, metavar='N',
+                        help='Output the top N results (default: %(default)d)')
+
+    args = parser.parse_args()
+
+    return args
+
+
+def get_url(url, force_cached):
+    '''Return a file object for the URL, using a cache.
+    '''
+    cache_file = None
+
+    m = re.search('people.canonical.com/~ubuntu-archive/proposed-migration/'
+                  '([^/]*)/([^/]*)',
+                  url)
+    cache_dir = get_cache_dir()
+    cache_file = os.path.join(cache_dir, '%s_%s' % (m.group(1), m.group(2)))
+
+    try:
+        prev_mtime = os.stat(cache_file).st_mtime
+    except FileNotFoundError:
+        prev_mtime = 0
+    prev_timestamp = datetime.fromtimestamp(prev_mtime, tz=tzutc())
+    new_timestamp = datetime.now(tz=tzutc()).timestamp()
+    if force_cached:
+        return open(cache_file, 'rb')
+
+    f = urllib.request.urlopen(url)
+
+    remote_ts = dateutil.parser.parse(f.headers['last-modified'])
+    if remote_ts > prev_timestamp:
+        with open('%s.new' % cache_file, 'wb') as new_cache:
+            for line in f:
+                new_cache.write(line)
+        os.rename('%s.new' % cache_file, cache_file)
+        os.utime(cache_file, times=(new_timestamp, new_timestamp))
+    f.close()
+    f = open(cache_file, 'rb')
+    return f
+
+
+def get_sources(excuses_url, force_cached):
+
+    # load YAML excuses
+    try:
+        f = get_url(excuses_url, force_cached)
+        lzma_f = lzma.open(f)
+        excuses = yaml.load(lzma_f, Loader=yaml.CSafeLoader)
+        lzma_f.close()
+    except urllib.error.HTTPError as e:
+        if e.code == 404:
+            # some versions of britney output this file uncompressed, try that
+            # location too
+            f = get_url(excuses_url.removesuffix('.xz'), force_cached)
+            excuses = yaml.load(f, Loader=yaml.CSafeLoader)
+            f.close()
+        else:
+            raise
+    return excuses['sources']
+
+
+args = parse_args()
+
+excuses_url = 'http://people.canonical.com/~ubuntu-archive/proposed-migration/%s/update_excuses.yaml.xz' % args.series
+
+sources = get_sources(excuses_url, args.force_cached)
+
+clusters = {}
+for source in sources:
+    if 'dependencies' in source and 'migrate-after' in source['dependencies']:
+        clusters[source['source']] = source['dependencies']['migrate-after']
+
+clusters = {k: v for k, v in sorted(clusters.items(),
+                                    key=lambda item: -len(item[1]))}
+
+count = 0
+limit = int(args.limit)
+seen_packages = set()
+for k, v in clusters.items():
+    if k in seen_packages:
+        seen_packages.update(v)
+        continue
+    count += 1
+    if count > limit:
+        break
+    length = len(v)
+    if length < 3:
+        break
+    seen_packages.update(v)
+    print(k, length)
diff --git a/retry-autopkgtest-regressions b/retry-autopkgtest-regressions
index 9a5f912..8d058f2 100755
--- a/retry-autopkgtest-regressions
+++ b/retry-autopkgtest-regressions
@@ -246,7 +246,7 @@ def get_regressions(excuses_url, release, retry_states, min_age, max_age,
         if e.code == 404:
             # some versions of britney output this file uncompressed, try that
             # location too
-            f = get_url(excuses_url.rstrip('.xz'), force_cached)
+            f = get_url(excuses_url.removesuffix('.xz'), force_cached)
             excuses = yaml.load(f, Loader=yaml.CSafeLoader)
             f.close()
         else:
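
To make the review remark about output concrete: each line the script prints is a source package stuck in -proposed, followed by the size of its 'migrate-after' list, i.e. the number of packages that would migrate after this one does. Only clusters of 3 or more packages are shown, largest first, up to --limit entries. Below is a minimal sketch of that clustering step, run against hand-made excuses data; the package names are made up, and the seen-package de-duplication from the script is omitted for brevity:

    # Toy stand-in for excuses['sources']; package names are hypothetical.
    sources = [
        {'source': 'glibc', 'dependencies': {'migrate-after': ['a', 'b', 'c']}},
        {'source': 'a', 'dependencies': {'migrate-after': ['b', 'c']}},
        {'source': 'zlib', 'dependencies': {'migrate-after': ['x']}},
    ]

    # Map each source to its cluster, as the script does.
    clusters = {s['source']: s['dependencies']['migrate-after']
                for s in sources
                if 'migrate-after' in s.get('dependencies', {})}

    # Print the biggest cluster first, matching the script's output ordering.
    for name, blocked in sorted(clusters.items(), key=lambda kv: -len(kv[1])):
        print(name, len(blocked))   # -> glibc 3 / a 2 / zlib 1

Against a real series, the script instead fetches update_excuses.yaml.xz for the series given with -s/--series and applies the cluster-size (>= 3) and --limit cut-offs before printing.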
