Merge ubuntu-archive-tools:find-proposed-cluster into ubuntu-archive-tools:main

Proposed by Steve Langasek
Status: Merged
Merged at revision: 12065253df572110f433ffd0672cc716a6212f05
Proposed branch: ubuntu-archive-tools:find-proposed-cluster
Merge into: ubuntu-archive-tools:main
Diff against target: 162 lines (+144/-1)
2 files modified
find-proposed-cluster (+143/-0)
retry-autopkgtest-regressions (+1/-1)
Reviewer                Status
Łukasz Zemczak          Approve
Andy Whitcroft          Approve
Review via email: mp+434590@code.launchpad.net
Andy Whitcroft (apw) wrote:

Looks like a useful tool for the toolkit. A couple of comments inline.

+1

review: Approve
Steve Langasek (vorlon):
Łukasz Zemczak (sil2100) wrote:

Looking good, I think you can merge it as is. A few inline suggestions/mentions, but those are just nitpicks. Also, it might be nice to be a bit more verbose, e.g. mentioning what the output means in a bit more detail.

review: Approve

Preview Diff

diff --git a/find-proposed-cluster b/find-proposed-cluster
new file mode 100755
index 0000000..30c459a
--- /dev/null
+++ b/find-proposed-cluster
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+# Identify the biggest cluster of packages in britney indicating a transition
+# that should be worked on
+# Copyright (C) 2022 Canonical Ltd.
+# Author: Steve Langasek <steve.langasek@ubuntu.com>
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License, version 3.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+from datetime import datetime
+import dateutil.parser
+from dateutil.tz import tzutc
+import argparse
+import lzma
+import os
+import re
+import urllib.request
+import urllib.parse
+import yaml
+
+default_series = 'lunar'
+args = None
+
+
+def get_cache_dir():
+    cache_dir = os.environ.get('XDG_CACHE_HOME',
+                               os.path.expanduser(os.path.join('~', '.cache')))
+    uat_cache = os.path.join(cache_dir, 'ubuntu-archive-tools')
+    os.makedirs(uat_cache, exist_ok=True)
+    return uat_cache
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        'Find the biggest transition in update_excuses',
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('-s', '--series', default=default_series,
+                        help='Ubuntu series (default: %(default)s)')
+    parser.add_argument('--force-cached', action='store_true',
+                        help='Do not try to download files again, use cached '
+                        'version when it is present. This is useful '
+                        'when triggering various subsets of tests in a row.')
+    parser.add_argument('--limit', default=5, metavar='N',
+                        help='Output the top N results (default: %(default)d)')
+
+    args = parser.parse_args()
+
+    return args
+
+
+def get_url(url, force_cached):
+    '''Return a file object for the URL, using a cache.
+    '''
+    cache_file = None
+
+    m = re.search('people.canonical.com/~ubuntu-archive/proposed-migration/'
+                  '([^/]*)/([^/]*)',
+                  url)
+    cache_dir = get_cache_dir()
+    cache_file = os.path.join(cache_dir, '%s_%s' % (m.group(1), m.group(2)))
+
+    try:
+        prev_mtime = os.stat(cache_file).st_mtime
+    except FileNotFoundError:
+        prev_mtime = 0
+    prev_timestamp = datetime.fromtimestamp(prev_mtime, tz=tzutc())
+    new_timestamp = datetime.now(tz=tzutc()).timestamp()
+    if force_cached:
+        return open(cache_file, 'rb')
+
+    f = urllib.request.urlopen(url)
+
+    remote_ts = dateutil.parser.parse(f.headers['last-modified'])
+    if remote_ts > prev_timestamp:
+        with open('%s.new' % cache_file, 'wb') as new_cache:
+            for line in f:
+                new_cache.write(line)
+        os.rename('%s.new' % cache_file, cache_file)
+        os.utime(cache_file, times=(new_timestamp, new_timestamp))
+    f.close()
+    f = open(cache_file, 'rb')
+    return f
+
+
+def get_sources(excuses_url, force_cached):
+
+    # load YAML excuses
+    try:
+        f = get_url(excuses_url, force_cached)
+        lzma_f = lzma.open(f)
+        excuses = yaml.load(lzma_f, Loader=yaml.CSafeLoader)
+        lzma_f.close()
+    except urllib.error.HTTPError as e:
+        if e.code == 404:
+            # some versions of britney output this file uncompressed, try that
+            # location too
+            f = get_url(excuses_url.removesuffix('.xz'), force_cached)
+            excuses = yaml.load(f, Loader=yaml.CSafeLoader)
+            f.close()
+        else:
+            raise
+    return excuses['sources']
+
+
+args = parse_args()
+
+excuses_url = 'http://people.canonical.com/~ubuntu-archive/proposed-migration/%s/update_excuses.yaml.xz' % args.series
+
+sources = get_sources(excuses_url, args.force_cached)
+
+clusters = {}
+for source in sources:
+    if 'dependencies' in source and 'migrate-after' in source['dependencies']:
+        clusters[source['source']] = source['dependencies']['migrate-after']
+
+clusters = {k: v for k, v in sorted(clusters.items(),
+                                    key=lambda item: -len(item[1]))}
+
+count = 0
+limit = int(args.limit)
+seen_packages = set()
+for k, v in clusters.items():
+    if k in seen_packages:
+        seen_packages.update(v)
+        continue
+    count += 1
+    if count > limit:
+        break
+    length = len(v)
+    if length < 3:
+        break
+    seen_packages.update(v)
+    print(k, length)
diff --git a/retry-autopkgtest-regressions b/retry-autopkgtest-regressions
index 9a5f912..8d058f2 100755
--- a/retry-autopkgtest-regressions
+++ b/retry-autopkgtest-regressions
@@ -246,7 +246,7 @@ def get_regressions(excuses_url, release, retry_states, min_age, max_age,
         if e.code == 404:
             # some versions of britney output this file uncompressed, try that
             # location too
-            f = get_url(excuses_url.rstrip('.xz'), force_cached)
+            f = get_url(excuses_url.removesuffix('.xz'), force_cached)
             excuses = yaml.load(f, Loader=yaml.CSafeLoader)
             f.close()
         else:
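
To make the review remark about output concrete: each line the script prints is a source package stuck in -proposed, followed by the size of its 'migrate-after' list, i.e. the number of packages that would migrate after this one does. Only clusters of 3 or more packages are shown, largest first, up to --limit entries. Below is a minimal sketch of that clustering step, run against hand-made excuses data; the package names are made up, and the seen-package de-duplication from the script is omitted for brevity:

    # Toy stand-in for excuses['sources']; package names are hypothetical.
    sources = [
        {'source': 'glibc', 'dependencies': {'migrate-after': ['a', 'b', 'c']}},
        {'source': 'a', 'dependencies': {'migrate-after': ['b', 'c']}},
        {'source': 'zlib', 'dependencies': {'migrate-after': ['x']}},
    ]

    # Map each source to its cluster, as the script does.
    clusters = {s['source']: s['dependencies']['migrate-after']
                for s in sources
                if 'migrate-after' in s.get('dependencies', {})}

    # Print the biggest cluster first, matching the script's output ordering.
    for name, blocked in sorted(clusters.items(), key=lambda kv: -len(kv[1])):
        print(name, len(blocked))   # -> glibc 3 / a 2 / zlib 1

Against a real series, the script instead fetches update_excuses.yaml.xz for the series given with -s/--series and applies the cluster-size (>= 3) and --limit cut-offs before printing.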
