Hi Michael! Thanks for getting in there and improving Launchpad code! As Aaron suggested, it would be really helpful to include more info with your merge proposal. A template we often try to use is: https://dev.launchpad.net/QuickCoverLetterTemplate or if you want lots of details ;) : https://dev.launchpad.net/DetailedCoverLetterTemplate Especially important for branches like this without tests is a QA plan for how we can verify on dogfood that this does exactly what you intend. Tests would be wonderful, but I know that it's fair to ask you to add them now (If you want an example of how other cron scripts are tested, see lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py) OK, I've got a bunch of small changes below, but more important for me is the summary of what this branch does (after reading it I've now understood most of it, but it would still be helpful to clarify) and steps for us to QA this on dogfood. As I don't have lots of experience with germination etc. (I'm still learning ;) ), and Julian is away, I'll ask William to take a look to and provide feedback. Thanks! > === modified file 'cronscripts/publishing/maintenance-check.py' > --- cronscripts/publishing/maintenance-check.py 2010-01-22 13:57:45 +0000 > +++ cronscripts/publishing/maintenance-check.py 2010-03-25 11:57:28 +0000 > @@ -59,9 +63,57 @@ > # germinate output base directory > BASE_URL = "http://people.canonical.com/~ubuntu-archive/germinate-output/" > > +# hints dir url, hints file is "$distro.hints" by default > +# (e.g. lucid.hints) > +#HINTS_DIR_URL = "http://people.canonical.com/~mvo/maintenance-check/%s.hints" Is there a reason for leaving the above comment eg. in? > +HINTS_DIR_URL = "http://people.canonical.com/~ubuntu-archive/seeds/platform.%s/SUPPORTED_HINTS" > + > +# we need the archive root to parse the Sources file to support > +# by-source hints > +#ARCHIVE_ROOT = "file:/srv/launchpad.net/ubuntu-archive/ubuntu" Same above? > +ARCHIVE_ROOT = "http://archive.ubuntu.com/ubuntu" > + > # support timeframe tag used in the Packages file > SUPPORT_TAG = "Supported" > > +def get_binaries_for_source_pkg(srcname): > + """ > + get all binary package names for the given source package name > + """ Just a few style comments (based on the Launchpad python style guide at https://dev.launchpad.net/PythonStyleGuide). I realise the rest of this file doesn't follow the style-guide, but it'd be nice to clean it up bit-by-bit (but not necessary, up to you). So the above docstring could be formatted along the lines of: """ Return all binary names for the given source name.""" If you are really keen, you could add the :param: and :return: types as outlined under "Docstrings" in the above styleguide, but it's not necessary. > + pkgnames = set() > + recs = apt_pkg.GetPkgSrcRecords() > + while recs.Lookup(srcname): > + for binary in recs.Binaries: > + pkgnames.add(binary) > + return pkgnames > + > +def expand_src_pkgname(pkgname): > + """ expand a given pkgname if prefixed with src: to a list of > + binary package names, if not prefixed, just return the pkgname > + """ """ Expand a package name if it is prefixed with src. If the package name is prefixed with src it will be expanded to a list of binary package names. Otherwise the original package name will be returned. """ > + if not pkgname.startswith("src:"): > + return [pkgname] > + return get_binaries_for_source_pkg(pkgname.split("src:")[1]) > + > +def create_and_update_deb_src_source_list(distro): > + """ > + create sources.list with deb-src entries for a given distro release > + and update to make sure we are current > + """ Similar docstring formatting here (and I think we use distroseries instead of distro release now?) More importantly, without an explanation, I don't understand why this function is being added to update the src sources list... I assume it's to do with the hints, but I'm not sure how. > + # apt root dir > + rootdir="./aptroot.%s" % distro > + sources_list_dir = os.path.join(rootdir, "etc","apt") > + if not os.path.exists(sources_list_dir): > + os.makedirs(sources_list_dir) > + sources_list = open(os.path.join(sources_list_dir, "sources.list"),"w") > + for pocket in ["%s" % distro, > + "%s-updates" % distro, > + "%s-security" % distro]: Small style-guide differences: for pocket in [ "%s" % distro, "%s-updates" % distro, "%s-security" % distro, ]: (or if you prefer, just define the list first. > + sources_list.write("deb-src %s %s main restricted\n" % ( > + ARCHIVE_ROOT, pocket)) sources_list.write( "deb-src %s %s main restricted\n" % ( ARCHIVE_ROOT, pocket)) > + sources_list.close() > + cache = apt.Cache(rootdir=rootdir) > + cache.update(apt.progress.FetchProgress()) > > def get_structure(name, version): > """ > @@ -150,6 +202,8 @@ > parser.add_option("--source-packages", "", default=False, > action="store_true", > help="show as source pkgs") > + parser.add_option("--hints-file", "", default=None, > + help="use diffenrt use hints file location") > (options, args) = parser.parse_args() > > # init > @@ -160,6 +214,17 @@ > sys.exit(1) > else: > distro = "lucid" > + > + # make sure our deb-src information is up-to-date So this is related to my comment above. Why is this necessary now when it wasn't before. It's probably obvious to you, but not to me :) > + create_and_update_deb_src_source_list(distro) > + > + if options.hints_file: > + hints_file = options.hints_file > + (schema, netloc, path, query, fragment) = urlparse.urlsplit(hints_file) > + if not schema: > + hints_file = "file:%s" % path > + else: > + hints_file = HINTS_DIR_URL % distro > > # go over the distros we need to check > pkg_support_time = {} > @@ -175,20 +240,57 @@ > else: > support_timeframe = SUPPORT_TIMEFRAME > get_packages_support_time(structure, name, pkg_support_time, support_timeframe) > + > + # now check the hints file that is used to overwrite > + # the default seeds > + try: > + for line in urllib2.urlopen(hints_file): > + line = line.strip() > + if not line or line.startswith("#"): > + continue > + try: > + (raw_pkgname, support_time) = line.split() > + for pkgname in expand_src_pkgname(raw_pkgname): > + if support_time == 'unsupported': > + try: > + del pkg_support_time[pkgname] > + sys.stderr.write("hints-file: marking %s unsupported\n" % pkgname) > + except KeyError: > + pass > + else: > + if pkg_support_time.get(pkgname) != support_time: > + sys.stderr.write("hints-file: changing %s from %s to %s\n" % (pkgname, pkg_support_time.get(pkgname), support_time)) Lint should tell you this is too long. Actually, there's lots of lint here to clean up if you've time: http://pastebin.ubuntu.com/407510/ > + pkg_support_time[pkgname] = support_time > + except: > + logging.exception("can not parts line '%s'" % line) s/parts/parse? > + except urllib2.HTTPError, e: > + if e.getcode() != 404: > + raise > + sys.stderr.write("hints-file: %s gave 404 error\n" % hints_file) > > # output suitable for the extra-override file > for pkgname in sorted(pkg_support_time.keys()): > - # go over the supported arches, they are divided in > - # first-class (PRIMARY) and second-class with different > - # support levels > - for arch in SUPPORTED_ARCHES: > - # full LTS support > - if arch in PRIMARY_ARCHES: > - print "%s/%s %s %s" % ( > - pkgname, arch, SUPPORT_TAG, pkg_support_time[pkgname]) > - else: > - # not a LTS supported architecture, gets only regular > - # support_timeframe > - print "%s/%s %s %s" % ( > - pkgname, arch, SUPPORT_TAG, SUPPORT_TIMEFRAME[0][0]) > + # special case, the hints file may contain overwrites that s/overwrites/overrides ? > + # are arch-specific (like zsh-doc/armel) > + if "/" in pkgname: > + print "%s %s %s" % ( > + pkgname, SUPPORT_TAG, pkg_support_time[pkgname]) Could this not be done below with something like: if "/" in pkgname: pkgname_and_arch = pkgname else: pkgname_and_arch = "%s/%s" % (pkgname, arch) That would make your diff a bit smaller. > + else: > + # go over the supported arches, they are divided in > + # first-class (PRIMARY) and second-class with different > + # support levels > + for arch in SUPPORTED_ARCHES: > + # ensure we do not overwrite arch-specific overwrites > + pkgname_and_arch = "%s/%s" % (pkgname, arch) > + if pkgname_and_arch in pkg_support_time: > + break > + if arch in PRIMARY_ARCHES: > + # arch with full LTS support > + print "%s %s %s" % ( > + pkgname_and_arch, SUPPORT_TAG, pkg_support_time[pkgname]) > + else: > + # not a LTS supported architecture, gets only regular > + # support_timeframe > + print "%s %s %s" % ( > + pkgname_and_arch, SUPPORT_TAG, SUPPORT_TIMEFRAME[0][0]) >