(I reviewed this several hours ago by email and only just noticed that LP rejected my email..GRRRRRRRRRRR) Thanks for doing this Jeroen, it looks pretty good so far. I've just got a few nits inline: > === modified file 'lib/lp/archivepublisher/domination.py' > --- lib/lp/archivepublisher/domination.py 2011-11-02 12:58:31 +0000 > +++ lib/lp/archivepublisher/domination.py 2011-11-03 10:12:17 +0000 > @@ -52,8 +52,12 @@ > > __all__ = ['Dominator'] > > +from collections import defaultdict > from datetime import timedelta > -from operator import itemgetter > +from operator import ( > + attrgetter, > + itemgetter, > + ) > > import apt_pkg > from storm.expr import ( > @@ -72,6 +76,7 @@ > DecoratedResultSet, > ) > from canonical.launchpad.interfaces.lpstorm import IStore > +from canonical.launchpad.utilities.orderingcheck import OrderingCheck > from lp.registry.model.sourcepackagename import SourcePackageName > from lp.services.database.bulk import load_related > from lp.soyuz.enums import ( > @@ -192,6 +197,104 @@ > else: > return version_comparison > > + def sortPublications(self, publications): > + """Sort publications from most to least current versions.""" > + # Listify; we want to iterate this twice, which won't do for a > + # non-persistent sequence. > + sorted_publications = list(publications) This variable is prematurely named, I think I'd just write: publications = list(publications) and return publications.sort(...) later. > + # Batch-load associated package releases; we'll be needing them > + # to compare versions. > + self.load_releases(sorted_publications) > + # Now sort. This is that second iteration. An in-place sort > + # won't hurt the original, because we're working on a copy of > + # the original iterable. > + sorted_publications.sort(cmp=self.compare, reverse=True) > + return sorted_publications > + > + > +def find_live_source_versions(publications): > + """Find versions out of Published `publications` that should stay live. > + > + This particular notion of liveness applies to source domination: the > + latest version stays live, and that's it. > + > + :param publications: An iterable of `SourcePackagePublishingHistory` > + sorted by descending package version. > + :return: A list of live versions. > + """ > + # Given the required sort order, the latest version is at the head > + # of the list. > + return [publications[0].sourcepackagerelease.version] Nicely abstracted. > + > + > +def get_binary_versions(binary_publications): > + """List versions for sequence of `BinaryPackagePublishingHistory`.""" > + return [pub.binarypackagerelease.version for pub in binary_publications] If you're going to do :param: and :return: for the other functions, you should do it here too! > + > + > +def find_live_binary_versions_pass_1(publications): > + """Find versions out of Published `publications` that should stay live. > + > + This particular notion of liveness applies to first-pass binary > + domination: the latest version stays live, and so do publications of > + binary packages for the "all" architecture. > + > + :param publications: An iterable of `BinaryPackagePublishingHistory`, > + sorted by descending package version. > + :return: A list of live versions. > + """ > + publications = list(publications) > + latest = publications.pop(0) > + return get_binary_versions( > + [latest] + [ > + pub for pub in publications if not pub.architecture_specific]) pub.architecture_specific masks a query, in case you didn't know. I absolutely detest properties that do that, it encourages SQL death-by-a- thousand-cuts. > + > + > +def find_live_binary_versions_pass_2(publications): > + """Find versions out of Published `publications` that should stay live. > + > + This particular notion of liveness applies to second-pass binary > + domination: the latest version stays live, and architecture-specific > + publications stay live (i.e, ones that are not for the "all" > + architecture). > + > + More importantly, any publication for binary packages of the "all" > + architecture stay live if any of the non-"all" binary packages from > + the same source package release are still active -- even if they are > + for other architectures. > + > + This is the raison d'etre for the two-pass binary domination algorithm: > + to let us see which architecture-independent binary publications can be > + superseded without rendering any architecture-specific binaries from the > + same source package release uninstallable. > + > + (Note that here, "active" includes Published publications but also > + Pending ones. This is standard nomenclature in Soyuz. Some of the > + domination code confuses matters by using the term "active" to mean only > + Published publications). > + > + :param publications: An iterable of `BinaryPackagePublishingHistory`, > + sorted by descending package version. > + :return: A list of live versions. Thanks for the detailed docstring. I'd only add that "all" means not architecture_specific otherwise non-soyuz people reading this will get confused. > + """ > + publications = list(publications) > + latest = publications.pop(0) > + is_arch_specific = attrgetter('architecture_specific') > + arch_specific_pubs = filter(is_arch_specific, publications) > + arch_indep_pubs = filter( > + lambda pub: not is_arch_specific(pub), > + publications) Out of interest, why didn't you use a list comprehension? I thought map and filter were deprecated these days. > + > + # XXX JeroenVermeulen 2011-11-01 bug=884649: This is likely to be > + # costly, and the result could be reused for all builds of the same > + # source package release to all architectures. > + reprieved_pubs = [ > + pub > + for pub in arch_indep_pubs > + if pub.getOtherPublicationsForSameSource().any()] > + > + return get_binary_versions([latest] + arch_specific_pubs + reprieved_pubs) > + > > class Dominator: > """Manage the process of marking packages as superseded. > @@ -209,27 +312,6 @@ > self.logger = logger > self.archive = archive > > - def _checkArchIndep(self, publication): > - """Return True if the binary publication can be superseded. > - > - If the publication is an arch-indep binary, we can only supersede > - it if all the binaries from the same source are also superseded, > - else those binaries may become uninstallable. > - See bug 34086. > - """ > - binary = publication.binarypackagerelease > - if not binary.architecturespecific: > - # getOtherPublicationsForSameSource returns PENDING in > - # addition to PUBLISHED binaries, and we rely on this since > - # they must also block domination. > - others = publication.getOtherPublicationsForSameSource() > - if others.any(): > - # Don't dominate this arch:all binary as there are > - # other arch-specific binaries from the same build > - # that are still active. > - return False > - return True > - > def dominatePackage(self, publications, live_versions, generalization): > """Dominate publications for a single package. > > @@ -247,34 +329,33 @@ > > :param publications: Iterable of publications for the same package, > in the same archive, series, and pocket, all with status > - `PackagePublishingStatus.PUBLISHED`. > - :param live_versions: Iterable of version strings that are still > - considered live for this package. The given publications will > - remain active insofar as they represent any of these versions; > - older publications will be marked as superseded and newer ones > - as deleted. > + `PackagePublishingStatus.PUBLISHED`. They must be sorted from > + most current to least current, as would be the result of > + `generalization.sortPublications`. > + :param live_versions: Iterable of versions that are still considered > + "live" for this package. For any of these, the latest publication > + among `publications` will remain Published. Publications for > + older releases, as well as older publications of live versions, > + will be marked as Superseded. Publications of newer versions than > + are listed in `live_versions` are marked as Deleted. > :param generalization: A `GeneralizedPublication` helper representing > - the kind of publications these are--source or binary. > + the kind of publications these are: source or binary. > """ > - publications = list(publications) > - generalization.load_releases(publications) > - > - # Go through publications from latest version to oldest. This > - # makes it easy to figure out which release superseded which: > - # the dominant is always the oldest live release that is newer > - # than the one being superseded. In this loop, that means the > - # dominant is always the last live publication we saw. > - publications = sorted( > - publications, cmp=generalization.compare, reverse=True) > + live_versions = frozenset(live_versions) > > self.logger.debug( > "Package has %d live publication(s). Live versions: %s", > len(publications), live_versions) > > + # Verify that the publications are really sorted properly. > + check_order = OrderingCheck(cmp=generalization.compare, reverse=True) Do you really need this? Isn't it just overhead? (I realise it's belt & braces to prevent coding snafus, but this is a performance-critical area) > + > current_dominant = None > dominant_version = None > > for pub in publications: > + check_order.check(pub) > + > version = generalization.getPackageVersion(pub) > # There should never be two published releases with the same > # version. So it doesn't matter whether this comparison is > @@ -295,11 +376,6 @@ > current_dominant = pub > dominant_version = version > self.logger.debug2("Keeping version %s.", version) > - elif not (generalization.is_source or self._checkArchIndep(pub)): > - # As a special case, we keep this version live as well. > - current_dominant = pub > - dominant_version = version > - self.logger.debug2("Keeping version %s.", version) > elif current_dominant is None: > # This publication is no longer live, but there is no > # newer version to supersede it either. Therefore it > @@ -312,50 +388,32 @@ > pub.supersede(current_dominant, logger=self.logger) > self.logger.debug2("Superseding version %s.", version) > > - def _dominatePublications(self, pubs, generalization): > - """Perform dominations for the given publications. > - > - Keep the latest published version for each package active, > - superseding older versions. > - > - :param pubs: A dict mapping names to a list of publications. Every > - publication must be PUBLISHED or PENDING, and the first in each > - list will be treated as dominant (so should be the latest). > - :param generalization: A `GeneralizedPublication` helper representing > - the kind of publications these are--source or binary. > - """ > - self.logger.debug("Dominating packages...") > - for name, publications in pubs.iteritems(): > - assert publications, "Empty list of publications for %s." % name > - # Since this always picks the latest version as the live > - # one, this dominatePackage call will never result in a > - # deletion. > - latest_version = generalization.getPackageVersion(publications[0]) > - self.logger.debug2("Dominating %s" % name) > - self.dominatePackage( > - publications, [latest_version], generalization) > - > - def _sortPackages(self, pkglist, generalization): > - """Map out packages by name, and sort by descending version. > - > - :param pkglist: An iterable of `SourcePackagePublishingHistory` or > - `BinaryPackagePublishingHistory`. > - :param generalization: A `GeneralizedPublication` helper representing > - the kind of publications these are--source or binary. > - :return: A dict mapping each package name to a list of publications > - from `pkglist`, newest first. > - """ > - self.logger.debug("Sorting packages...") > - > - outpkgs = {} > - for inpkg in pkglist: > - key = generalization.getPackageName(inpkg) > - outpkgs.setdefault(key, []).append(inpkg) > - > - for package_pubs in outpkgs.itervalues(): > - package_pubs.sort(cmp=generalization.compare, reverse=True) > - > - return outpkgs > + def _sortPackages(self, publications, generalization): > + """Partition publications by package name, and sort them. > + > + The publications are sorted from most current to least current, > + as required by `dominatePackage` etc. > + > + :param publications: An iterable of `SourcePackagePublishingHistory` > + or of `BinaryPackagePublishingHistory`. > + :param generalization: A `GeneralizedPublication` helper representing > + the kind of publications these are: source or binary. > + :return: A dict mapping each package name to a sorted list of > + publications from `publications`. > + """ > + pubs_by_package = defaultdict(list) > + for pub in publications: > + pubs_by_package[generalization.getPackageName(pub)].append(pub) > + > + # Sort the publication lists. This is not an in-place sort, so > + # it involves altering the dict while we iterate it. Listify > + # the keys so that we can be sure that we're not altering the > + # iteration order while iteration is underway. > + for package in list(pubs_by_package.keys()): > + pubs_by_package[package] = generalization.sortPublications( > + pubs_by_package[package]) > + > + return pubs_by_package > > def _setScheduledDeletionDate(self, pub_record): > """Set the scheduleddeletiondate on a publishing record. > @@ -520,7 +578,11 @@ > bins = self.findBinariesForDomination(distroarchseries, pocket) > sorted_packages = self._sortPackages(bins, generalization) > self.logger.info("Dominating binaries...") > - self._dominatePublications(sorted_packages, generalization) > + for name, pubs in sorted_packages.iteritems(): > + self.logger.debug("Dominating %s" % name) > + assert len(pubs) > 0, "Dominating zero binaries!" > + live_versions = find_live_binary_versions_pass_1(pubs) > + self.dominatePackage(pubs, live_versions, generalization) > > # We need to make a second pass to cover the cases where: > # * arch-specific binaries were not all dominated before arch-all > @@ -534,7 +596,11 @@ > bins = self.findBinariesForDomination(distroarchseries, pocket) > sorted_packages = self._sortPackages(bins, generalization) > self.logger.info("Dominating binaries...(2nd pass)") > - self._dominatePublications(sorted_packages, generalization) > + for name, pubs in sorted_packages.iteritems(): > + self.logger.debug("Dominating %s" % name) > + assert len(pubs) > 0, "Dominating zero binaries in 2nd pass!" > + live_versions = find_live_binary_versions_pass_2(pubs) > + self.dominatePackage(pubs, live_versions, generalization) > > def _composeActiveSourcePubsCondition(self, distroseries, pocket): > """Compose ORM condition for restricting relevant source pubs.""" > @@ -550,7 +616,12 @@ > ) > > def findSourcesForDomination(self, distroseries, pocket): > - """Find binary publications that need dominating.""" > + """Find binary publications that need dominating. > + > + This is only for traditional domination, where the latest published > + publication is always kept published. It will ignore publications > + that have no other publications competing for the same binary package. > + """ This docstring doesn't make sense when read in isolation. You might want to mention the arch-all thing or just reference the explanation in the docstring for find_live_binary_versions_pass_2(). > # Avoid circular imports. > from lp.soyuz.model.publishing import SourcePackagePublishingHistory > > @@ -589,11 +660,18 @@ > distroseries.name, pocket.title) > > generalization = GeneralizedPublication(is_source=True) > + > + self.logger.debug("Finding sources...") > sources = self.findSourcesForDomination(distroseries, pocket) > + sorted_packages = self._sortPackages(sources, generalization) > > self.logger.debug("Dominating sources...") > - self._dominatePublications( > - self._sortPackages(sources, generalization), generalization) > + for name, pubs in sorted_packages.iteritems(): > + self.logger.debug("Dominating %s" % name) > + assert len(pubs) > 0, "Dominating zero binaries!" This is for sources! > + live_versions = find_live_source_versions(pubs) > + self.dominatePackage(pubs, live_versions, generalization) > + > flush_database_updates() > > def findPublishedSourcePackageNames(self, distroseries, pocket): > @@ -653,6 +731,7 @@ > """ > generalization = GeneralizedPublication(is_source=True) > pubs = self.findPublishedSPPHs(distroseries, pocket, package_name) > + pubs = generalization.sortPublications(pubs) > self.dominatePackage(pubs, live_versions, generalization) > > def judge(self, distroseries, pocket): > > === modified file 'lib/lp/archivepublisher/tests/test_dominator.py' > --- lib/lp/archivepublisher/tests/test_dominator.py 2011-11-02 10:28:31 +0000 > +++ lib/lp/archivepublisher/tests/test_dominator.py 2011-11-03 10:12:17 +0000 > @@ -16,6 +16,9 @@ > from canonical.testing.layers import ZopelessDatabaseLayer > from lp.archivepublisher.domination import ( > Dominator, > + find_live_binary_versions_pass_1, > + find_live_binary_versions_pass_2, > + find_live_source_versions, > GeneralizedPublication, > STAY_OF_EXECUTION, > ) > @@ -30,6 +33,7 @@ > StormStatementRecorder, > TestCaseWithFactory, > ) > +from lp.testing.fakemethod import FakeMethod Ahhh your favourite helper! > from lp.testing.matchers import HasQueryCount > > > @@ -72,13 +76,9 @@ > is_source=ISourcePackagePublishingHistory.providedBy(dominant)) > dominator = Dominator(self.logger, self.ubuntutest.main_archive) > > - # The _dominate* test methods require a dictionary where the > - # package name is the key. The key's value is a list of > - # source or binary packages representing dominant, the first element > - # and dominated, the subsequents. > - pubs = {'foo': [dominant, dominated]} > - > - dominator._dominatePublications(pubs, generalization) > + pubs = [dominant, dominated] > + live_versions = [generalization.getPackageVersion(dominant)] > + dominator.dominatePackage(pubs, live_versions, generalization) > flush_database_updates() > > # The dominant version remains correctly published. > @@ -158,16 +158,30 @@ > [foo_10_source] + foo_10_binaries, > PackagePublishingStatus.SUPERSEDED) > > - def testEmptyDomination(self): > - """Domination asserts for not empty input list.""" > - dominator = Dominator(self.logger, self.ubuntutest.main_archive) > - pubs = {'foo': []} > - # This isn't a really good exception. It should probably be > - # something more indicative of bad input. > - self.assertRaises( > - AssertionError, > - dominator._dominatePublications, > - pubs, GeneralizedPublication(True)) > + def test_dominateBinaries_rejects_empty_publication_list(self): > + """Domination asserts for non-empty input list.""" > + package = self.factory.makeBinaryPackageName() > + dominator = Dominator(self.logger, self.ubuntutest.main_archive) > + dominator._sortPackages = FakeMethod({package.name: []}) > + # This isn't a really good exception. It should probably be > + # something more indicative of bad input. > + self.assertRaises( > + AssertionError, > + dominator.dominateBinaries, > + self.factory.makeDistroArchSeries().distroseries, > + self.factory.getAnyPocket()) > + > + def test_dominateSources_rejects_empty_publication_list(self): > + """Domination asserts for non-empty input list.""" > + package = self.factory.makeSourcePackageName() > + dominator = Dominator(self.logger, self.ubuntutest.main_archive) > + dominator._sortPackages = FakeMethod({package.name: []}) > + # This isn't a really good exception. It should probably be > + # something more indicative of bad input. > + self.assertRaises( > + AssertionError, > + dominator.dominateSources, > + self.factory.makeDistroSeries(), self.factory.getAnyPocket()) > > def test_archall_domination(self): > # Arch-all binaries should not be dominated when a new source > @@ -358,6 +372,16 @@ > SeriesStatus.OBSOLETE) > > > +def remove_security_proxies(proxied_objects): > + """Return list of `proxied_objects`, without their proxies. > + > + The dominator runs only in scripts, where security proxies don't get > + in the way. To test realistically for this environment, strip the > + proxies wherever necessary and do as you will. > + """ > + return [removeSecurityProxy(obj) for obj in proxied_objects] This tells me that the script is running in the wrong layer perhaps? I've never had to do this in a Zopeless test - there is no proxy to remove. > +def make_publications_arch_specific(pubs, arch_specific=True): > + """Set the `architecturespecific` attribute for given SPPHs. > + > + :param pubs: An iterable of `BinaryPackagePublishingHistory`. > + :param arch_specific: Whether the binary package releases published > + by `pubs` are to be architecture-specific. If not, they will be > + treated as being for the "all" architecture. > + """ > + for pub in pubs: > + bpr = removeSecurityProxy(pub).binarypackagerelease > + bpr.architecturespecific = arch_specific > + > + > +class TestLivenessFunctions(TestCaseWithFactory): > + """Tests for the functions that say which versions are live.""" > + > + layer = ZopelessDatabaseLayer Hmmm. Something is weird then. Ah, it's the set_attribute in the zcml for BPR of course.... > + > + def test_find_live_source_versions_blesses_latest(self): > + spphs = make_spphs_for_versions(self.factory, ['1.2', '1.1', '1.0']) > + self.assertEqual(['1.2'], find_live_source_versions(spphs)) > + > + def test_find_live_binary_versions_pass_1_blesses_latest(self): > + bpphs = make_bpphs_for_versions(self.factory, ['1.2', '1.1', '1.0']) > + make_publications_arch_specific(bpphs) > + self.assertEqual(['1.2'], find_live_binary_versions_pass_1(bpphs)) > + > + def test_find_live_binary_versions_pass_1_blesses_arch_all(self): > + versions = list(reversed(['1.%d' % version for version in range(3)])) > + bpphs = make_bpphs_for_versions(self.factory, versions) > + > + # All of these publications are architecture-specific, except > + # the last one. This would happen if the binary package had > + # just changed from being architecture-specific to being > + # architecture-independent. Did you consider the case where arch-indep turns into arch-specific? > + make_publications_arch_specific(bpphs, True) > + make_publications_arch_specific(bpphs[-1:], False) > + self.assertEqual( > + versions[:1] + versions[-1:], > + find_live_binary_versions_pass_1(bpphs)) > + > + def test_find_live_binary_versions_pass_2_blesses_latest(self): > + bpphs = make_bpphs_for_versions(self.factory, ['1.2', '1.1', '1.0']) > + make_publications_arch_specific(bpphs, False) > + self.assertEqual(['1.2'], find_live_binary_versions_pass_2(bpphs)) > + > + def test_find_live_binary_versions_pass_2_blesses_arch_specific(self): > + versions = list(reversed(['1.%d' % version for version in range(3)])) > + bpphs = make_bpphs_for_versions(self.factory, versions) > + make_publications_arch_specific(bpphs) > + self.assertEqual(versions, find_live_binary_versions_pass_2(bpphs)) Can you comment what "blesses" means in the last two tests - you'll thank me in a year's time when you come back to this > + > + def test_find_live_binary_versions_pass_2_reprieves_arch_all(self): > + # An arch-all BPPH for a BPR built by an SPR that also still has > + # active arch-dependent BPPHs gets a reprieve: it can't be > + # superseded until those arch-dependent BPPHs have been > + # superseded. > + bpphs = make_bpphs_for_versions(self.factory, ['1.2', '1.1', '1.0']) > + make_publications_arch_specific(bpphs, False) > + dependent = self.factory.makeBinaryPackagePublishingHistory( > + binarypackagerelease=bpphs[1].binarypackagerelease) > + make_publications_arch_specific([dependent], True) > + self.assertEqual( > + ['1.2', '1.1'], find_live_binary_versions_pass_2(bpphs)) >