Merge lp:~jtv/launchpad/bug-884649-branch-5 into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Julian Edwards
Approved revision: no longer in the source branch.
Merged at revision: 14572
Proposed branch: lp:~jtv/launchpad/bug-884649-branch-5
Merge into: lp:launchpad
Diff against target: 292 lines (+67/-91)
2 files modified
lib/lp/archivepublisher/domination.py (+67/-78)
lib/lp/archivepublisher/tests/test_dominator.py (+0/-13)
To merge this branch: bzr merge lp:~jtv/launchpad/bug-884649-branch-5
Reviewer Review Type Date Requested Status
Julian Edwards (community) Approve
Review via email: mp+86518@code.launchpad.net

Commit message

[r=julian-edwards][bug=884649] Use denormalized [BS]PPH.[bs]pn columns in dominator.

Description of the change

= Summary =

To get to a source package publication record's source package name, we no longer have to go through the associated source package release. There is now a column on SourcePackagePublishingHistory that gives us the SourcePackageName. Similar for BinaryPackagePublishingHistory & BinaryPackageName.

== Pre-implementation notes ==

The work to introduce these new columns wasn't moving along so I helped optimize the populators, kept an eye on them, retired them, added NOT NULL constraints, and initialized the sample data accordingly.

Meanwhile, on the flip side of the coin, domination is actually pretty fast already now. It probably takes less than 10% of the publication process that we're trying to speed up (though it's close enough to its current run-time target that a few percent of optimization are probably still welcome). I chose to keep thebranch because it's also a chance to make things just a bit shorter.

== Implementation details ==

Sadly, the extra column on BPPH doesn't seem to help the dominator all that much — at least not in terms of code simplicity. Most use-cases need the BinaryPackageRelease as well anyway.

== Tests ==

{{{
./bin/test -vvc lp.archvepublisher -t dominator
}}}

== Demo and Q/A ==

Run publish-ftpmaster for Ubuntu. Domination should still work.

Similarly, run gina and verify domination for Debian.

= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  lib/lp/archivepublisher/domination.py
  lib/lp/archivepublisher/tests/test_dominator.py

To post a comment you must log in.
Revision history for this message
Julian Edwards (julian-edwards) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/archivepublisher/domination.py'
2--- lib/lp/archivepublisher/domination.py 2011-12-19 23:38:16 +0000
3+++ lib/lp/archivepublisher/domination.py 2011-12-21 09:51:27 +0000
4@@ -98,10 +98,15 @@
5 apt_pkg.InitSystem()
6
7
8-def join_spr_spn():
9- """Join condition: SourcePackageRelease/SourcePackageName."""
10- return (
11- SourcePackageName.id == SourcePackageRelease.sourcepackagenameID)
12+def join_spph_spn():
13+ """Join condition: SourcePackagePublishingHistory/SourcePackageName."""
14+ # Avoid circular imports.
15+ from lp.soyuz.model.publishing import SourcePackagePublishingHistory
16+
17+ SPPH = SourcePackagePublishingHistory
18+ SPN = SourcePackageName
19+
20+ return SPN.id == SPPH.sourcepackagenameID
21
22
23 def join_spph_spr():
24@@ -110,9 +115,10 @@
25 # Avoid circular imports.
26 from lp.soyuz.model.publishing import SourcePackagePublishingHistory
27
28- return (
29- SourcePackageRelease.id ==
30- SourcePackagePublishingHistory.sourcepackagereleaseID)
31+ SPPH = SourcePackagePublishingHistory
32+ SPR = SourcePackageRelease
33+
34+ return SPR.id == SPPH.sourcepackagereleaseID
35
36
37 class SourcePublicationTraits:
38@@ -127,7 +133,7 @@
39 @staticmethod
40 def getPackageName(spph):
41 """Return the name of this publication's source package."""
42- return spph.sourcepackagerelease.sourcepackagename.name
43+ return spph.sourcepackagename.name
44
45 @staticmethod
46 def getPackageRelease(spph):
47@@ -147,7 +153,7 @@
48 @staticmethod
49 def getPackageName(bpph):
50 """Return the name of this publication's binary package."""
51- return bpph.binarypackagerelease.binarypackagename.name
52+ return bpph.binarypackagename.name
53
54 @staticmethod
55 def getPackageRelease(bpph):
56@@ -178,12 +184,6 @@
57 """Obtain the version string for a publication record."""
58 return self.traits.getPackageRelease(pub).version
59
60- def load_releases(self, pubs):
61- """Load the releases associated with a series of publications."""
62- return load_related(
63- self.traits.release_class, pubs,
64- [self.traits.release_reference_name])
65-
66 def compare(self, pub1, pub2):
67 """Compare publications by version.
68
69@@ -201,17 +201,7 @@
70
71 def sortPublications(self, publications):
72 """Sort publications from most to least current versions."""
73- # Listify; we want to iterate this twice, which won't do for a
74- # non-persistent sequence.
75- publications = list(publications)
76- # Batch-load associated package releases; we'll be needing them
77- # to compare versions.
78- self.load_releases(publications)
79- # Now sort. This is that second iteration. An in-place sort
80- # won't hurt the original, because we're working on a copy of
81- # the original iterable.
82- publications.sort(cmp=self.compare, reverse=True)
83- return publications
84+ return sorted(publications, cmp=self.compare, reverse=True)
85
86
87 def find_live_source_versions(sorted_pubs):
88@@ -335,12 +325,20 @@
89 has active arch-specific publications.
90 :return: A list of live versions.
91 """
92+ # Avoid circular imports
93+ from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
94+
95 sorted_pubs = list(sorted_pubs)
96 latest = sorted_pubs.pop(0)
97 is_arch_specific = attrgetter('architecture_specific')
98 arch_specific_pubs = list(ifilter(is_arch_specific, sorted_pubs))
99 arch_indep_pubs = list(ifilterfalse(is_arch_specific, sorted_pubs))
100
101+ bpbs = load_related(
102+ BinaryPackageBuild,
103+ [pub.binarypackagerelease for pub in arch_indep_pubs], ['buildID'])
104+ load_related(SourcePackageRelease, bpbs, ['source_package_release_id'])
105+
106 reprieved_pubs = [
107 pub
108 for pub in arch_indep_pubs
109@@ -577,46 +575,33 @@
110 # Avoid circular imports.
111 from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
112
113+ BPPH = BinaryPackagePublishingHistory
114+ BPR = BinaryPackageRelease
115+
116 bpph_location_clauses = [
117- BinaryPackagePublishingHistory.status ==
118- PackagePublishingStatus.PUBLISHED,
119- BinaryPackagePublishingHistory.distroarchseries ==
120- distroarchseries,
121- BinaryPackagePublishingHistory.archive == self.archive,
122- BinaryPackagePublishingHistory.pocket == pocket,
123+ BPPH.status == PackagePublishingStatus.PUBLISHED,
124+ BPPH.distroarchseries == distroarchseries,
125+ BPPH.archive == self.archive,
126+ BPPH.pocket == pocket,
127 ]
128 candidate_binary_names = Select(
129- BinaryPackageName.id,
130- And(
131- BinaryPackageRelease.binarypackagenameID ==
132- BinaryPackageName.id,
133- BinaryPackagePublishingHistory.binarypackagereleaseID ==
134- BinaryPackageRelease.id,
135- bpph_location_clauses,
136- ),
137- group_by=BinaryPackageName.id,
138- having=Count(BinaryPackagePublishingHistory.id) > 1)
139- main_clauses = [
140- BinaryPackageRelease.id ==
141- BinaryPackagePublishingHistory.binarypackagereleaseID,
142- BinaryPackageRelease.binarypackagenameID.is_in(
143- candidate_binary_names),
144- BinaryPackageRelease.binpackageformat !=
145- BinaryPackageFormat.DDEB,
146+ BPPH.binarypackagenameID, And(*bpph_location_clauses),
147+ group_by=BPPH.binarypackagenameID, having=(Count() > 1))
148+ main_clauses = bpph_location_clauses + [
149+ BPR.id == BPPH.binarypackagereleaseID,
150+ BPR.binarypackagenameID.is_in(candidate_binary_names),
151+ BPR.binpackageformat != BinaryPackageFormat.DDEB,
152 ]
153- main_clauses.extend(bpph_location_clauses)
154-
155- store = IStore(BinaryPackagePublishingHistory)
156
157 # We're going to access the BPRs as well. Since we make the
158 # database look them up anyway, and since there won't be many
159 # duplications among them, load them alongside the publications.
160 # We'll also want their BinaryPackageNames, but adding those to
161 # the join would complicate the query.
162- query = store.find(
163- (BinaryPackagePublishingHistory, BinaryPackageRelease),
164- *main_clauses)
165- return DecoratedResultSet(query, itemgetter(0))
166+ query = IStore(BPPH).find((BPPH, BPR), *main_clauses)
167+ bpphs = list(DecoratedResultSet(query, itemgetter(0)))
168+ load_related(BinaryPackageName, bpphs, ['binarypackagenameID'])
169+ return bpphs
170
171 def dominateBinaries(self, distroseries, pocket):
172 """Perform domination on binary package publications.
173@@ -684,12 +669,13 @@
174 # Avoid circular imports.
175 from lp.soyuz.model.publishing import SourcePackagePublishingHistory
176
177+ SPPH = SourcePackagePublishingHistory
178+
179 return And(
180- SourcePackagePublishingHistory.status ==
181- PackagePublishingStatus.PUBLISHED,
182- SourcePackagePublishingHistory.distroseries == distroseries,
183- SourcePackagePublishingHistory.archive == self.archive,
184- SourcePackagePublishingHistory.pocket == pocket,
185+ SPPH.status == PackagePublishingStatus.PUBLISHED,
186+ SPPH.distroseries == distroseries,
187+ SPPH.archive == self.archive,
188+ SPPH.pocket == pocket,
189 )
190
191 def findSourcesForDomination(self, distroseries, pocket):
192@@ -706,15 +692,16 @@
193 # Avoid circular imports.
194 from lp.soyuz.model.publishing import SourcePackagePublishingHistory
195
196+ SPPH = SourcePackagePublishingHistory
197+ SPR = SourcePackageRelease
198+
199 spph_location_clauses = self._composeActiveSourcePubsCondition(
200 distroseries, pocket)
201- having_multiple_active_publications = (
202- Count(SourcePackagePublishingHistory.id) > 1)
203 candidate_source_names = Select(
204- SourcePackageName.id,
205- And(join_spph_spr(), join_spr_spn(), spph_location_clauses),
206- group_by=SourcePackageName.id,
207- having=having_multiple_active_publications)
208+ SPPH.sourcepackagenameID,
209+ And(join_spph_spr(), spph_location_clauses),
210+ group_by=SPPH.sourcepackagenameID,
211+ having=(Count() > 1))
212
213 # We'll also access the SourcePackageReleases associated with
214 # the publications we find. Since they're in the join anyway,
215@@ -722,13 +709,14 @@
216 # Actually we'll also want the SourcePackageNames, but adding
217 # those to the (outer) query would complicate it, and
218 # potentially slow it down.
219- query = IStore(SourcePackagePublishingHistory).find(
220- (SourcePackagePublishingHistory, SourcePackageRelease),
221+ query = IStore(SPPH).find(
222+ (SPPH, SPR),
223 join_spph_spr(),
224- SourcePackageRelease.sourcepackagenameID.is_in(
225- candidate_source_names),
226+ SPPH.sourcepackagenameID.is_in(candidate_source_names),
227 spph_location_clauses)
228- return DecoratedResultSet(query, itemgetter(0))
229+ spphs = DecoratedResultSet(query, itemgetter(0))
230+ load_related(SourcePackageName, spphs, ['sourcepackagenameID'])
231+ return spphs
232
233 def dominateSources(self, distroseries, pocket):
234 """Perform domination on source package publications.
235@@ -771,7 +759,7 @@
236 result = IStore(SourcePackageName).find(
237 looking_for,
238 join_spph_spr(),
239- join_spr_spn(),
240+ join_spph_spn(),
241 self._composeActiveSourcePubsCondition(distroseries, pocket))
242 return result.group_by(SourcePackageName.name)
243
244@@ -780,18 +768,19 @@
245 # Avoid circular imports.
246 from lp.soyuz.model.publishing import SourcePackagePublishingHistory
247
248+ SPPH = SourcePackagePublishingHistory
249+ SPR = SourcePackageRelease
250+
251 query = IStore(SourcePackagePublishingHistory).find(
252- SourcePackagePublishingHistory,
253+ SPPH,
254 join_spph_spr(),
255- join_spr_spn(),
256+ join_spph_spn(),
257 SourcePackageName.name == package_name,
258 self._composeActiveSourcePubsCondition(distroseries, pocket))
259 # Sort by descending version (SPR.version has type debversion in
260 # the database, so this should be a real proper comparison) so
261 # that _sortPackage will have slightly less work to do later.
262- return query.order_by(
263- Desc(SourcePackageRelease.version),
264- Desc(SourcePackagePublishingHistory.datecreated))
265+ return query.order_by(Desc(SPR.version), Desc(SPPH.datecreated))
266
267 def dominateSourceVersions(self, distroseries, pocket, package_name,
268 live_versions):
269
270=== modified file 'lib/lp/archivepublisher/tests/test_dominator.py'
271--- lib/lp/archivepublisher/tests/test_dominator.py 2011-11-04 19:11:18 +0000
272+++ lib/lp/archivepublisher/tests/test_dominator.py 2011-12-21 09:51:27 +0000
273@@ -473,19 +473,6 @@
274 bpph.binarypackagerelease.version,
275 GeneralizedPublication(is_source=False).getPackageVersion(bpph))
276
277- def test_load_releases_loads_sourcepackagerelease(self):
278- spph = self.factory.makeSourcePackagePublishingHistory()
279- self.assertContentEqual(
280- [spph.sourcepackagerelease],
281- GeneralizedPublication(is_source=True).load_releases([spph]))
282-
283- def test_load_releases_loads_binarypackagerelease(self):
284- bpph = self.factory.makeBinaryPackagePublishingHistory(
285- binarypackagerelease=self.factory.makeBinaryPackageRelease())
286- self.assertContentEqual(
287- [bpph.binarypackagerelease],
288- GeneralizedPublication(is_source=False).load_releases([bpph]))
289-
290 def test_compare_sorts_versions(self):
291 versions = [
292 '1.1v2',