Merge lp:~wgrant/launchpad/bug-654372-optimise-domination into lp:launchpad

Proposed by William Grant on 2010-11-15
Status: Merged
Approved by: Graham Binns on 2010-11-16
Approved revision: no longer in the source branch.
Merged at revision: 11931
Proposed branch: lp:~wgrant/launchpad/bug-654372-optimise-domination
Merge into: lp:launchpad
Diff against target: 143 lines (+59/-47)
1 file modified
lib/lp/archivepublisher/domination.py (+59/-47)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-654372-optimise-domination
Reviewer Review Type Date Requested Status
Graham Binns (community) code 2010-11-15 Approve on 2010-11-16
Review via email: mp+40854@code.launchpad.net

Commit Message

[r=gmb][ui=none][bug=654372] Optimise source domination down from a few minutes to less than a second.

Description of the Change

= Summary =
The source domination candidate selection process currently takes up to
three minutes per primary series in production. This is roughly 10% of
the total primary archive publisher time.

== Proposed fix ==
Binary domination has an optimisation which cuts it from minutes to less
than a second: it first calculates the set of names which have multiple
active publications and then considers only the publications matching
those names. The obvious fix is to extend this technique to cover source
domination too.

== Pre-implementation notes ==
None. This is just a cleanup of the binary candidate selection approach
and an extension of it to sources.

== Implementation details ==
The existing approach uses a temporary table and string-based SQL
queries. I've revised it to use Storm syntax and a subselect instead.

== Tests ==
This is just about impossible to test -- query counts should be
similar, but they'll be much faster.
lp.archivepublisher.tests.test_dominator has the primary existing tests
for this area.

== Demo and Q/A ==
The tests are reasonably good now, so should catch any regressions. We
may see the dogfood publisher become a few minutes faster.

= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  lib/lp/archivepublisher/domination.py

To post a comment you must log in.
Graham Binns (gmb) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/archivepublisher/domination.py'
2--- lib/lp/archivepublisher/domination.py 2010-10-03 15:30:06 +0000
3+++ lib/lp/archivepublisher/domination.py 2010-11-15 12:03:19 +0000
4@@ -58,19 +58,24 @@
5 import operator
6
7 import apt_pkg
8+from storm.expr import And, Count, Select
9
10 from canonical.database.constants import UTC_NOW
11 from canonical.database.sqlbase import (
12 clear_current_connection_cache,
13- cursor,
14 flush_database_updates,
15 sqlvalues,
16 )
17+from canonical.launchpad.interfaces.lpstorm import IMasterStore
18 from lp.archivepublisher import ELIGIBLE_DOMINATION_STATES
19+from lp.registry.model.sourcepackagename import SourcePackageName
20 from lp.soyuz.enums import (
21 BinaryPackageFormat,
22 PackagePublishingStatus,
23 )
24+from lp.soyuz.model.binarypackagename import BinaryPackageName
25+from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
26+from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
27
28
29 def clear_cache():
30@@ -287,60 +292,67 @@
31 self.debug("Performing domination across %s/%s (%s)" % (
32 dr.name, pocket.title, distroarchseries.architecturetag))
33
34- # Here we go behind SQLObject's back to generate an assistance
35- # table which will seriously improve the performance of this
36- # part of the publisher.
37- # XXX: dsilvers 2006-02-04: It would be nice to not have to do
38- # this. Most of this methodology is stolen from person.py
39- # XXX: malcc 2006-08-03: This should go away when we shift to
40- # doing this one package at a time.
41- flush_database_updates()
42- cur = cursor()
43- cur.execute("""SELECT bpn.id AS name, count(bpn.id) AS count INTO
44- temporary table PubDomHelper FROM BinaryPackageRelease bpr,
45- BinaryPackageName bpn, BinaryPackagePublishingHistory
46- sbpph WHERE bpr.binarypackagename = bpn.id AND
47- sbpph.binarypackagerelease = bpr.id AND
48- sbpph.distroarchseries = %s AND sbpph.archive = %s AND
49- sbpph.status = %s AND sbpph.pocket = %s
50- GROUP BY bpn.id""" % sqlvalues(
51- distroarchseries, self.archive,
52- PackagePublishingStatus.PUBLISHED, pocket))
53-
54- binaries = BinaryPackagePublishingHistory.select(
55- """
56- binarypackagepublishinghistory.distroarchseries = %s
57- AND binarypackagepublishinghistory.archive = %s
58- AND binarypackagepublishinghistory.pocket = %s
59- AND binarypackagepublishinghistory.status = %s AND
60- binarypackagepublishinghistory.binarypackagerelease =
61- binarypackagerelease.id
62- AND binarypackagerelease.binpackageformat != %s
63- AND binarypackagerelease.binarypackagename IN (
64- SELECT name FROM PubDomHelper WHERE count > 1)"""
65- % sqlvalues(distroarchseries, self.archive,
66- pocket, PackagePublishingStatus.PUBLISHED,
67- BinaryPackageFormat.DDEB),
68- clauseTables=['BinaryPackageRelease'])
69-
70+ bpph_location_clauses = And(
71+ BinaryPackagePublishingHistory.status ==
72+ PackagePublishingStatus.PUBLISHED,
73+ BinaryPackagePublishingHistory.distroarchseries ==
74+ distroarchseries,
75+ BinaryPackagePublishingHistory.archive == self.archive,
76+ BinaryPackagePublishingHistory.pocket == pocket,
77+ )
78+ candidate_binary_names = Select(
79+ BinaryPackageName.id,
80+ And(
81+ BinaryPackageRelease.binarypackagenameID ==
82+ BinaryPackageName.id,
83+ BinaryPackagePublishingHistory.binarypackagereleaseID ==
84+ BinaryPackageRelease.id,
85+ bpph_location_clauses,
86+ ),
87+ group_by=BinaryPackageName.id,
88+ having=Count(BinaryPackagePublishingHistory.id) > 1)
89+ binaries = IMasterStore(BinaryPackagePublishingHistory).find(
90+ BinaryPackagePublishingHistory,
91+ BinaryPackageRelease.id ==
92+ BinaryPackagePublishingHistory.binarypackagereleaseID,
93+ BinaryPackageRelease.binarypackagenameID.is_in(
94+ candidate_binary_names),
95+ BinaryPackageRelease.binpackageformat !=
96+ BinaryPackageFormat.DDEB,
97+ bpph_location_clauses)
98 self.debug("Dominating binaries...")
99 self._dominatePublications(self._sortPackages(binaries, False))
100 if do_clear_cache:
101 self.debug("Flushing SQLObject cache.")
102 clear_cache()
103
104- flush_database_updates()
105- cur.execute("DROP TABLE PubDomHelper")
106-
107- if do_clear_cache:
108- self.debug("Flushing SQLObject cache.")
109- clear_cache()
110-
111 self.debug("Performing domination across %s/%s (Source)" %
112 (dr.name, pocket.title))
113- sources = SourcePackagePublishingHistory.selectBy(
114- distroseries=dr, archive=self.archive, pocket=pocket,
115- status=PackagePublishingStatus.PUBLISHED)
116+ spph_location_clauses = And(
117+ SourcePackagePublishingHistory.status ==
118+ PackagePublishingStatus.PUBLISHED,
119+ SourcePackagePublishingHistory.distroseries == dr,
120+ SourcePackagePublishingHistory.archive == self.archive,
121+ SourcePackagePublishingHistory.pocket == pocket,
122+ )
123+ candidate_source_names = Select(
124+ SourcePackageName.id,
125+ And(
126+ SourcePackageRelease.sourcepackagenameID ==
127+ SourcePackageName.id,
128+ SourcePackagePublishingHistory.sourcepackagereleaseID ==
129+ SourcePackageRelease.id,
130+ spph_location_clauses,
131+ ),
132+ group_by=SourcePackageName.id,
133+ having=Count(SourcePackagePublishingHistory.id) > 1)
134+ sources = IMasterStore(SourcePackagePublishingHistory).find(
135+ SourcePackagePublishingHistory,
136+ SourcePackageRelease.id ==
137+ SourcePackagePublishingHistory.sourcepackagereleaseID,
138+ SourcePackageRelease.sourcepackagenameID.is_in(
139+ candidate_source_names),
140+ spph_location_clauses)
141 self.debug("Dominating sources...")
142 self._dominatePublications(self._sortPackages(sources))
143 flush_database_updates()