Merge lp:~lifeless/launchpad/getBuildSummariesForSourceIds into lp:launchpad

Proposed by Robert Collins on 2010-11-07
Status: Merged
Approved by: Robert Collins on 2010-11-08
Approved revision: no longer in the source branch.
Merged at revision: 11888
Proposed branch: lp:~lifeless/launchpad/getBuildSummariesForSourceIds
Merge into: lp:launchpad
Diff against target: 249 lines (+99/-49)
3 files modified
lib/lp/soyuz/adapters/archivesourcepublication.py (+2/-1)
lib/lp/soyuz/interfaces/publishing.py (+18/-2)
lib/lp/soyuz/model/publishing.py (+79/-46)
To merge this branch: bzr merge lp:~lifeless/launchpad/getBuildSummariesForSourceIds
Reviewer Review Type Date Requested Status
Tim Penhey (community) 2010-11-07 Approve on 2010-11-08
Review via email: mp+40298@code.launchpad.net

Commit Message

Group up calls in Archive:EntryResource:getBuildSummariesForSourceIds rather than suffering death by a thousand queries.

Description of the Change

Group up calls in Archive:EntryResource:getBuildSummariesForSourceIds rather than suffering death by a thousand queries. I haven't tuned the queries (which also needs doing), nor have I written a test for the query counts. For the former, I want the actual queries we should be doing to be visible, which is why I've refactored to a constant query count(ish). For the latter I haven't reached the activation energy needed to write one yet, and while I think it would be useful, I think the most useful thing to do right now is fix the OOPSes.

To post a comment you must log in.
Tim Penhey (thumper) wrote :

I don't fully understand what is going on, but the code looks fine.

review: Approve
Robert Collins (lifeless) wrote :

Thanks Tim. I'm moving stuff from this pattern:
{{{
SQL
for thing in things:
   thing.method_doing_SQL
   thing.other_method_doing_SQL
}}}

to
{{{
SQL
SQL
SQL
post_process_in_python
}}}

In the single build case, same amount of SQL. In the 100 package case, vastly less ;)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/soyuz/adapters/archivesourcepublication.py'
2--- lib/lp/soyuz/adapters/archivesourcepublication.py 2010-11-03 06:55:53 +0000
3+++ lib/lp/soyuz/adapters/archivesourcepublication.py 2010-11-08 19:18:52 +0000
4@@ -92,7 +92,8 @@
5 # using the delegate as self - might not be possible without
6 # duck-typing.
7 return getUtility(
8- IPublishingSet).getBuildStatusSummaryForSourcePublication(self)
9+ IPublishingSet).getBuildStatusSummaryForSourcePublication(self,
10+ self.getBuilds)
11
12 class ArchiveSourcePublications:
13 """`ArchiveSourcePublication` iterator."""
14
15=== modified file 'lib/lp/soyuz/interfaces/publishing.py'
16--- lib/lp/soyuz/interfaces/publishing.py 2010-10-06 12:40:00 +0000
17+++ lib/lp/soyuz/interfaces/publishing.py 2010-11-08 19:18:52 +0000
18@@ -507,6 +507,10 @@
19 """Return a resultset of `IBuild` objects in this context that are
20 not published.
21
22+ Note that this is convenience glue for
23+ PublishingSet.getUnpublishedBuildsForSources - and that method should
24+ be considered authoritative.
25+
26 :param build_states: list of build states to which the result should
27 be limited. Defaults to BuildStatus.FULLYBUILT if none are
28 specified.
29@@ -1075,7 +1079,8 @@
30 `IBinaryPackagePublishingHistory`.
31 """
32
33- def getBuildStatusSummariesForSourceIdsAndArchive(source_ids, archive):
34+ def getBuildStatusSummariesForSourceIdsAndArchive(source_ids, archive,
35+ get_builds=None):
36 """Return a summary of the build statuses for source publishing ids.
37
38 This method collects all the builds for the provided source package
39@@ -1090,6 +1095,9 @@
40 :param archive: The archive which will be used to filter the source
41 ids.
42 :type archive: `IArchive`
43+ :param get_builds: Optional override to replace database querying for
44+ build records. This is used in the ArchivePublication adapter code
45+ path and should be avoided where possible.
46 :return: A dict consisting of the overall status summaries for the
47 given ids that belong in the archive. For example:
48 {
49@@ -1100,7 +1108,8 @@
50 :rtype: ``dict``.
51 """
52
53- def getBuildStatusSummaryForSourcePublication(source_publication):
54+ def getBuildStatusSummaryForSourcePublication(source_publication,
55+ get_builds=None):
56 """Return a summary of the build statuses for this source
57 publication.
58
59@@ -1108,6 +1117,13 @@
60 for details. The call is just proxied here so that it can also be
61 used with an ArchiveSourcePublication passed in as
62 the source_package_pub, allowing the use of the cached results.
63+
64+ :param get_builds: An optional callback to replace the database lookup
65+ builds. This is used by some adapters as part of a preloading
66+ strategy - but the preferred form is to have
67+ getBuildStatusSummariesForSourceIdsAndArchive (which this call
68+ delegates to) perform the initial and only loading of the build
69+ records.
70 """
71
72 def getNearestAncestor(
73
74=== modified file 'lib/lp/soyuz/model/publishing.py'
75--- lib/lp/soyuz/model/publishing.py 2010-11-08 03:45:09 +0000
76+++ lib/lp/soyuz/model/publishing.py 2010-11-08 19:18:52 +0000
77@@ -16,6 +16,7 @@
78 ]
79
80
81+from collections import defaultdict
82 from datetime import datetime
83 import operator
84 import os
85@@ -502,25 +503,23 @@
86
87 return unique_binary_publications
88
89+ @staticmethod
90+ def _convertBuilds(builds_for_sources):
91+ """Convert from IPublishingSet getBuilds to SPPH getBuilds."""
92+ return [build for source, build, arch in builds_for_sources]
93+
94 def getBuilds(self):
95 """See `ISourcePackagePublishingHistory`."""
96 publishing_set = getUtility(IPublishingSet)
97- result_set = publishing_set.getBuildsForSources(self)
98-
99- return [build for source, build, arch in result_set]
100+ result_set = publishing_set.getBuildsForSources([self])
101+ return SourcePackagePublishingHistory._convertBuilds(result_set)
102
103 def getUnpublishedBuilds(self, build_states=None):
104 """See `ISourcePackagePublishingHistory`."""
105 publishing_set = getUtility(IPublishingSet)
106 result_set = publishing_set.getUnpublishedBuildsForSources(
107 self, build_states)
108-
109- # Create a function that will just return the second item
110- # in the result tuple (the build).
111- def result_to_build(result):
112- return result[1]
113-
114- return DecoratedResultSet(result_set, result_to_build)
115+ return DecoratedResultSet(result_set, operator.itemgetter(1))
116
117 def changesFileUrl(self):
118 """See `ISourcePackagePublishingHistory`."""
119@@ -1699,28 +1698,84 @@
120 PackageUploadSource.sourcepackagereleaseID == spr.id)
121 return result_set.one()
122
123- def getBuildStatusSummariesForSourceIdsAndArchive(self,
124- source_ids,
125- archive):
126+ def getBuildStatusSummariesForSourceIdsAndArchive(self, source_ids,
127+ archive, get_builds=None):
128 """See `IPublishingSet`."""
129 # source_ids can be None or an empty sequence.
130 if not source_ids:
131 return {}
132
133 store = getUtility(IStoreSelector).get(MAIN_STORE, DEFAULT_FLAVOR)
134- source_pubs = store.find(
135- SourcePackagePublishingHistory,
136- SourcePackagePublishingHistory.id.is_in(source_ids),
137- SourcePackagePublishingHistory.archive == archive)
138-
139+ if len(source_ids) == 1 and get_builds is not None:
140+ # Fake out the DB access for ArchivePublication
141+ source_pubs = list(store.find(
142+ SourcePackagePublishingHistory,
143+ SourcePackagePublishingHistory.id.is_in(source_ids),
144+ SourcePackagePublishingHistory.archive == archive))
145+ source = source_pubs[0]
146+ build_info = [(source, build, None) for build in get_builds()]
147+ else:
148+ build_info = list(
149+ self.getBuildsForSourceIds(source_ids, archive=archive))
150+ source_pubs = set()
151+ found_source_ids = set()
152+ for row in build_info:
153+ source_pubs.add(row[0])
154+ found_source_ids.add(row[0].id)
155+ pubs_without_builds = set(source_ids) - found_source_ids
156+ if pubs_without_builds:
157+ # Add in source pubs for which no builds were found: we may in
158+ # future want to make this a LEFT OUTER JOIN in
159+ # getBuildsForSourceIds but to avoid destabilising other code
160+ # paths while we fix performance, it is just done as a single
161+ # separate query for now.
162+ source_pubs.update(store.find(
163+ SourcePackagePublishingHistory,
164+ SourcePackagePublishingHistory.id.is_in(
165+ pubs_without_builds),
166+ SourcePackagePublishingHistory.archive == archive))
167+ # For each source_pub found, provide an aggregate summary of its
168+ # builds.
169+ binarypackages = getUtility(IBinaryPackageBuildSet)
170 source_build_statuses = {}
171+ need_unpublished = set()
172 for source_pub in source_pubs:
173- status_summary = source_pub.getStatusSummaryForBuilds()
174- source_build_statuses[source_pub.id] = status_summary
175+ source_builds = [build for build in build_info
176+ if build[0].id == source_pub.id]
177+ builds = SourcePackagePublishingHistory._convertBuilds(source_builds)
178+ summary = binarypackages.getStatusSummaryForBuilds(builds)
179+ source_build_statuses[source_pub.id] = summary
180+
181+ # If:
182+ # 1. the SPPH is in an active publishing state, and
183+ # 2. all the builds are fully-built, and
184+ # 3. the SPPH is not being published in a rebuild/copy archive (in
185+ # which case the binaries are not published)
186+ # 4. There are unpublished builds
187+ # Then we augment the result with FULLYBUILT_PENDING and attach the
188+ # unpublished builds.
189+ if (source_pub.status in active_publishing_status and
190+ summary['status'] == BuildSetStatus.FULLYBUILT and
191+ not source_pub.archive.is_copy):
192+ need_unpublished.add(source_pub)
193+
194+ if need_unpublished:
195+ unpublished = list(self.getUnpublishedBuildsForSources(
196+ need_unpublished))
197+ unpublished_per_source = defaultdict(list)
198+ for source_pub, build, _ in unpublished:
199+ unpublished_per_source[source_pub].append(build)
200+ for source_pub, builds in unpublished_per_source.items():
201+ summary = {
202+ 'status': BuildSetStatus.FULLYBUILT_PENDING,
203+ 'builds': builds
204+ }
205+ source_build_statuses[source_pub.id] = summary
206
207 return source_build_statuses
208
209- def getBuildStatusSummaryForSourcePublication(self, source_publication):
210+ def getBuildStatusSummaryForSourcePublication(self, source_publication,
211+ get_builds=None):
212 """See `ISourcePackagePublishingHistory`.getStatusSummaryForBuilds.
213
214 This is provided here so it can be used by both the SPPH as well
215@@ -1728,31 +1783,9 @@
216 the same interface but uses cached results for builds and binaries
217 used in the calculation.
218 """
219- builds = source_publication.getBuilds()
220- summary = getUtility(
221- IBinaryPackageBuildSet).getStatusSummaryForBuilds(builds)
222-
223- # We only augment the result if:
224- # 1. we (the SPPH) are ourselves in an active publishing state, and
225- # 2. all the builds are fully-built, and
226- # 3. we are not being published in a rebuild/copy archive (in
227- # which case the binaries are not currently published anyway)
228- # In this case we check to see if they are all published, and if
229- # not we return FULLYBUILT_PENDING:
230- augmented_summary = summary
231- if (source_publication.status in active_publishing_status and
232- summary['status'] == BuildSetStatus.FULLYBUILT and
233- not source_publication.archive.is_copy):
234-
235- unpublished_builds = list(
236- source_publication.getUnpublishedBuilds())
237-
238- if unpublished_builds:
239- augmented_summary = {
240- 'status': BuildSetStatus.FULLYBUILT_PENDING,
241- 'builds': unpublished_builds
242- }
243- return augmented_summary
244+ source_id = source_publication.id
245+ return self.getBuildStatusSummariesForSourceIdsAndArchive([source_id],
246+ source_publication.archive, get_builds=get_builds)[source_id]
247
248 def requestDeletion(self, sources, removed_by, removal_comment=None):
249 """See `IPublishingSet`."""