Merge ~cjwatson/launchpad:refactor-publisher-queries into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 4b7a45bf2d155a76137881db7d676e99905993df
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:refactor-publisher-queries
Merge into: launchpad:master
Diff against target: 230 lines (+91/-63)
4 files modified
lib/lp/soyuz/doc/publishing.rst (+2/-2)
lib/lp/soyuz/interfaces/publishing.py (+3/-2)
lib/lp/soyuz/model/publishing.py (+84/-58)
lib/lp/soyuz/tests/test_publishing_models.py (+2/-1)
Reviewer: Simone Pelosi (Approve)
Review via email: mp+449440@code.launchpad.net

Commit message

Refactor queries in PublishingSet.getBuildsForSourceIds

Description of the change

`PublishingSet.getBuildsForSourceIds` made one database query to work out the set of build IDs (in an inefficient way that pulled a lot more data back into Python than needed), and then a second query to get the full result set to return. Refactor this to do all the work in a single query.

The approach I've taken here also moves the `BinaryPackageBuild` parts of the query into a left join, which should soon make it easier to extend this method to support CI builds as well.
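
For illustration, here is a minimal sketch of the Storm pattern the branch switches to (this is not Launchpad code: `Publication`, `Build` and their columns are invented stand-ins for the real tables, and the joins are heavily simplified). The two candidate-build lookups become `Select` expressions, their `Union` is passed directly to `is_in()`, and Storm compiles the whole condition into one statement instead of materialising an intermediate list of IDs in Python.

```python
from storm.expr import And, Select, Union, compile as compile_sql
from storm.properties import Int


# Hypothetical, cut-down stand-ins for the real Launchpad tables.
class Publication(object):
    __storm_table__ = "publication"
    id = Int(primary=True)
    source_id = Int()
    archive_id = Int()


class Build(object):
    __storm_table__ = "build"
    id = Int(primary=True)
    source_id = Int()
    archive_id = Int()


# Builds created in the same archive as the publication.
in_same_archive = Select(
    Build.id,
    tables=(Publication, Build),
    where=And(
        Publication.source_id == Build.source_id,
        Publication.archive_id == Build.archive_id,
    ),
)

# Builds created in a different archive (the real code adds joins against
# the binary publishing tables to pick up copied binaries).
copied_into_archive = Select(
    Build.id,
    tables=(Publication, Build),
    where=And(
        Publication.source_id == Build.source_id,
        Publication.archive_id != Build.archive_id,
    ),
)

# The Union goes straight into is_in(), so the condition compiles to a
# single "build.id IN (<union subquery>)" clause rather than two queries
# plus an ID list round-tripped through Python.
condition = Build.id.is_in(Union(in_same_archive, copied_into_archive))
print(compile_sql(condition))
```

Printing the compiled condition shows a single `IN (...)` clause containing the union subquery, which is the shape the refactored query in this branch now takes.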

Revision history for this message
Simone Pelosi (pelpsi) wrote :

LGTM!

review: Approve

Preview Diff

1diff --git a/lib/lp/soyuz/doc/publishing.rst b/lib/lp/soyuz/doc/publishing.rst
2index 22ac0df..a711765 100644
3--- a/lib/lp/soyuz/doc/publishing.rst
4+++ b/lib/lp/soyuz/doc/publishing.rst
5@@ -1132,8 +1132,8 @@ its context.
6 >>> cprov_builds = publishing_set.getBuildsForSources(cprov_sources)
7
8 It returns a `ResultSet` and it contains 3-element tuples as
9-`SourcePackagePublishingHistory`, `Build` and `DistroArchseries` for
10-each build found.
11+`SourcePackagePublishingHistory`, `BinaryPackageBuild` and
12+`DistroArchSeries` for each build found.
13
14 >>> cprov_builds.count()
15 7
16diff --git a/lib/lp/soyuz/interfaces/publishing.py b/lib/lp/soyuz/interfaces/publishing.py
17index f0689a0..a481aa3 100644
18--- a/lib/lp/soyuz/interfaces/publishing.py
19+++ b/lib/lp/soyuz/interfaces/publishing.py
20@@ -1210,7 +1210,8 @@ class IPublishingSet(Interface):
21 :param need_build_farm_job: whether to include the `PackageBuild`
22 and `BuildFarmJob` in the result.
23 :return: a storm ResultSet containing tuples as
24- (`SourcePackagePublishingHistory`, `Build`, `DistroArchSeries`)
25+ (`SourcePackagePublishingHistory`, `BinaryPackageBuild`,
26+ `DistroArchSeries`)
27 :rtype: `storm.store.ResultSet`.
28 """
29
30@@ -1232,7 +1233,7 @@ class IPublishingSet(Interface):
31 be limited. Defaults to BuildStatus.FULLYBUILT if none are
32 specified.
33 :return: a storm ResultSet containing tuples of
34- (`SourcePackagePublishingHistory`, `Build`)
35+ (`SourcePackagePublishingHistory`, `BinaryPackageBuild`)
36 """
37
38 def getBinaryFilesForSources(one_or_more_source_publication):
39diff --git a/lib/lp/soyuz/model/publishing.py b/lib/lp/soyuz/model/publishing.py
40index 470032d..b5ded2d 100644
41--- a/lib/lp/soyuz/model/publishing.py
42+++ b/lib/lp/soyuz/model/publishing.py
43@@ -18,7 +18,18 @@ from operator import attrgetter, itemgetter
44 from pathlib import Path
45
46 from storm.databases.postgres import JSON
47-from storm.expr import And, Cast, Desc, Join, LeftJoin, Not, Or, Sum
48+from storm.expr import (
49+ And,
50+ Cast,
51+ Desc,
52+ Join,
53+ LeftJoin,
54+ Not,
55+ Or,
56+ Select,
57+ Sum,
58+ Union,
59+)
60 from storm.info import ClassAlias
61 from storm.properties import DateTime, Int, Unicode
62 from storm.references import Reference
63@@ -1719,9 +1730,26 @@ class PublishingSet:
64 self, source_publication_ids, archive=None, build_states=None
65 ):
66 """See `IPublishingSet`."""
67+ # We're interested in the binaries resulting from builds in the same
68+ # distroseries as the SPPH.
69+ bpb_tables = [
70+ LeftJoin(
71+ BinaryPackageBuild,
72+ And(
73+ SourcePackagePublishingHistory.distroseries_id
74+ == BinaryPackageBuild.distro_series_id,
75+ SourcePackagePublishingHistory.sourcepackagerelease_id
76+ == BinaryPackageBuild.source_package_release_id,
77+ ),
78+ ),
79+ ]
80+
81+ extra_exprs = [
82+ SourcePackagePublishingHistory.id.is_in(source_publication_ids)
83+ ]
84+
85 # If an archive was passed in as a parameter, add an extra expression
86 # to filter by archive:
87- extra_exprs = []
88 if archive is not None:
89 extra_exprs.append(
90 SourcePackagePublishingHistory.archive == archive
91@@ -1734,71 +1762,69 @@ class PublishingSet:
92
93 store = IStore(SourcePackagePublishingHistory)
94
95- # We'll be looking for builds in the same distroseries as the
96- # SPPH for the same release.
97- builds_for_distroseries_expr = (
98- SourcePackagePublishingHistory.distroseries_id
99- == BinaryPackageBuild.distro_series_id,
100- SourcePackagePublishingHistory.sourcepackagerelease_id
101- == BinaryPackageBuild.source_package_release_id,
102- SourcePackagePublishingHistory.id.is_in(source_publication_ids),
103- DistroArchSeries.id == BinaryPackageBuild.distro_arch_series_id,
104- )
105-
106- # First, we'll find the builds that were built in the same
107- # archive context as the published sources.
108- builds_in_same_archive = store.find(
109- BinaryPackageBuild,
110- builds_for_distroseries_expr,
111- (
112+ # First, we'll find the binary package builds that were built in the
113+ # same archive context as the published sources.
114+ build_ids_in_same_archive = Select(
115+ BinaryPackageBuild.id,
116+ tables=(SourcePackagePublishingHistory, *bpb_tables),
117+ where=And(
118 SourcePackagePublishingHistory.archive_id
119- == BinaryPackageBuild.archive_id
120+ == BinaryPackageBuild.archive_id,
121+ *extra_exprs,
122 ),
123- *extra_exprs,
124 )
125
126- # Next get all the builds that have a binary published in the
127- # same archive... even though the build was not built in
128- # the same context archive.
129- builds_copied_into_archive = store.find(
130- BinaryPackageBuild,
131- builds_for_distroseries_expr,
132- (
133+ # Next get all the binary package builds that have a binary
134+ # published in the same archive... even though the build was not
135+ # built in the same context archive.
136+ build_ids_copied_into_archive = Select(
137+ BinaryPackageBuild.id,
138+ tables=(
139+ SourcePackagePublishingHistory,
140+ *bpb_tables,
141+ Join(
142+ BinaryPackageRelease,
143+ BinaryPackageRelease.build == BinaryPackageBuild.id,
144+ ),
145+ Join(
146+ BinaryPackagePublishingHistory,
147+ And(
148+ BinaryPackagePublishingHistory.archive
149+ == SourcePackagePublishingHistory.archive_id,
150+ BinaryPackagePublishingHistory.binarypackagerelease
151+ == BinaryPackageRelease.id,
152+ ),
153+ ),
154+ ),
155+ where=And(
156 SourcePackagePublishingHistory.archive_id
157- != BinaryPackageBuild.archive_id
158+ != BinaryPackageBuild.archive_id,
159+ *extra_exprs,
160 ),
161- BinaryPackagePublishingHistory.archive
162- == SourcePackagePublishingHistory.archive_id,
163- BinaryPackagePublishingHistory.binarypackagerelease
164- == BinaryPackageRelease.id,
165- BinaryPackageRelease.build == BinaryPackageBuild.id,
166- *extra_exprs,
167 )
168
169- builds_union = builds_copied_into_archive.union(
170- builds_in_same_archive
171- ).config(distinct=True)
172-
173- # Now that we have a result_set of all the builds, we'll use it
174- # as a subquery to get the required publishing and arch to do
175- # the ordering. We do this in this round-about way because we
176- # can't sort on SourcePackagePublishingHistory.id after the
177- # union. See bug 443353 for details.
178- find_spec = (
179+ # Now that we have select expressions for all the builds, we'll use
180+ # them as subqueries to get the required publishing and arch to do
181+ # the ordering. We do this in this round-about way because we can't
182+ # sort on SourcePackagePublishingHistory.id after the union. See bug
183+ # 443353 for details.
184+ result_set = store.using(
185 SourcePackagePublishingHistory,
186- BinaryPackageBuild,
187- DistroArchSeries,
188- )
189-
190- # Storm doesn't let us do builds_union.values('id') -
191- # ('Union' object has no attribute 'columns'). So instead
192- # we have to instantiate the objects just to get the id.
193- build_ids = [build.id for build in builds_union]
194-
195- result_set = store.find(
196- find_spec,
197- builds_for_distroseries_expr,
198- BinaryPackageBuild.id.is_in(build_ids),
199+ *bpb_tables,
200+ Join(
201+ DistroArchSeries,
202+ BinaryPackageBuild.distro_arch_series == DistroArchSeries.id,
203+ ),
204+ ).find(
205+ (
206+ SourcePackagePublishingHistory,
207+ BinaryPackageBuild,
208+ DistroArchSeries,
209+ ),
210+ BinaryPackageBuild.id.is_in(
211+ Union(build_ids_in_same_archive, build_ids_copied_into_archive)
212+ ),
213+ *extra_exprs,
214 )
215
216 return result_set.order_by(
217diff --git a/lib/lp/soyuz/tests/test_publishing_models.py b/lib/lp/soyuz/tests/test_publishing_models.py
218index 6339b71..755d797 100644
219--- a/lib/lp/soyuz/tests/test_publishing_models.py
220+++ b/lib/lp/soyuz/tests/test_publishing_models.py
221@@ -40,7 +40,8 @@ class TestPublishingSet(BaseTestCaseWithThreeBuilds):
222 self.publishing_set = getUtility(IPublishingSet)
223
224 def _getBuildsForResults(self, results):
225- # The method returns (SPPH, Build) tuples, we just want the build.
226+ # The method returns (SPPH, BinaryPackageBuild, DistroArchSeries)
227+ # tuples. We just want the binary package build.
228 return [result[1] for result in results]
229
230 def test_getUnpublishedBuildsForSources_none_published(self):
