Merge lp:~wgrant/launchpad/bug-816870 into lp:launchpad

Proposed by William Grant
Status: Merged
Approved by: William Grant
Approved revision: no longer in the source branch.
Merged at revision: 15365
Proposed branch: lp:~wgrant/launchpad/bug-816870
Merge into: lp:launchpad
Diff against target: 115 lines (+25/-39)
3 files modified
lib/lp/registry/browser/distribution.py (+1/-1)
lib/lp/registry/model/distribution.py (+22/-36)
lib/lp/soyuz/browser/tests/distribution-views.txt (+2/-2)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-816870
Reviewer Review Type Date Requested Status
Richard Harding (community) Approve
Review via email: mp+108728@code.launchpad.net

Commit message

Optimise the two really slow bits of Distribution:+search. Should no longer time out.

Description of the change

This branch hopefully fixes the parts of Distribution:+search that contribute most to its almost unending timeouts.

Firstly, has_exact_matches no longer does a slow count(). is_empty() is fine for its purposes. Secondly, searchBinaryPackages has its extremely hideous and slow 8 table join replaced with a slightly less hideous but 50x faster set of LIKEs.

DistributionSourcePackageCache.binpkgnames is a space-separated string of all the binaries produced by the source. We'd ideally replace it with something like an array of SPN IDs, but schema changes like that aren't quite cheap yet, so I achieve an exact match by forcing there to be a word boundary on both sides of the name. A regular expression would be a lot prettier, but it ended up being 300ms vs 90ms, so I decided the LIKEs won for now.

To post a comment you must log in.
Revision history for this message
Richard Harding (rharding) wrote :

Thanks, it goes against everything I know to say LIKES are the fast way, but looks like your numbers say it's so.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/registry/browser/distribution.py'
--- lib/lp/registry/browser/distribution.py 2012-05-18 07:51:18 +0000
+++ lib/lp/registry/browser/distribution.py 2012-06-05 12:36:21 +0000
@@ -585,7 +585,7 @@
585585
586 @property586 @property
587 def has_exact_matches(self):587 def has_exact_matches(self):
588 return self.exact_matches.count() > 0588 return not self.exact_matches.is_empty()
589589
590 @property590 @property
591 def has_matches(self):591 def has_matches(self):
592592
=== modified file 'lib/lp/registry/model/distribution.py'
--- lib/lp/registry/model/distribution.py 2012-05-14 13:32:03 +0000
+++ lib/lp/registry/model/distribution.py 2012-06-05 12:36:21 +0000
@@ -179,7 +179,6 @@
179from lp.soyuz.interfaces.buildrecords import IHasBuildRecords179from lp.soyuz.interfaces.buildrecords import IHasBuildRecords
180from lp.soyuz.interfaces.publishing import active_publishing_status180from lp.soyuz.interfaces.publishing import active_publishing_status
181from lp.soyuz.model.archive import Archive181from lp.soyuz.model.archive import Archive
182from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
183from lp.soyuz.model.binarypackagename import BinaryPackageName182from lp.soyuz.model.binarypackagename import BinaryPackageName
184from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease183from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
185from lp.soyuz.model.distributionsourcepackagerelease import (184from lp.soyuz.model.distributionsourcepackagerelease import (
@@ -1167,52 +1166,39 @@
1167 # results will only see DSPs1166 # results will only see DSPs
1168 return DecoratedResultSet(dsp_caches_with_ranks, result_to_dsp)1167 return DecoratedResultSet(dsp_caches_with_ranks, result_to_dsp)
11691168
1170 @property1169 def searchBinaryPackages(self, package_name, exact_match=False):
1171 def _binaryPackageSearchClause(self):1170 """See `IDistribution`."""
1172 """Return a Storm match clause for binary package searches."""
1173 # This matches all DistributionSourcePackageCache rows that have
1174 # a source package that generated the BinaryPackageName that
1175 # we're searching for.
1176 from lp.soyuz.model.distributionsourcepackagecache import (1171 from lp.soyuz.model.distributionsourcepackagecache import (
1177 DistributionSourcePackageCache,1172 DistributionSourcePackageCache,
1178 )1173 )
1179 return (1174 store = Store.of(self)
1180 DistroSeries.distribution == self,1175
1181 DistroSeries.status != SeriesStatus.OBSOLETE,1176 select_spec = (DistributionSourcePackageCache,)
1182 DistroArchSeries.distroseries == DistroSeries.id,1177
1183 BinaryPackageBuild.distro_arch_series == DistroArchSeries.id,1178 find_spec = (
1184 BinaryPackageRelease.build == BinaryPackageBuild.id,1179 DistributionSourcePackageCache.distribution == self,
1185 (BinaryPackageBuild.source_package_release ==
1186 SourcePackageRelease.id),
1187 SourcePackageRelease.sourcepackagename == SourcePackageName.id,
1188 DistributionSourcePackageCache.sourcepackagename ==
1189 SourcePackageName.id,
1190 DistributionSourcePackageCache.archiveID.is_in(1180 DistributionSourcePackageCache.archiveID.is_in(
1191 self.all_distro_archive_ids))1181 self.all_distro_archive_ids))
11921182
1193 def searchBinaryPackages(self, package_name, exact_match=False):
1194 """See `IDistribution`."""
1195 from lp.soyuz.model.distributionsourcepackagecache import (
1196 DistributionSourcePackageCache,
1197 )
1198 store = Store.of(self)
1199
1200 select_spec = (DistributionSourcePackageCache,)
1201
1202 if exact_match:1183 if exact_match:
1203 find_spec = self._binaryPackageSearchClause + (1184 # To match BinaryPackageName.name exactly requires a very
1204 BinaryPackageRelease.binarypackagename1185 # slow 8 table join. So let's instead use binpkgnames, with
1205 == BinaryPackageName.id,1186 # an ugly set of LIKEs matching spaces or either end of the
1206 )1187 # string on either side of the name. A regex is several
1207 match_clause = (BinaryPackageName.name == package_name,)1188 # times slower and harder to escape.
1189 match_clause = (Or(
1190 DistributionSourcePackageCache.binpkgnames.like(
1191 '%% %s %%' % package_name.lower()),
1192 DistributionSourcePackageCache.binpkgnames.like(
1193 '%% %s' % package_name.lower()),
1194 DistributionSourcePackageCache.binpkgnames.like(
1195 '%s %%' % package_name.lower()),
1196 DistributionSourcePackageCache.binpkgnames ==
1197 package_name.lower()), )
1208 else:1198 else:
1209 # In this case we can use a simplified find-spec as the1199 # In this case we can use a simplified find-spec as the
1210 # binary package names are present on the1200 # binary package names are present on the
1211 # DistributionSourcePackageCache records.1201 # DistributionSourcePackageCache records.
1212 find_spec = (
1213 DistributionSourcePackageCache.distribution == self,
1214 DistributionSourcePackageCache.archiveID.is_in(
1215 self.all_distro_archive_ids))
1216 match_clause = (1202 match_clause = (
1217 DistributionSourcePackageCache.binpkgnames.like(1203 DistributionSourcePackageCache.binpkgnames.like(
1218 "%%%s%%" % package_name.lower()),)1204 "%%%s%%" % package_name.lower()),)
12191205
=== modified file 'lib/lp/soyuz/browser/tests/distribution-views.txt'
--- lib/lp/soyuz/browser/tests/distribution-views.txt 2011-05-16 11:20:41 +0000
+++ lib/lp/soyuz/browser/tests/distribution-views.txt 2012-06-05 12:36:21 +0000
@@ -195,9 +195,9 @@
195 >>> distro_pkg_search_view.distroseries_names195 >>> distro_pkg_search_view.distroseries_names
196 {u'mozilla-firefox': u'warty'}196 {u'mozilla-firefox': u'warty'}
197 >>> distro_pkg_search_view = create_initialized_view(197 >>> distro_pkg_search_view = create_initialized_view(
198 ... ubuntu, name="+search", form={'text': 'foobar'})198 ... ubuntu, name="+search", form={'text': 'mozilla-firefox'})
199 >>> distro_pkg_search_view.distroseries_names199 >>> distro_pkg_search_view.distroseries_names
200 {u'foobar': ''}200 {u'mozilla-firefox': u'warty'}
201201
202Another helper on the DistributionPackageSearchView is the202Another helper on the DistributionPackageSearchView is the
203matching_binary_names property which can be used by templates to get203matching_binary_names property which can be used by templates to get