Merge lp:~wgrant/launchpad/bug-816870 into lp:launchpad

Proposed by William Grant
Status: Merged
Approved by: William Grant
Approved revision: no longer in the source branch.
Merged at revision: 15365
Proposed branch: lp:~wgrant/launchpad/bug-816870
Merge into: lp:launchpad
Diff against target: 115 lines (+25/-39)
3 files modified
lib/lp/registry/browser/distribution.py (+1/-1)
lib/lp/registry/model/distribution.py (+22/-36)
lib/lp/soyuz/browser/tests/distribution-views.txt (+2/-2)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-816870
Reviewer Review Type Date Requested Status
Richard Harding (community) Approve
Review via email: mp+108728@code.launchpad.net

Commit message

Optimise the two really slow bits of Distribution:+search. Should no longer time out.

Description of the change

This branch hopefully fixes the parts of Distribution:+search that contribute most to its almost unending timeouts.

Firstly, has_exact_matches no longer does a slow count(). is_empty() is fine for its purposes. Secondly, searchBinaryPackages has its extremely hideous and slow 8 table join replaced with a slightly less hideous but 50x faster set of LIKEs.

DistributionSourcePackageCache.binpkgnames is a space-separated string of all the binaries produced by the source. We'd ideally replace it with something like an array of SPN IDs, but schema changes like that aren't quite cheap yet, so I achieve an exact match by forcing there to be a word boundary on both sides of the name. A regular expression would be a lot prettier, but it ended up being 300ms vs 90ms, so I decided the LIKEs won for now.

To post a comment you must log in.
Revision history for this message
Richard Harding (rharding) wrote :

Thanks, it goes against everything I know to say LIKES are the fast way, but looks like your numbers say it's so.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/registry/browser/distribution.py'
2--- lib/lp/registry/browser/distribution.py 2012-05-18 07:51:18 +0000
3+++ lib/lp/registry/browser/distribution.py 2012-06-05 12:36:21 +0000
4@@ -585,7 +585,7 @@
5
6 @property
7 def has_exact_matches(self):
8- return self.exact_matches.count() > 0
9+ return not self.exact_matches.is_empty()
10
11 @property
12 def has_matches(self):
13
14=== modified file 'lib/lp/registry/model/distribution.py'
15--- lib/lp/registry/model/distribution.py 2012-05-14 13:32:03 +0000
16+++ lib/lp/registry/model/distribution.py 2012-06-05 12:36:21 +0000
17@@ -179,7 +179,6 @@
18 from lp.soyuz.interfaces.buildrecords import IHasBuildRecords
19 from lp.soyuz.interfaces.publishing import active_publishing_status
20 from lp.soyuz.model.archive import Archive
21-from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
22 from lp.soyuz.model.binarypackagename import BinaryPackageName
23 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
24 from lp.soyuz.model.distributionsourcepackagerelease import (
25@@ -1167,52 +1166,39 @@
26 # results will only see DSPs
27 return DecoratedResultSet(dsp_caches_with_ranks, result_to_dsp)
28
29- @property
30- def _binaryPackageSearchClause(self):
31- """Return a Storm match clause for binary package searches."""
32- # This matches all DistributionSourcePackageCache rows that have
33- # a source package that generated the BinaryPackageName that
34- # we're searching for.
35+ def searchBinaryPackages(self, package_name, exact_match=False):
36+ """See `IDistribution`."""
37 from lp.soyuz.model.distributionsourcepackagecache import (
38 DistributionSourcePackageCache,
39 )
40- return (
41- DistroSeries.distribution == self,
42- DistroSeries.status != SeriesStatus.OBSOLETE,
43- DistroArchSeries.distroseries == DistroSeries.id,
44- BinaryPackageBuild.distro_arch_series == DistroArchSeries.id,
45- BinaryPackageRelease.build == BinaryPackageBuild.id,
46- (BinaryPackageBuild.source_package_release ==
47- SourcePackageRelease.id),
48- SourcePackageRelease.sourcepackagename == SourcePackageName.id,
49- DistributionSourcePackageCache.sourcepackagename ==
50- SourcePackageName.id,
51+ store = Store.of(self)
52+
53+ select_spec = (DistributionSourcePackageCache,)
54+
55+ find_spec = (
56+ DistributionSourcePackageCache.distribution == self,
57 DistributionSourcePackageCache.archiveID.is_in(
58 self.all_distro_archive_ids))
59
60- def searchBinaryPackages(self, package_name, exact_match=False):
61- """See `IDistribution`."""
62- from lp.soyuz.model.distributionsourcepackagecache import (
63- DistributionSourcePackageCache,
64- )
65- store = Store.of(self)
66-
67- select_spec = (DistributionSourcePackageCache,)
68-
69 if exact_match:
70- find_spec = self._binaryPackageSearchClause + (
71- BinaryPackageRelease.binarypackagename
72- == BinaryPackageName.id,
73- )
74- match_clause = (BinaryPackageName.name == package_name,)
75+ # To match BinaryPackageName.name exactly requires a very
76+ # slow 8 table join. So let's instead use binpkgnames, with
77+ # an ugly set of LIKEs matching spaces or either end of the
78+ # string on either side of the name. A regex is several
79+ # times slower and harder to escape.
80+ match_clause = (Or(
81+ DistributionSourcePackageCache.binpkgnames.like(
82+ '%% %s %%' % package_name.lower()),
83+ DistributionSourcePackageCache.binpkgnames.like(
84+ '%% %s' % package_name.lower()),
85+ DistributionSourcePackageCache.binpkgnames.like(
86+ '%s %%' % package_name.lower()),
87+ DistributionSourcePackageCache.binpkgnames ==
88+ package_name.lower()), )
89 else:
90 # In this case we can use a simplified find-spec as the
91 # binary package names are present on the
92 # DistributionSourcePackageCache records.
93- find_spec = (
94- DistributionSourcePackageCache.distribution == self,
95- DistributionSourcePackageCache.archiveID.is_in(
96- self.all_distro_archive_ids))
97 match_clause = (
98 DistributionSourcePackageCache.binpkgnames.like(
99 "%%%s%%" % package_name.lower()),)
100
101=== modified file 'lib/lp/soyuz/browser/tests/distribution-views.txt'
102--- lib/lp/soyuz/browser/tests/distribution-views.txt 2011-05-16 11:20:41 +0000
103+++ lib/lp/soyuz/browser/tests/distribution-views.txt 2012-06-05 12:36:21 +0000
104@@ -195,9 +195,9 @@
105 >>> distro_pkg_search_view.distroseries_names
106 {u'mozilla-firefox': u'warty'}
107 >>> distro_pkg_search_view = create_initialized_view(
108- ... ubuntu, name="+search", form={'text': 'foobar'})
109+ ... ubuntu, name="+search", form={'text': 'mozilla-firefox'})
110 >>> distro_pkg_search_view.distroseries_names
111- {u'foobar': ''}
112+ {u'mozilla-firefox': u'warty'}
113
114 Another helper on the DistributionPackageSearchView is the
115 matching_binary_names property which can be used by templates to get