Merge lp:~cjwatson/launchpad/optimise-merge-detection into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18610
Proposed branch: lp:~cjwatson/launchpad/optimise-merge-detection
Merge into: lp:launchpad
Diff against target: 140 lines (+34/-8)
4 files modified
lib/lp/codehosting/scanner/mergedetection.py (+6/-1)
lib/lp/codehosting/scanner/tests/test_mergedetection.py (+2/-1)
lib/lp/services/tests/test_utils.py (+21/-4)
lib/lp/services/utils.py (+5/-2)
To merge this branch: bzr merge lp:~cjwatson/launchpad/optimise-merge-detection
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+342975@code.launchpad.net

Commit message

Optimise merge detection when the branch has no landing candidates.

Description of the change

I noticed an OOPS when scanning ~vcs-imports/gcc/git-mirror which appears to be due to a fair amount of non-SQL time somewhere around here; this seems like a likely candidate.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/codehosting/scanner/mergedetection.py'
2--- lib/lp/codehosting/scanner/mergedetection.py 2016-04-06 12:14:54 +0000
3+++ lib/lp/codehosting/scanner/mergedetection.py 2018-04-10 23:11:40 +0000
4@@ -92,6 +92,7 @@
5 #
6 # XXX: JonathanLange 2009-05-11 spec=package-branches: This assumes that
7 # merge detection only works with product branches.
8+ logger.info("Looking for merged branches.")
9 branches = getUtility(IAllBranches).inProduct(db_branch.product)
10 branches = branches.withLifecycleStatus(
11 BranchLifecycleStatus.DEVELOPMENT,
12@@ -151,12 +152,16 @@
13 # which introduced the change, that will either be set through the web
14 # ui by a person, or by PQM once it is integrated.
15
16+ logger.info("Looking for merged proposals.")
17 if scan_completed.bzr_branch is None:
18 # Only happens in tests.
19 merge_sorted = []
20 else:
21+ # This is potentially slow for deep histories; we defer even
22+ # initialising it until we need it, and we cache the iterator's
23+ # results.
24 merge_sorted = CachingIterator(
25- scan_completed.bzr_branch.iter_merge_sorted_revisions())
26+ scan_completed.bzr_branch.iter_merge_sorted_revisions)
27 for proposal in db_branch.landing_candidates:
28 tip_rev_id = proposal.source_branch.last_scanned_id
29 if tip_rev_id in new_ancestry:
30
31=== modified file 'lib/lp/codehosting/scanner/tests/test_mergedetection.py'
32--- lib/lp/codehosting/scanner/tests/test_mergedetection.py 2015-10-05 17:03:27 +0000
33+++ lib/lp/codehosting/scanner/tests/test_mergedetection.py 2018-04-10 23:11:40 +0000
34@@ -32,6 +32,7 @@
35 )
36 from lp.services.config import config
37 from lp.services.database.interfaces import IStore
38+from lp.services.log.logger import DevNullLogger
39 from lp.services.osutils import override_environ
40 from lp.testing import (
41 TestCase,
42@@ -199,7 +200,7 @@
43 mergedetection.auto_merge_branches(
44 events.ScanCompleted(
45 db_branch=db_branch, bzr_branch=None,
46- logger=None, new_ancestry=new_ancestry))
47+ logger=DevNullLogger(), new_ancestry=new_ancestry))
48
49 def mergeDetected(self, logger, source, target):
50 # Record the merged branches
51
52=== modified file 'lib/lp/services/tests/test_utils.py'
53--- lib/lp/services/tests/test_utils.py 2018-03-23 12:26:35 +0000
54+++ lib/lp/services/tests/test_utils.py 2018-04-10 23:11:40 +0000
55@@ -7,6 +7,7 @@
56
57 from contextlib import contextmanager
58 from datetime import datetime
59+from functools import partial
60 import hashlib
61 import itertools
62 import os
63@@ -145,7 +146,7 @@
64
65 def test_reuse(self):
66 # The same iterator can be used multiple times.
67- iterator = CachingIterator(itertools.count())
68+ iterator = CachingIterator(itertools.count)
69 self.assertEqual(
70 [0, 1, 2, 3, 4], list(itertools.islice(iterator, 0, 5)))
71 self.assertEqual(
72@@ -154,7 +155,7 @@
73 def test_more_values(self):
74 # If a subsequent call to iter causes more values to be fetched, they
75 # are also cached.
76- iterator = CachingIterator(itertools.count())
77+ iterator = CachingIterator(itertools.count)
78 self.assertEqual(
79 [0, 1, 2], list(itertools.islice(iterator, 0, 3)))
80 self.assertEqual(
81@@ -162,14 +163,14 @@
82
83 def test_limited_iterator(self):
84 # Make sure that StopIteration is handled correctly.
85- iterator = CachingIterator(iter([0, 1, 2, 3, 4]))
86+ iterator = CachingIterator(partial(iter, [0, 1, 2, 3, 4]))
87 self.assertEqual(
88 [0, 1, 2], list(itertools.islice(iterator, 0, 3)))
89 self.assertEqual([0, 1, 2, 3, 4], list(iterator))
90
91 def test_parallel_iteration(self):
92 # There can be parallel iterators over the CachingIterator.
93- ci = CachingIterator(iter([0, 1, 2, 3, 4]))
94+ ci = CachingIterator(partial(iter, [0, 1, 2, 3, 4]))
95 i1 = iter(ci)
96 i2 = iter(ci)
97 self.assertEqual(0, i1.next())
98@@ -177,6 +178,22 @@
99 self.assertEqual([1, 2, 3, 4], list(i2))
100 self.assertEqual([1, 2, 3, 4], list(i1))
101
102+ def test_deferred_initialisation(self):
103+ # Initialising the iterator may be expensive, so CachingIterator
104+ # defers this until it needs it.
105+ self.initialised = False
106+
107+ def iterator():
108+ self.initialised = True
109+ return iter([0, 1, 2])
110+
111+ ci = CachingIterator(iterator)
112+ self.assertFalse(self.initialised)
113+ self.assertEqual([0, 1, 2], list(ci))
114+ self.assertTrue(self.initialised)
115+ self.assertEqual([0, 1, 2], list(ci))
116+ self.assertTrue(self.initialised)
117+
118
119 class TestDecorateWith(TestCase):
120 """Tests for `decorate_with`."""
121
122=== modified file 'lib/lp/services/utils.py'
123--- lib/lp/services/utils.py 2018-03-23 12:26:35 +0000
124+++ lib/lp/services/utils.py 2018-04-10 23:11:40 +0000
125@@ -209,10 +209,13 @@
126 iterator if necessary.
127 """
128
129- def __init__(self, iterator):
130- self.iterator = iterator
131+ def __init__(self, iterator_factory):
132+ self.iterator_factory = iterator_factory
133+ self.iterator = None
134
135 def __iter__(self):
136+ if self.iterator is None:
137+ self.iterator = self.iterator_factory()
138 # Teeing an iterator previously returned by tee won't cause heat
139 # death. See tee_copy in itertoolsmodule.c in the Python source.
140 self.iterator, iterator = tee(self.iterator)