Merge ~cjwatson/launchpad:faster-detect-merges into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: f08d401db953497711354c4de47a5293261c36a6
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:faster-detect-merges
Merge into: launchpad:master
Diff against target: 196 lines (+61/-17)
7 files modified
lib/lp/code/interfaces/githosting.py (+3/-1)
lib/lp/code/interfaces/gitrepository.py (+4/-1)
lib/lp/code/model/githosting.py (+14/-4)
lib/lp/code/model/gitrepository.py (+2/-1)
lib/lp/code/model/tests/test_githosting.py (+13/-1)
lib/lp/code/model/tests/test_gitrepository.py (+17/-8)
lib/lp/code/subscribers/git.py (+8/-1)
Reviewer Review Type Date Requested Status
Simone Pelosi Approve
Review via email: mp+449451@code.launchpad.net

Commit message

Speed up merge detection in repositories with deep history

Description of the change

Merge detection can be very slow for repositories with deep history, because as currently defined it has to walk through the whole ancestry of a given target commit looking for "mainline" (first parent only) commits that introduce a merge of any of the given source commits. In particular, this often times out for Linux kernel repositories.

However, we can do much better. Merge proposals are normally only created in Launchpad once the target branch has been scanned, and we can reasonably assume that when a merge proposal is created it won't yet have been merged. This means that Launchpad can tell turnip's `detect-merges` API to stop walking through history whenever it encounters the commit that it had previously scanned on the target branch, and then we only have to look through the changes introduced by a push rather than the entire history of the branch. This should be much less likely to time out.

(See also https://code.launchpad.net/~cjwatson/turnip/+git/turnip/+merge/445341.)

To post a comment you must log in.
Revision history for this message
Simone Pelosi (pelpsi) wrote :

LGTM!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/code/interfaces/githosting.py b/lib/lp/code/interfaces/githosting.py
2index 03cdcab..5486efa 100644
3--- a/lib/lp/code/interfaces/githosting.py
4+++ b/lib/lp/code/interfaces/githosting.py
5@@ -105,12 +105,14 @@ class IGitHostingClient(Interface):
6 to a list of conflicted paths.
7 """
8
9- def detectMerges(path, target, sources, logger=None):
10+ def detectMerges(path, target, sources, previous_target=None, logger=None):
11 """Detect merges of any of 'sources' into 'target'.
12
13 :param path: Physical path of the repository on the hosting service.
14 :param target: The OID of the merge proposal target commit.
15 :param sources: The OIDs of the merge proposal source commits.
16+ :param previous_target: If not None, stop walking back through
17+ history upon reaching this commit.
18 :param logger: An optional logger.
19 :return: A dict mapping merged commit OIDs from 'sources' to the
20 first commit OID in the left-hand (first parent only) history of
21diff --git a/lib/lp/code/interfaces/gitrepository.py b/lib/lp/code/interfaces/gitrepository.py
22index 4f57972..354d2df 100644
23--- a/lib/lp/code/interfaces/gitrepository.py
24+++ b/lib/lp/code/interfaces/gitrepository.py
25@@ -826,12 +826,15 @@ class IGitRepositoryView(IHasRecipes):
26 based on one of these paths will be marked as stale.
27 """
28
29- def detectMerges(paths, logger=None):
30+ def detectMerges(paths, previous_targets, logger=None):
31 """Detect merges of landing candidates.
32
33 :param paths: A list of reference paths. Any merge proposals whose
34 target is this repository and one of these paths will be
35 checked.
36+ :param previous_targets: A dictionary mapping merge proposal IDs to
37+ their previous target commit IDs, before the current ref scan
38+ job updated them.
39 :param logger: An optional logger.
40 """
41
42diff --git a/lib/lp/code/model/githosting.py b/lib/lp/code/model/githosting.py
43index 0679bb0..5ed215a 100644
44--- a/lib/lp/code/model/githosting.py
45+++ b/lib/lp/code/model/githosting.py
46@@ -261,18 +261,28 @@ class GitHostingClient:
47 "Failed to get merge diff from Git repository: %s" % str(e)
48 )
49
50- def detectMerges(self, path, target, sources, logger=None):
51+ def detectMerges(
52+ self, path, target, sources, previous_target=None, logger=None
53+ ):
54 """See `IGitHostingClient`."""
55 sources = list(sources)
56+ stop = [] if previous_target is None else [previous_target]
57 try:
58 if logger is not None:
59 logger.info(
60- "Detecting merges for %s from %s to %s"
61- % (path, sources, target)
62+ "Detecting merges for %s from %s to %s%s"
63+ % (
64+ path,
65+ sources,
66+ target,
67+ ""
68+ if previous_target is None
69+ else " (stopping at %s)" % previous_target,
70+ )
71 )
72 return self._post(
73 "/repo/%s/detect-merges/%s" % (path, quote(target)),
74- json={"sources": sources},
75+ json={"sources": sources, "stop": stop},
76 )
77 except requests.RequestException as e:
78 raise GitRepositoryScanFault(
79diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
80index a91df2e..85240eb 100644
81--- a/lib/lp/code/model/gitrepository.py
82+++ b/lib/lp/code/model/gitrepository.py
83@@ -1550,7 +1550,7 @@ class GitRepository(
84 with BranchMergeProposalNoPreviewDiffDelta.monitor(proposal):
85 proposal.markAsMerged(merged_revision_id=merged_revision_id)
86
87- def detectMerges(self, paths, logger=None):
88+ def detectMerges(self, paths, previous_targets, logger=None):
89 """See `IGitRepository`."""
90 hosting_client = getUtility(IGitHostingClient)
91 all_proposals = self.getActiveLandingCandidates(paths).order_by(
92@@ -1562,6 +1562,7 @@ class GitRepository(
93 self.getInternalPath(),
94 proposals[0].target_git_commit_sha1,
95 {proposal.source_git_commit_sha1 for proposal in proposals},
96+ previous_target=previous_targets.get(proposals[0].id),
97 )
98 for proposal in proposals:
99 merged_revision_id = merges.get(
100diff --git a/lib/lp/code/model/tests/test_githosting.py b/lib/lp/code/model/tests/test_githosting.py
101index af50950..0f5cf62 100644
102--- a/lib/lp/code/model/tests/test_githosting.py
103+++ b/lib/lp/code/model/tests/test_githosting.py
104@@ -443,7 +443,19 @@ class TestGitHostingClient(TestCase):
105 self.assertRequest(
106 "repo/123/detect-merges/a",
107 method="POST",
108- json_data={"sources": ["b", "c"]},
109+ json_data={"sources": ["b", "c"], "stop": []},
110+ )
111+
112+ def test_detectMerges_previous_target(self):
113+ with self.mockRequests("POST", json={"b": "0"}):
114+ merges = self.client.detectMerges(
115+ "123", "a", ["b", "c"], previous_target="d"
116+ )
117+ self.assertEqual({"b": "0"}, merges)
118+ self.assertRequest(
119+ "repo/123/detect-merges/a",
120+ method="POST",
121+ json_data={"sources": ["b", "c"], "stop": ["d"]},
122 )
123
124 def test_detectMerges_failure(self):
125diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
126index 6f05eb7..29e8d59 100644
127--- a/lib/lp/code/model/tests/test_gitrepository.py
128+++ b/lib/lp/code/model/tests/test_gitrepository.py
129@@ -3937,6 +3937,9 @@ class TestGitRepositoryDetectMerges(TestCaseWithFactory):
130 bmp3 = self.factory.makeBranchMergeProposalForGit(
131 target_ref=target_2, source_ref=source_1
132 )
133+ previous_targets = {
134+ bmp.id: bmp.target_git_commit_sha1 for bmp in (bmp1, bmp2, bmp3)
135+ }
136 hosting_fixture = self.useFixture(
137 GitHostingFixture(merges={source_1.commit_sha1: "0" * 40})
138 )
139@@ -3958,20 +3961,26 @@ class TestGitRepositoryDetectMerges(TestCaseWithFactory):
140 _, events = self.assertNotifies(
141 expected_events, True, repository.createOrUpdateRefs, refs_info
142 )
143- expected_args = [
144+ expected_calls = [
145 (
146- repository.getInternalPath(),
147- target_1.commit_sha1,
148- {source_1.commit_sha1, source_2.commit_sha1},
149+ (
150+ repository.getInternalPath(),
151+ target_1.commit_sha1,
152+ {source_1.commit_sha1, source_2.commit_sha1},
153+ ),
154+ {"previous_target": previous_targets[bmp1.id]},
155 ),
156 (
157- repository.getInternalPath(),
158- target_2.commit_sha1,
159- {source_1.commit_sha1},
160+ (
161+ repository.getInternalPath(),
162+ target_2.commit_sha1,
163+ {source_1.commit_sha1},
164+ ),
165+ {"previous_target": previous_targets[bmp3.id]},
166 ),
167 ]
168 self.assertContentEqual(
169- expected_args, hosting_fixture.detectMerges.extract_args()
170+ expected_calls, hosting_fixture.detectMerges.calls
171 )
172 self.assertEqual(BranchMergeProposalStatus.MERGED, bmp1.queue_status)
173 self.assertEqual("0" * 40, bmp1.merged_revision_id)
174diff --git a/lib/lp/code/subscribers/git.py b/lib/lp/code/subscribers/git.py
175index 99c4643..8fc11fb 100644
176--- a/lib/lp/code/subscribers/git.py
177+++ b/lib/lp/code/subscribers/git.py
178@@ -21,10 +21,17 @@ def refs_created(repository, event):
179
180 def refs_updated(repository, event):
181 """Some references in a Git repository have been updated."""
182+ # Remember the previous target commit IDs so that detectMerges can know
183+ # how far back in history it needs to search.
184+ previous_targets = {
185+ bmp.id: bmp.target_git_commit_sha1
186+ for bmp in repository.getActiveLandingCandidates(event.paths)
187+ }
188+
189 repository.updateMergeCommitIDs(event.paths)
190 repository.updateLandingTargets(event.paths)
191 repository.markRecipesStale(event.paths)
192 repository.markSnapsStale(event.paths)
193 repository.markCharmRecipesStale(event.paths)
194- repository.detectMerges(event.paths, logger=event.logger)
195+ repository.detectMerges(event.paths, previous_targets, logger=event.logger)
196 _request_ci_builds(repository, event)

Subscribers

People subscribed via source and target branches

to status/vote changes: