Merge lp:~cjwatson/launchpad/optimise-git-ref-scan into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18825
Proposed branch: lp:~cjwatson/launchpad/optimise-git-ref-scan
Merge into: lp:launchpad
Diff against target: 86 lines (+51/-9)
2 files modified
lib/lp/code/model/gitrepository.py (+16/-9)
lib/lp/code/model/tests/test_gitrepository.py (+35/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/optimise-git-ref-scan
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+359171@code.launchpad.net

Commit message

Reduce the number of GitRef columns fetched by ref scan jobs.

Description of the change

GitRef rows can be pretty wide due to commit_message, and we don't need the whole thing here.

I'm sure there's more to do here, since there are repositories with more than 100000 refs, but this may help with memory exhaustion issues.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/code/model/gitrepository.py'
2--- lib/lp/code/model/gitrepository.py 2018-11-09 22:06:43 +0000
3+++ lib/lp/code/model/gitrepository.py 2018-11-22 07:29:19 +0000
4@@ -679,20 +679,27 @@
5 if logger is not None:
6 logger.warning(
7 "Unconvertible ref %s %s: %s" % (path, info, e))
8- current_refs = {ref.path: ref for ref in self.refs}
9+ # GitRef rows can be large (especially commit_message), and we don't
10+ # need the whole thing.
11+ current_refs = {
12+ ref[0]: ref[1:]
13+ for ref in Store.of(self).find(
14+ (GitRef.path, GitRef.commit_sha1, GitRef.object_type,
15+ And(
16+ GitRef.author_id != None,
17+ GitRef.author_date != None,
18+ GitRef.committer_id != None,
19+ GitRef.committer_date != None,
20+ GitRef.commit_message != None)),
21+ GitRef.repository_id == self.id)}
22 refs_to_upsert = {}
23 for path, info in new_refs.items():
24 current_ref = current_refs.get(path)
25 if (current_ref is None or
26- info["sha1"] != current_ref.commit_sha1 or
27- info["type"] != current_ref.object_type):
28+ info["sha1"] != current_ref[0] or
29+ info["type"] != current_ref[1]):
30 refs_to_upsert[path] = info
31- elif (info["type"] == GitObjectType.COMMIT and
32- (current_ref.author_id is None or
33- current_ref.author_date is None or
34- current_ref.committer_id is None or
35- current_ref.committer_date is None or
36- current_ref.commit_message is None)):
37+ elif info["type"] == GitObjectType.COMMIT and not current_ref[2]:
38 # Only request detailed commit metadata for refs that point
39 # to commits.
40 refs_to_upsert[path] = info
41
42=== modified file 'lib/lp/code/model/tests/test_gitrepository.py'
43--- lib/lp/code/model/tests/test_gitrepository.py 2018-11-09 22:06:43 +0000
44+++ lib/lp/code/model/tests/test_gitrepository.py 2018-11-22 07:29:19 +0000
45@@ -1453,6 +1453,41 @@
46 }))
47 self.assertEqual(({}, set()), repository.planRefChanges("dummy"))
48
49+ def test_planRefChanges_includes_unfetched_commits(self):
50+ # planRefChanges plans updates to refs pointing to commits for which
51+ # we haven't yet fetched detailed metadata.
52+ repository = self.factory.makeGitRepository()
53+ paths = ("refs/heads/master", "refs/heads/foo")
54+ self.factory.makeGitRefs(repository=repository, paths=paths)
55+ author_addr = (
56+ removeSecurityProxy(repository.owner).preferredemail.email)
57+ [author] = getUtility(IRevisionSet).acquireRevisionAuthors(
58+ [author_addr]).values()
59+ naked_master = removeSecurityProxy(
60+ repository.getRefByPath("refs/heads/master"))
61+ naked_master.author_id = naked_master.committer_id = author.id
62+ naked_master.author_date = naked_master.committer_date = (
63+ datetime.now(pytz.UTC))
64+ naked_master.commit_message = "message"
65+ self.useFixture(GitHostingFixture(refs={
66+ path: {
67+ "object": {
68+ "sha1": repository.getRefByPath(path).commit_sha1,
69+ "type": "commit",
70+ },
71+ }
72+ for path in paths}))
73+ refs_to_upsert, refs_to_remove = repository.planRefChanges("dummy")
74+
75+ expected_upsert = {
76+ "refs/heads/foo": {
77+ "sha1": repository.getRefByPath("refs/heads/foo").commit_sha1,
78+ "type": GitObjectType.COMMIT,
79+ },
80+ }
81+ self.assertEqual(expected_upsert, refs_to_upsert)
82+ self.assertEqual(set(), refs_to_remove)
83+
84 def test_fetchRefCommits(self):
85 # fetchRefCommits fetches detailed tip commit metadata for the
86 # requested refs.