Merge ~cjwatson/launchpad:handle-invalid-git-commit-addresses into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: a5ffc7bea959e535f741d8e0ea2cad38dc1ecd0c
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:handle-invalid-git-commit-addresses
Merge into: launchpad:master
Diff against target: 274 lines (+184/-19)
5 files modified
lib/lp/code/browser/tests/test_gitref.py (+28/-0)
lib/lp/code/model/gitrepository.py (+22/-8)
lib/lp/code/model/tests/test_gitrepository.py (+119/-0)
lib/lp/code/templates/git-macros.pt (+10/-8)
lib/lp/code/templates/gitref-listing.pt (+5/-3)
Reviewer Review Type Date Requested Status
Jürgen Gmach Approve
Review via email: mp+414314@code.launchpad.net

Commit message

Tolerate git commits with invalid author/committer emails

Description of the change

`email.utils.formataddr` requires email addresses to be ASCII, per the email RFCs, and raises `UnicodeEncodeError` if they aren't. However, by the time a commit gets this far, we need to do something with it (it may be an import from a repository hosted somewhere else, in which case we wouldn't even theoretically be able to reject the bad metadata on push). Leave the author or committer unset in this case and ensure that we can still render the commit properly when we need to.

To post a comment you must log in.
Revision history for this message
Jürgen Gmach (jugmac00) wrote :

LGTM

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/code/browser/tests/test_gitref.py b/lib/lp/code/browser/tests/test_gitref.py
2index 75b5012..8af99ca 100644
3--- a/lib/lp/code/browser/tests/test_gitref.py
4+++ b/lib/lp/code/browser/tests/test_gitref.py
5@@ -630,6 +630,34 @@ class TestGitRefView(BrowserTestCase):
6 self.assertThat(
7 contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))
8
9+ def test_recent_commits_with_invalid_author_email(self):
10+ # If an author's email address is syntactically invalid, then we
11+ # ignore the authorship information for that commit and do our best
12+ # to render what we have.
13+ [ref] = self.factory.makeGitRefs(paths=["refs/heads/branch"])
14+ log = self.makeCommitLog()
15+ log[4]["author"]["email"] = "“%s”" % log[4]["author"]["email"]
16+ self.hosting_fixture.getLog.result = list(reversed(log))
17+ self.scanRef(ref, log[-1])
18+ view = create_initialized_view(ref, "+index")
19+ contents = view()
20+ expected_texts = ["%.7s...\non 2015-01-05" % log[4]["sha1"]]
21+ expected_texts.extend(reversed([
22+ "%.7s...\nby\n%s\non 2015-01-%02d" % (
23+ log[i]["sha1"], log[i]["author"]["name"], i + 1)
24+ for i in range(4)]))
25+ details = find_tags_by_class(contents, "commit-details")
26+ self.assertEqual(
27+ expected_texts, [extract_text(detail) for detail in details])
28+ expected_urls = list(reversed([
29+ "https://git.launchpad.test/%s/commit/?id=%s" % (
30+ ref.repository.shortened_path, log[i]["sha1"])
31+ for i in range(5)]))
32+ self.assertEqual(
33+ expected_urls, [detail.a["href"] for detail in details])
34+ self.assertThat(
35+ contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))
36+
37 def test_show_merge_link_for_personal_repo(self):
38 person = self.factory.makePerson()
39 repo = self.factory.makeGitRepository(
40diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
41index fca7dfa..8a3b08c 100644
42--- a/lib/lp/code/model/gitrepository.py
43+++ b/lib/lp/code/model/gitrepository.py
44@@ -258,20 +258,34 @@ def parse_git_commits(commits):
45 info["author_date"] = datetime.fromtimestamp(
46 author["time"], tz=pytz.UTC)
47 if "name" in author and "email" in author:
48- author_addr = email.utils.formataddr(
49- (author["name"], author["email"]))
50- info["author_addr"] = author_addr
51- authors_to_acquire.append(author_addr)
52+ try:
53+ author_addr = email.utils.formataddr(
54+ (author["name"], author["email"]))
55+ except UnicodeEncodeError:
56+ # Addresses must be ASCII; formataddr raises
57+ # UnicodeEncodeError if they aren't. Just skip the
58+ # author in that case.
59+ pass
60+ else:
61+ info["author_addr"] = author_addr
62+ authors_to_acquire.append(author_addr)
63 committer = commit.get("committer")
64 if committer is not None:
65 if "time" in committer:
66 info["committer_date"] = datetime.fromtimestamp(
67 committer["time"], tz=pytz.UTC)
68 if "name" in committer and "email" in committer:
69- committer_addr = email.utils.formataddr(
70- (committer["name"], committer["email"]))
71- info["committer_addr"] = committer_addr
72- committers_to_acquire.append(committer_addr)
73+ try:
74+ committer_addr = email.utils.formataddr(
75+ (committer["name"], committer["email"]))
76+ except UnicodeEncodeError:
77+ # Addresses must be ASCII; formataddr raises
78+ # UnicodeEncodeError if they aren't. Just skip the
79+ # committer in that case.
80+ pass
81+ else:
82+ info["committer_addr"] = committer_addr
83+ committers_to_acquire.append(committer_addr)
84 if "message" in commit:
85 info["commit_message"] = commit["message"]
86 parsed[commit["sha1"]] = info
87diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
88index f78bf5d..39a46d5 100644
89--- a/lib/lp/code/model/tests/test_gitrepository.py
90+++ b/lib/lp/code/model/tests/test_gitrepository.py
91@@ -126,6 +126,7 @@ from lp.code.model.gitrepository import (
92 DeletionCallable,
93 DeletionOperation,
94 GitRepository,
95+ parse_git_commits,
96 REVISION_STATUS_REPORT_ALLOW_CREATE,
97 )
98 from lp.code.tests.helpers import GitHostingFixture
99@@ -201,6 +202,124 @@ from lp.xmlrpc import faults
100 from lp.xmlrpc.interfaces import IPrivateApplication
101
102
103+class TestParseGitCommits(TestCaseWithFactory):
104+
105+ layer = ZopelessDatabaseLayer
106+
107+ def test_valid(self):
108+ master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
109+ author = self.factory.makePerson()
110+ with person_logged_in(author):
111+ author_email = author.preferredemail.email
112+ author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
113+ committer_date = datetime(2015, 1, 2, tzinfo=pytz.UTC)
114+ commits = [
115+ {
116+ "sha1": master_sha1,
117+ "message": "tip of master",
118+ "author": {
119+ "name": author.displayname,
120+ "email": author_email,
121+ "time": int(seconds_since_epoch(author_date)),
122+ },
123+ "committer": {
124+ "name": "New Person",
125+ "email": "new-person@example.org",
126+ "time": int(seconds_since_epoch(committer_date)),
127+ },
128+ },
129+ ]
130+ parsed_commits = parse_git_commits(commits)
131+ expected_author_addr = "%s <%s>" % (author.displayname, author_email)
132+ [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
133+ [expected_author_addr]).values()
134+ expected_committer_addr = "New Person <new-person@example.org>"
135+ [expected_committer] = getUtility(IRevisionSet).acquireRevisionAuthors(
136+ ["New Person <new-person@example.org>"]).values()
137+ self.assertEqual(
138+ {
139+ master_sha1: {
140+ "sha1": master_sha1,
141+ "author": expected_author,
142+ "author_addr": expected_author_addr,
143+ "author_date": author_date,
144+ "committer": expected_committer,
145+ "committer_addr": expected_committer_addr,
146+ "committer_date": committer_date,
147+ "commit_message": "tip of master",
148+ },
149+ },
150+ parsed_commits)
151+
152+ def test_invalid_author_address(self):
153+ master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
154+ author_date = datetime(2022, 1, 1, tzinfo=pytz.UTC)
155+ commits = [
156+ {
157+ "sha1": master_sha1,
158+ "author": {
159+ "name": "New Person",
160+ "email": "“accidental-quotes@example.org”",
161+ "time": int(seconds_since_epoch(author_date)),
162+ },
163+ "committer": {
164+ "name": "New Person",
165+ "email": "accidental-quotes@example.org",
166+ "time": int(seconds_since_epoch(author_date)),
167+ },
168+ }
169+ ]
170+ parsed_commits = parse_git_commits(commits)
171+ [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
172+ ["New Person <accidental-quotes@example.org>"]).values()
173+ self.assertEqual(
174+ {
175+ master_sha1: {
176+ "sha1": master_sha1,
177+ "author_date": author_date,
178+ "committer": expected_author,
179+ "committer_addr": (
180+ "New Person <accidental-quotes@example.org>"),
181+ "committer_date": author_date,
182+ },
183+ },
184+ parsed_commits)
185+
186+ def test_invalid_committer_address(self):
187+ master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
188+ author_date = datetime(2022, 1, 1, tzinfo=pytz.UTC)
189+ commits = [
190+ {
191+ "sha1": master_sha1,
192+ "author": {
193+ "name": "New Person",
194+ "email": "accidental-quotes@example.org",
195+ "time": int(seconds_since_epoch(author_date)),
196+ },
197+ "committer": {
198+ "name": "New Person",
199+ "email": "“accidental-quotes@example.org”",
200+ "time": int(seconds_since_epoch(author_date)),
201+ },
202+ }
203+ ]
204+ parsed_commits = parse_git_commits(commits)
205+ [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
206+ ["New Person <accidental-quotes@example.org>"]).values()
207+ self.assertEqual(
208+ {
209+ master_sha1: {
210+ "sha1": master_sha1,
211+ "author": expected_author,
212+ "author_addr": (
213+ "New Person <accidental-quotes@example.org>"),
214+ "author_date": author_date,
215+ "committer_date": author_date,
216+ },
217+ },
218+ parsed_commits)
219+
220+
221 class TestGitRepository(TestCaseWithFactory):
222 """Test basic properties about Launchpad database Git repositories."""
223
224diff --git a/lib/lp/code/templates/git-macros.pt b/lib/lp/code/templates/git-macros.pt
225index 8679590..a990c18 100644
226--- a/lib/lp/code/templates/git-macros.pt
227+++ b/lib/lp/code/templates/git-macros.pt
228@@ -200,17 +200,19 @@
229 <dt class="commit-details"
230 tal:define="
231 sha1 python:commit_info['sha1'];
232- author python:commit_info['author'];
233+ author python:commit_info.get('author');
234 author_date python:commit_info['author_date']">
235 <a tal:attributes="href python: ref.getCodebrowseUrlForRevision(sha1)"
236 tal:content="sha1/fmt:shorten/10" />
237- by
238- <tal:known-person condition="author/person">
239- <tal:person-link replace="structure author/person/fmt:link" />
240- </tal:known-person>
241- <tal:unknown-person condition="not: author/person">
242- <strong tal:content="author/name/fmt:obfuscate-email" />
243- </tal:unknown-person>
244+ <tal:has-author condition="author">
245+ by
246+ <tal:known-person condition="author/person">
247+ <tal:person-link replace="structure author/person/fmt:link" />
248+ </tal:known-person>
249+ <tal:unknown-person condition="not: author/person">
250+ <strong tal:content="author/name/fmt:obfuscate-email" />
251+ </tal:unknown-person>
252+ </tal:has-author>
253 <tal:author-date replace="structure author_date/fmt:displaydatetitle" />
254 </dt>
255 <dd class="subordinate commit-message"
256diff --git a/lib/lp/code/templates/gitref-listing.pt b/lib/lp/code/templates/gitref-listing.pt
257index d9f3260..4eaa8e9 100644
258--- a/lib/lp/code/templates/gitref-listing.pt
259+++ b/lib/lp/code/templates/gitref-listing.pt
260@@ -59,9 +59,11 @@ function hide_git_commit(id) {
261 tal:attributes="id string:git-ref-${repeat/ref/index};
262 onmouseover string:hide_commit(${repeat/ref/index});">
263 <p>
264- <strong>Author:</strong>
265- <tal:author replace="structure ref/author/fmt:link" />
266- <br />
267+ <tal:has-author condition="ref/author">
268+ <strong>Author:</strong>
269+ <tal:author replace="structure ref/author/fmt:link" />
270+ <br />
271+ </tal:has-author>
272 <strong>Author Date:</strong>
273 <tal:author replace="structure ref/author_date/fmt:datetime" />
274 </p>

Subscribers

People subscribed via source and target branches

to status/vote changes: