Merge ~cjwatson/launchpad:handle-invalid-git-commit-addresses into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: a5ffc7bea959e535f741d8e0ea2cad38dc1ecd0c
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:handle-invalid-git-commit-addresses
Merge into: launchpad:master
Diff against target: 274 lines (+184/-19)
5 files modified
lib/lp/code/browser/tests/test_gitref.py (+28/-0)
lib/lp/code/model/gitrepository.py (+22/-8)
lib/lp/code/model/tests/test_gitrepository.py (+119/-0)
lib/lp/code/templates/git-macros.pt (+10/-8)
lib/lp/code/templates/gitref-listing.pt (+5/-3)
Reviewer Review Type Date Requested Status
Jürgen Gmach Approve
Review via email: mp+414314@code.launchpad.net

Commit message

Tolerate git commits with invalid author/committer emails

Description of the change

`email.utils.formataddr` requires email addresses to be ASCII, per the email RFCs, and raises `UnicodeEncodeError` if they aren't. However, by the time a commit gets this far, we need to do something with it (it may be an import from a repository hosted somewhere else, in which case we wouldn't even theoretically be able to reject the bad metadata on push). Leave the author or committer unset in this case and ensure that we can still render the commit properly when we need to.

To post a comment you must log in.
Revision history for this message
Jürgen Gmach (jugmac00) wrote :

LGTM

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/lib/lp/code/browser/tests/test_gitref.py b/lib/lp/code/browser/tests/test_gitref.py
index 75b5012..8af99ca 100644
--- a/lib/lp/code/browser/tests/test_gitref.py
+++ b/lib/lp/code/browser/tests/test_gitref.py
@@ -630,6 +630,34 @@ class TestGitRefView(BrowserTestCase):
630 self.assertThat(630 self.assertThat(
631 contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))631 contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))
632632
633 def test_recent_commits_with_invalid_author_email(self):
634 # If an author's email address is syntactically invalid, then we
635 # ignore the authorship information for that commit and do our best
636 # to render what we have.
637 [ref] = self.factory.makeGitRefs(paths=["refs/heads/branch"])
638 log = self.makeCommitLog()
639 log[4]["author"]["email"] = "“%s”" % log[4]["author"]["email"]
640 self.hosting_fixture.getLog.result = list(reversed(log))
641 self.scanRef(ref, log[-1])
642 view = create_initialized_view(ref, "+index")
643 contents = view()
644 expected_texts = ["%.7s...\non 2015-01-05" % log[4]["sha1"]]
645 expected_texts.extend(reversed([
646 "%.7s...\nby\n%s\non 2015-01-%02d" % (
647 log[i]["sha1"], log[i]["author"]["name"], i + 1)
648 for i in range(4)]))
649 details = find_tags_by_class(contents, "commit-details")
650 self.assertEqual(
651 expected_texts, [extract_text(detail) for detail in details])
652 expected_urls = list(reversed([
653 "https://git.launchpad.test/%s/commit/?id=%s" % (
654 ref.repository.shortened_path, log[i]["sha1"])
655 for i in range(5)]))
656 self.assertEqual(
657 expected_urls, [detail.a["href"] for detail in details])
658 self.assertThat(
659 contents, Not(soupmatchers.HTMLContains(MissingCommitsNote())))
660
633 def test_show_merge_link_for_personal_repo(self):661 def test_show_merge_link_for_personal_repo(self):
634 person = self.factory.makePerson()662 person = self.factory.makePerson()
635 repo = self.factory.makeGitRepository(663 repo = self.factory.makeGitRepository(
diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
index fca7dfa..8a3b08c 100644
--- a/lib/lp/code/model/gitrepository.py
+++ b/lib/lp/code/model/gitrepository.py
@@ -258,20 +258,34 @@ def parse_git_commits(commits):
258 info["author_date"] = datetime.fromtimestamp(258 info["author_date"] = datetime.fromtimestamp(
259 author["time"], tz=pytz.UTC)259 author["time"], tz=pytz.UTC)
260 if "name" in author and "email" in author:260 if "name" in author and "email" in author:
261 author_addr = email.utils.formataddr(261 try:
262 (author["name"], author["email"]))262 author_addr = email.utils.formataddr(
263 info["author_addr"] = author_addr263 (author["name"], author["email"]))
264 authors_to_acquire.append(author_addr)264 except UnicodeEncodeError:
265 # Addresses must be ASCII; formataddr raises
266 # UnicodeEncodeError if they aren't. Just skip the
267 # author in that case.
268 pass
269 else:
270 info["author_addr"] = author_addr
271 authors_to_acquire.append(author_addr)
265 committer = commit.get("committer")272 committer = commit.get("committer")
266 if committer is not None:273 if committer is not None:
267 if "time" in committer:274 if "time" in committer:
268 info["committer_date"] = datetime.fromtimestamp(275 info["committer_date"] = datetime.fromtimestamp(
269 committer["time"], tz=pytz.UTC)276 committer["time"], tz=pytz.UTC)
270 if "name" in committer and "email" in committer:277 if "name" in committer and "email" in committer:
271 committer_addr = email.utils.formataddr(278 try:
272 (committer["name"], committer["email"]))279 committer_addr = email.utils.formataddr(
273 info["committer_addr"] = committer_addr280 (committer["name"], committer["email"]))
274 committers_to_acquire.append(committer_addr)281 except UnicodeEncodeError:
282 # Addresses must be ASCII; formataddr raises
283 # UnicodeEncodeError if they aren't. Just skip the
284 # committer in that case.
285 pass
286 else:
287 info["committer_addr"] = committer_addr
288 committers_to_acquire.append(committer_addr)
275 if "message" in commit:289 if "message" in commit:
276 info["commit_message"] = commit["message"]290 info["commit_message"] = commit["message"]
277 parsed[commit["sha1"]] = info291 parsed[commit["sha1"]] = info
diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
index f78bf5d..39a46d5 100644
--- a/lib/lp/code/model/tests/test_gitrepository.py
+++ b/lib/lp/code/model/tests/test_gitrepository.py
@@ -126,6 +126,7 @@ from lp.code.model.gitrepository import (
126 DeletionCallable,126 DeletionCallable,
127 DeletionOperation,127 DeletionOperation,
128 GitRepository,128 GitRepository,
129 parse_git_commits,
129 REVISION_STATUS_REPORT_ALLOW_CREATE,130 REVISION_STATUS_REPORT_ALLOW_CREATE,
130 )131 )
131from lp.code.tests.helpers import GitHostingFixture132from lp.code.tests.helpers import GitHostingFixture
@@ -201,6 +202,124 @@ from lp.xmlrpc import faults
201from lp.xmlrpc.interfaces import IPrivateApplication202from lp.xmlrpc.interfaces import IPrivateApplication
202203
203204
205class TestParseGitCommits(TestCaseWithFactory):
206
207 layer = ZopelessDatabaseLayer
208
209 def test_valid(self):
210 master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
211 author = self.factory.makePerson()
212 with person_logged_in(author):
213 author_email = author.preferredemail.email
214 author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
215 committer_date = datetime(2015, 1, 2, tzinfo=pytz.UTC)
216 commits = [
217 {
218 "sha1": master_sha1,
219 "message": "tip of master",
220 "author": {
221 "name": author.displayname,
222 "email": author_email,
223 "time": int(seconds_since_epoch(author_date)),
224 },
225 "committer": {
226 "name": "New Person",
227 "email": "new-person@example.org",
228 "time": int(seconds_since_epoch(committer_date)),
229 },
230 },
231 ]
232 parsed_commits = parse_git_commits(commits)
233 expected_author_addr = "%s <%s>" % (author.displayname, author_email)
234 [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
235 [expected_author_addr]).values()
236 expected_committer_addr = "New Person <new-person@example.org>"
237 [expected_committer] = getUtility(IRevisionSet).acquireRevisionAuthors(
238 ["New Person <new-person@example.org>"]).values()
239 self.assertEqual(
240 {
241 master_sha1: {
242 "sha1": master_sha1,
243 "author": expected_author,
244 "author_addr": expected_author_addr,
245 "author_date": author_date,
246 "committer": expected_committer,
247 "committer_addr": expected_committer_addr,
248 "committer_date": committer_date,
249 "commit_message": "tip of master",
250 },
251 },
252 parsed_commits)
253
254 def test_invalid_author_address(self):
255 master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
256 author_date = datetime(2022, 1, 1, tzinfo=pytz.UTC)
257 commits = [
258 {
259 "sha1": master_sha1,
260 "author": {
261 "name": "New Person",
262 "email": "“accidental-quotes@example.org”",
263 "time": int(seconds_since_epoch(author_date)),
264 },
265 "committer": {
266 "name": "New Person",
267 "email": "accidental-quotes@example.org",
268 "time": int(seconds_since_epoch(author_date)),
269 },
270 }
271 ]
272 parsed_commits = parse_git_commits(commits)
273 [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
274 ["New Person <accidental-quotes@example.org>"]).values()
275 self.assertEqual(
276 {
277 master_sha1: {
278 "sha1": master_sha1,
279 "author_date": author_date,
280 "committer": expected_author,
281 "committer_addr": (
282 "New Person <accidental-quotes@example.org>"),
283 "committer_date": author_date,
284 },
285 },
286 parsed_commits)
287
288 def test_invalid_committer_address(self):
289 master_sha1 = hashlib.sha1(b"refs/heads/master").hexdigest()
290 author_date = datetime(2022, 1, 1, tzinfo=pytz.UTC)
291 commits = [
292 {
293 "sha1": master_sha1,
294 "author": {
295 "name": "New Person",
296 "email": "accidental-quotes@example.org",
297 "time": int(seconds_since_epoch(author_date)),
298 },
299 "committer": {
300 "name": "New Person",
301 "email": "“accidental-quotes@example.org”",
302 "time": int(seconds_since_epoch(author_date)),
303 },
304 }
305 ]
306 parsed_commits = parse_git_commits(commits)
307 [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
308 ["New Person <accidental-quotes@example.org>"]).values()
309 self.assertEqual(
310 {
311 master_sha1: {
312 "sha1": master_sha1,
313 "author": expected_author,
314 "author_addr": (
315 "New Person <accidental-quotes@example.org>"),
316 "author_date": author_date,
317 "committer_date": author_date,
318 },
319 },
320 parsed_commits)
321
322
204class TestGitRepository(TestCaseWithFactory):323class TestGitRepository(TestCaseWithFactory):
205 """Test basic properties about Launchpad database Git repositories."""324 """Test basic properties about Launchpad database Git repositories."""
206325
diff --git a/lib/lp/code/templates/git-macros.pt b/lib/lp/code/templates/git-macros.pt
index 8679590..a990c18 100644
--- a/lib/lp/code/templates/git-macros.pt
+++ b/lib/lp/code/templates/git-macros.pt
@@ -200,17 +200,19 @@
200 <dt class="commit-details"200 <dt class="commit-details"
201 tal:define="201 tal:define="
202 sha1 python:commit_info['sha1'];202 sha1 python:commit_info['sha1'];
203 author python:commit_info['author'];203 author python:commit_info.get('author');
204 author_date python:commit_info['author_date']">204 author_date python:commit_info['author_date']">
205 <a tal:attributes="href python: ref.getCodebrowseUrlForRevision(sha1)"205 <a tal:attributes="href python: ref.getCodebrowseUrlForRevision(sha1)"
206 tal:content="sha1/fmt:shorten/10" />206 tal:content="sha1/fmt:shorten/10" />
207 by207 <tal:has-author condition="author">
208 <tal:known-person condition="author/person">208 by
209 <tal:person-link replace="structure author/person/fmt:link" />209 <tal:known-person condition="author/person">
210 </tal:known-person>210 <tal:person-link replace="structure author/person/fmt:link" />
211 <tal:unknown-person condition="not: author/person">211 </tal:known-person>
212 <strong tal:content="author/name/fmt:obfuscate-email" />212 <tal:unknown-person condition="not: author/person">
213 </tal:unknown-person>213 <strong tal:content="author/name/fmt:obfuscate-email" />
214 </tal:unknown-person>
215 </tal:has-author>
214 <tal:author-date replace="structure author_date/fmt:displaydatetitle" />216 <tal:author-date replace="structure author_date/fmt:displaydatetitle" />
215 </dt>217 </dt>
216 <dd class="subordinate commit-message"218 <dd class="subordinate commit-message"
diff --git a/lib/lp/code/templates/gitref-listing.pt b/lib/lp/code/templates/gitref-listing.pt
index d9f3260..4eaa8e9 100644
--- a/lib/lp/code/templates/gitref-listing.pt
+++ b/lib/lp/code/templates/gitref-listing.pt
@@ -59,9 +59,11 @@ function hide_git_commit(id) {
59 tal:attributes="id string:git-ref-${repeat/ref/index};59 tal:attributes="id string:git-ref-${repeat/ref/index};
60 onmouseover string:hide_commit(${repeat/ref/index});">60 onmouseover string:hide_commit(${repeat/ref/index});">
61 <p>61 <p>
62 <strong>Author:</strong>62 <tal:has-author condition="ref/author">
63 <tal:author replace="structure ref/author/fmt:link" />63 <strong>Author:</strong>
64 <br />64 <tal:author replace="structure ref/author/fmt:link" />
65 <br />
66 </tal:has-author>
65 <strong>Author Date:</strong>67 <strong>Author Date:</strong>
66 <tal:author replace="structure ref/author_date/fmt:datetime" />68 <tal:author replace="structure ref/author_date/fmt:datetime" />
67 </p>69 </p>

Subscribers

People subscribed via source and target branches

to status/vote changes: