Merge ~pfsmorigo/review-tools:check_link_improvements into review-tools:master

Proposed by Paulo Flabiano Smorigo
Status: Merged
Merged at revision: 80021aab0553f647060685ec62f862314af53766
Proposed branch: ~pfsmorigo/review-tools:check_link_improvements
Merge into: review-tools:master
Diff against target: 220 lines (+105/-34)
4 files modified
reviewtools/common.py (+0/-3)
reviewtools/sr_lint.py (+56/-10)
reviewtools/tests/test_sr_lint.py (+49/-18)
snapcraft.yaml (+0/-3)
Reviewer Review Type Date Requested Status
Alex Murray Approve
Review via email: mp+412055@code.launchpad.net

Description of the change

I used the ValidateLinks from snapd but had to make some changes. In snapd, the validation relies on an url parser that can check the syntax. For the review tools I'm using a regex for that.

To post a comment you must log in.
Revision history for this message
Alex Murray (alexmurray) wrote :

Thanks for this - I don't think we need LINK_TYPES anymore and I think you should just use the python standard library to parse / validate the URLs rather than a regex as that will be too fragile.

review: Needs Fixing
Revision history for this message
Paulo Flabiano Smorigo (pfsmorigo) wrote :

I just removed LINK_TYPES. For the regex, I'm willing to remove it and use a python library to validade but, since the system runs on xenial, it's hard to find a good library for it. I can't use python3-validators for example. There is urllib that can parse the url but don't validate it unless you use it for a query and check for the returning error. There is also the requests library but you also needs to do a request in order to check the result. About the regex, why too fragile? Maybe there would be cases that it will fails but than we can manually approve, right?

Revision history for this message
Alex Murray (alexmurray) wrote :

The snap store now uses focal not xenial and the review-tools snap is base: core18 so you should be able to use anything in bionic.

I personally don't want to have to manually approve every snap upload for a snap which uses a non-ascii URL or similar - I also don't think this is a good situation for such publishers either - so if we can just use an existing trusted library to parse + validate the URL then that seems like a better solution to me.

Revision history for this message
Alex Murray (alexmurray) wrote :

Let's merge this as is since it is very good - we can revisit it later if it turns out there are URLs which get caught by the regex.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/reviewtools/common.py b/reviewtools/common.py
index ce47886..548b8d7 100644
--- a/reviewtools/common.py
+++ b/reviewtools/common.py
@@ -140,9 +140,6 @@ OS_RELEASE_MAP = {
140 }140 }
141}141}
142142
143# Link categories used in snap.yaml
144LINK_TYPES = ["website", "source-code", "contact", "donation", "issues"]
145
146143
147def cleanup_unpack():144def cleanup_unpack():
148 global UNPACK_DIR145 global UNPACK_DIR
diff --git a/reviewtools/sr_lint.py b/reviewtools/sr_lint.py
index 6cca8d9..819ef5a 100644
--- a/reviewtools/sr_lint.py
+++ b/reviewtools/sr_lint.py
@@ -20,7 +20,6 @@ from reviewtools.common import (
20 find_external_symlinks,20 find_external_symlinks,
21 STORE_PKGNAME_SNAPV2_MAXLEN,21 STORE_PKGNAME_SNAPV2_MAXLEN,
22 STORE_PKGNAME_SNAPV2_MINLEN,22 STORE_PKGNAME_SNAPV2_MINLEN,
23 LINK_TYPES,
24)23)
25from reviewtools.overrides import (24from reviewtools.overrides import (
26 redflagged_snap_types_overrides,25 redflagged_snap_types_overrides,
@@ -348,23 +347,70 @@ class SnapReviewLint(SnapReview):
348 if "links" not in self.snap_yaml:347 if "links" not in self.snap_yaml:
349 return348 return
350349
351 for val in self.snap_yaml["links"]:350 # Check links is partially based on:
352 if val not in LINK_TYPES:351 # https://github.com/snapcore/snapd/blob/master/snap/validate.go#L1132
352
353 valid_link = re.compile(r"^[a-zA-Z](?:-?[a-zA-Z0-9])*$")
354 valid_url = re.compile(
355 r"^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$"
356 )
357 valid_email = re.compile(
358 r"^(mailto:|)[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}$"
359 )
360
361 for linkskey in self.snap_yaml["links"]:
362 if linkskey.strip() == "":
353 t = "error"363 t = "error"
354 s = "'unknown field for links in snap.yaml: %s" % val364 s = "links key cannot be empty"
355 self._add_result(t, n, s)365 self._add_result(t, n, s)
356 return366 return
357367
358 content = self.snap_yaml["links"][val]368 if not valid_link.match(linkskey):
359 if not isinstance(content, str) and not (369 t = "error"
360 isinstance(content, list)370 s = "links key is invalid: %s" % linkskey
361 and all(isinstance(item, str) for item in content)371 self._add_result(t, n, s)
362 ):372 return
373
374 linksvalues = self.snap_yaml["links"][linkskey]
375
376 if not isinstance(linksvalues, list):
377 t = "error"
378 s = "%s is not a list of strings" % linkskey
379 self._add_result(t, n, s)
380 return
381
382 if len(linksvalues) == 0:
363 t = "error"383 t = "error"
364 s = "'invalid value for %s in snap.yaml" % val384 s = "%s links cannot be specified and empty" % linkskey
365 self._add_result(t, n, s)385 self._add_result(t, n, s)
366 return386 return
367387
388 for link in linksvalues:
389 if link == "":
390 t = "error"
391 s = "empty %s link" % linkskey
392 self._add_result(t, n, s)
393 return
394
395 if not isinstance(link, str):
396 t = "error"
397 s = "%s is not a string" % link
398 self._add_result(t, n, s)
399 return
400
401 if not (
402 linkskey == "contact"
403 and valid_email.match(link)
404 or valid_url.match(link)
405 ):
406 t = "error"
407 s = (
408 "%s link must have one of http|https schemes or it must be an email address: %s"
409 % (linkskey, link)
410 )
411 self._add_result(t, n, s, manual_review=True)
412 return
413
368 def check_unknown_entries(self):414 def check_unknown_entries(self):
369 """Check for any unknown fields"""415 """Check for any unknown fields"""
370 t = "info"416 t = "info"
diff --git a/reviewtools/tests/test_sr_lint.py b/reviewtools/tests/test_sr_lint.py
index 5d9aba7..27b7533 100644
--- a/reviewtools/tests/test_sr_lint.py
+++ b/reviewtools/tests/test_sr_lint.py
@@ -976,21 +976,16 @@ class TestSnapReviewLint(sr_tests.TestSnapReview):
976976
977 def test_check_link(self):977 def test_check_link(self):
978 """Test check_link"""978 """Test check_link"""
979 self.set_test_snap_yaml("links", [])
980 c = SnapReviewLint(self.test_name)
981 c.check_links()
982 r = c.review_report
983 expected_counts = {"info": None, "warn": 0, "error": 0}
984 self.check_results(r, expected_counts)
985
986 def test_check_link_valid(self):
987 """Test check_link_valid"""
988 links = {979 links = {
989 "website": "https://snapcraft.io",980 "website": ["https://snapcraft.io"],
990 "source-code": "https://github.com/snapcore/snapcraft",981 "source-code": ["https://github.com/snapcore/snapcraft"],
991 "contact": ["https://forum.snapcraft.io", "snapcraft@forum.snapcraft.io"],982 "contact": [
992 "donation": "http://donate.me",983 "https://forum.snapcraft.io",
993 "issues": "https://bugs.launchpad.net/snapcraft/+filebug",984 "mailto:snapcraft@forum.snapcraft.io",
985 "another@email.com",
986 ],
987 "donation": ["http://donate.me"],
988 "issues": ["https://bugs.launchpad.net/snapcraft/+filebug"],
994 }989 }
995 self.set_test_snap_yaml("links", links)990 self.set_test_snap_yaml("links", links)
996 c = SnapReviewLint(self.test_name)991 c = SnapReviewLint(self.test_name)
@@ -1001,15 +996,51 @@ class TestSnapReviewLint(sr_tests.TestSnapReview):
1001996
1002 def test_check_link_invalid_field(self):997 def test_check_link_invalid_field(self):
1003 """Test check_link_invalid_field"""998 """Test check_link_invalid_field"""
1004 self.set_test_snap_yaml("links", {"invalid_field": ""})999 self.set_test_snap_yaml("links", {"invalid_field": [""]})
1000 c = SnapReviewLint(self.test_name)
1001 c.check_links()
1002 r = c.review_report
1003 expected_counts = {"info": None, "warn": 0, "error": 1}
1004 self.check_results(r, expected_counts)
1005
1006 def test_check_link_invalid_url_no_http_prefix(self):
1007 """Test check_link_invalid_url_no_http_prefix"""
1008 self.set_test_snap_yaml("links", {"website": "www.snapcraft.io"})
1005 c = SnapReviewLint(self.test_name)1009 c = SnapReviewLint(self.test_name)
1006 c.check_links()1010 c.check_links()
1007 r = c.review_report1011 r = c.review_report
1008 expected_counts = {"info": None, "warn": 0, "error": 1}1012 expected_counts = {"info": None, "warn": 0, "error": 1}
1009 self.check_results(r, expected_counts)1013 self.check_results(r, expected_counts)
10101014
1011 def test_check_link_invalid_content_1(self):1015 def test_check_link_invalid_email(self):
1012 """Test check_link_invalid_email"""1016 """Test check_link_invalid_email"""
1017 self.set_test_snap_yaml("links", {"contact": "invalid(at)email.com"})
1018 c = SnapReviewLint(self.test_name)
1019 c.check_links()
1020 r = c.review_report
1021 expected_counts = {"info": None, "warn": 0, "error": 1}
1022 self.check_results(r, expected_counts)
1023
1024 def test_check_link_email_not_in_contacts(self):
1025 """Test check_link_email_not_in_contacts"""
1026 self.set_test_snap_yaml("links", {"issues": "valid@email.com"})
1027 c = SnapReviewLint(self.test_name)
1028 c.check_links()
1029 r = c.review_report
1030 expected_counts = {"info": None, "warn": 0, "error": 1}
1031 self.check_results(r, expected_counts)
1032
1033 def test_check_link_invalid_content_string(self):
1034 """Test check_link_invalid_content_string"""
1035 self.set_test_snap_yaml("links", {"website": "http://www.snapcraft.io"})
1036 c = SnapReviewLint(self.test_name)
1037 c.check_links()
1038 r = c.review_report
1039 expected_counts = {"info": None, "warn": 0, "error": 1}
1040 self.check_results(r, expected_counts)
1041
1042 def test_check_link_invalid_content_number(self):
1043 """Test check_link_invalid_content_number"""
1013 self.set_test_snap_yaml("links", {"contact": 123})1044 self.set_test_snap_yaml("links", {"contact": 123})
1014 c = SnapReviewLint(self.test_name)1045 c = SnapReviewLint(self.test_name)
1015 c.check_links()1046 c.check_links()
@@ -1017,8 +1048,8 @@ class TestSnapReviewLint(sr_tests.TestSnapReview):
1017 expected_counts = {"info": None, "warn": 0, "error": 1}1048 expected_counts = {"info": None, "warn": 0, "error": 1}
1018 self.check_results(r, expected_counts)1049 self.check_results(r, expected_counts)
10191050
1020 def test_check_link_invalid_content_2(self):1051 def test_check_link_invalid_content_number_list(self):
1021 """Test check_link_invalid_email"""1052 """Test check_link_invalid_content_number_list"""
1022 self.set_test_snap_yaml("links", {"website": [456, 789]})1053 self.set_test_snap_yaml("links", {"website": [456, 789]})
1023 c = SnapReviewLint(self.test_name)1054 c = SnapReviewLint(self.test_name)
1024 c.check_links()1055 c.check_links()
diff --git a/snapcraft.yaml b/snapcraft.yaml
index b651682..1cc7be2 100644
--- a/snapcraft.yaml
+++ b/snapcraft.yaml
@@ -7,9 +7,6 @@ description: |
7 and can be used to verify your snap before upload.7 and can be used to verify your snap before upload.
8confinement: strict8confinement: strict
9base: core189base: core18
10website: https://launchpad.net/review-tools
11source-code: https://git.launchpad.net/review-tools
12issues: https://bugs.launchpad.net/review-tools
1310
14environment:11environment:
15 LANG: C.UTF-812 LANG: C.UTF-8

Subscribers

People subscribed via source and target branches