Merge ~pfsmorigo/review-tools:check_link_improvements into review-tools:master

Proposed by Paulo Flabiano Smorigo
Status: Merged
Merged at revision: 80021aab0553f647060685ec62f862314af53766
Proposed branch: ~pfsmorigo/review-tools:check_link_improvements
Merge into: review-tools:master
Diff against target: 220 lines (+105/-34)
4 files modified
reviewtools/common.py (+0/-3)
reviewtools/sr_lint.py (+56/-10)
reviewtools/tests/test_sr_lint.py (+49/-18)
snapcraft.yaml (+0/-3)
Reviewer Review Type Date Requested Status
Alex Murray Approve
Review via email: mp+412055@code.launchpad.net

Description of the change

I used the ValidateLinks from snapd but had to make some changes. In snapd, the validation relies on an url parser that can check the syntax. For the review tools I'm using a regex for that.

To post a comment you must log in.
Revision history for this message
Alex Murray (alexmurray) wrote :

Thanks for this - I don't think we need LINK_TYPES anymore and I think you should just use the python standard library to parse / validate the URLs rather than a regex as that will be too fragile.

review: Needs Fixing
Revision history for this message
Paulo Flabiano Smorigo (pfsmorigo) wrote :

I just removed LINK_TYPES. For the regex, I'm willing to remove it and use a python library to validade but, since the system runs on xenial, it's hard to find a good library for it. I can't use python3-validators for example. There is urllib that can parse the url but don't validate it unless you use it for a query and check for the returning error. There is also the requests library but you also needs to do a request in order to check the result. About the regex, why too fragile? Maybe there would be cases that it will fails but than we can manually approve, right?

Revision history for this message
Alex Murray (alexmurray) wrote :

The snap store now uses focal not xenial and the review-tools snap is base: core18 so you should be able to use anything in bionic.

I personally don't want to have to manually approve every snap upload for a snap which uses a non-ascii URL or similar - I also don't think this is a good situation for such publishers either - so if we can just use an existing trusted library to parse + validate the URL then that seems like a better solution to me.

Revision history for this message
Alex Murray (alexmurray) wrote :

Let's merge this as is since it is very good - we can revisit it later if it turns out there are URLs which get caught by the regex.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/reviewtools/common.py b/reviewtools/common.py
2index ce47886..548b8d7 100644
3--- a/reviewtools/common.py
4+++ b/reviewtools/common.py
5@@ -140,9 +140,6 @@ OS_RELEASE_MAP = {
6 }
7 }
8
9-# Link categories used in snap.yaml
10-LINK_TYPES = ["website", "source-code", "contact", "donation", "issues"]
11-
12
13 def cleanup_unpack():
14 global UNPACK_DIR
15diff --git a/reviewtools/sr_lint.py b/reviewtools/sr_lint.py
16index 6cca8d9..819ef5a 100644
17--- a/reviewtools/sr_lint.py
18+++ b/reviewtools/sr_lint.py
19@@ -20,7 +20,6 @@ from reviewtools.common import (
20 find_external_symlinks,
21 STORE_PKGNAME_SNAPV2_MAXLEN,
22 STORE_PKGNAME_SNAPV2_MINLEN,
23- LINK_TYPES,
24 )
25 from reviewtools.overrides import (
26 redflagged_snap_types_overrides,
27@@ -348,23 +347,70 @@ class SnapReviewLint(SnapReview):
28 if "links" not in self.snap_yaml:
29 return
30
31- for val in self.snap_yaml["links"]:
32- if val not in LINK_TYPES:
33+ # Check links is partially based on:
34+ # https://github.com/snapcore/snapd/blob/master/snap/validate.go#L1132
35+
36+ valid_link = re.compile(r"^[a-zA-Z](?:-?[a-zA-Z0-9])*$")
37+ valid_url = re.compile(
38+ r"^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$"
39+ )
40+ valid_email = re.compile(
41+ r"^(mailto:|)[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}$"
42+ )
43+
44+ for linkskey in self.snap_yaml["links"]:
45+ if linkskey.strip() == "":
46 t = "error"
47- s = "'unknown field for links in snap.yaml: %s" % val
48+ s = "links key cannot be empty"
49 self._add_result(t, n, s)
50 return
51
52- content = self.snap_yaml["links"][val]
53- if not isinstance(content, str) and not (
54- isinstance(content, list)
55- and all(isinstance(item, str) for item in content)
56- ):
57+ if not valid_link.match(linkskey):
58+ t = "error"
59+ s = "links key is invalid: %s" % linkskey
60+ self._add_result(t, n, s)
61+ return
62+
63+ linksvalues = self.snap_yaml["links"][linkskey]
64+
65+ if not isinstance(linksvalues, list):
66+ t = "error"
67+ s = "%s is not a list of strings" % linkskey
68+ self._add_result(t, n, s)
69+ return
70+
71+ if len(linksvalues) == 0:
72 t = "error"
73- s = "'invalid value for %s in snap.yaml" % val
74+ s = "%s links cannot be specified and empty" % linkskey
75 self._add_result(t, n, s)
76 return
77
78+ for link in linksvalues:
79+ if link == "":
80+ t = "error"
81+ s = "empty %s link" % linkskey
82+ self._add_result(t, n, s)
83+ return
84+
85+ if not isinstance(link, str):
86+ t = "error"
87+ s = "%s is not a string" % link
88+ self._add_result(t, n, s)
89+ return
90+
91+ if not (
92+ linkskey == "contact"
93+ and valid_email.match(link)
94+ or valid_url.match(link)
95+ ):
96+ t = "error"
97+ s = (
98+ "%s link must have one of http|https schemes or it must be an email address: %s"
99+ % (linkskey, link)
100+ )
101+ self._add_result(t, n, s, manual_review=True)
102+ return
103+
104 def check_unknown_entries(self):
105 """Check for any unknown fields"""
106 t = "info"
107diff --git a/reviewtools/tests/test_sr_lint.py b/reviewtools/tests/test_sr_lint.py
108index 5d9aba7..27b7533 100644
109--- a/reviewtools/tests/test_sr_lint.py
110+++ b/reviewtools/tests/test_sr_lint.py
111@@ -976,21 +976,16 @@ class TestSnapReviewLint(sr_tests.TestSnapReview):
112
113 def test_check_link(self):
114 """Test check_link"""
115- self.set_test_snap_yaml("links", [])
116- c = SnapReviewLint(self.test_name)
117- c.check_links()
118- r = c.review_report
119- expected_counts = {"info": None, "warn": 0, "error": 0}
120- self.check_results(r, expected_counts)
121-
122- def test_check_link_valid(self):
123- """Test check_link_valid"""
124 links = {
125- "website": "https://snapcraft.io",
126- "source-code": "https://github.com/snapcore/snapcraft",
127- "contact": ["https://forum.snapcraft.io", "snapcraft@forum.snapcraft.io"],
128- "donation": "http://donate.me",
129- "issues": "https://bugs.launchpad.net/snapcraft/+filebug",
130+ "website": ["https://snapcraft.io"],
131+ "source-code": ["https://github.com/snapcore/snapcraft"],
132+ "contact": [
133+ "https://forum.snapcraft.io",
134+ "mailto:snapcraft@forum.snapcraft.io",
135+ "another@email.com",
136+ ],
137+ "donation": ["http://donate.me"],
138+ "issues": ["https://bugs.launchpad.net/snapcraft/+filebug"],
139 }
140 self.set_test_snap_yaml("links", links)
141 c = SnapReviewLint(self.test_name)
142@@ -1001,15 +996,51 @@ class TestSnapReviewLint(sr_tests.TestSnapReview):
143
144 def test_check_link_invalid_field(self):
145 """Test check_link_invalid_field"""
146- self.set_test_snap_yaml("links", {"invalid_field": ""})
147+ self.set_test_snap_yaml("links", {"invalid_field": [""]})
148+ c = SnapReviewLint(self.test_name)
149+ c.check_links()
150+ r = c.review_report
151+ expected_counts = {"info": None, "warn": 0, "error": 1}
152+ self.check_results(r, expected_counts)
153+
154+ def test_check_link_invalid_url_no_http_prefix(self):
155+ """Test check_link_invalid_url_no_http_prefix"""
156+ self.set_test_snap_yaml("links", {"website": "www.snapcraft.io"})
157 c = SnapReviewLint(self.test_name)
158 c.check_links()
159 r = c.review_report
160 expected_counts = {"info": None, "warn": 0, "error": 1}
161 self.check_results(r, expected_counts)
162
163- def test_check_link_invalid_content_1(self):
164+ def test_check_link_invalid_email(self):
165 """Test check_link_invalid_email"""
166+ self.set_test_snap_yaml("links", {"contact": "invalid(at)email.com"})
167+ c = SnapReviewLint(self.test_name)
168+ c.check_links()
169+ r = c.review_report
170+ expected_counts = {"info": None, "warn": 0, "error": 1}
171+ self.check_results(r, expected_counts)
172+
173+ def test_check_link_email_not_in_contacts(self):
174+ """Test check_link_email_not_in_contacts"""
175+ self.set_test_snap_yaml("links", {"issues": "valid@email.com"})
176+ c = SnapReviewLint(self.test_name)
177+ c.check_links()
178+ r = c.review_report
179+ expected_counts = {"info": None, "warn": 0, "error": 1}
180+ self.check_results(r, expected_counts)
181+
182+ def test_check_link_invalid_content_string(self):
183+ """Test check_link_invalid_content_string"""
184+ self.set_test_snap_yaml("links", {"website": "http://www.snapcraft.io"})
185+ c = SnapReviewLint(self.test_name)
186+ c.check_links()
187+ r = c.review_report
188+ expected_counts = {"info": None, "warn": 0, "error": 1}
189+ self.check_results(r, expected_counts)
190+
191+ def test_check_link_invalid_content_number(self):
192+ """Test check_link_invalid_content_number"""
193 self.set_test_snap_yaml("links", {"contact": 123})
194 c = SnapReviewLint(self.test_name)
195 c.check_links()
196@@ -1017,8 +1048,8 @@ class TestSnapReviewLint(sr_tests.TestSnapReview):
197 expected_counts = {"info": None, "warn": 0, "error": 1}
198 self.check_results(r, expected_counts)
199
200- def test_check_link_invalid_content_2(self):
201- """Test check_link_invalid_email"""
202+ def test_check_link_invalid_content_number_list(self):
203+ """Test check_link_invalid_content_number_list"""
204 self.set_test_snap_yaml("links", {"website": [456, 789]})
205 c = SnapReviewLint(self.test_name)
206 c.check_links()
207diff --git a/snapcraft.yaml b/snapcraft.yaml
208index b651682..1cc7be2 100644
209--- a/snapcraft.yaml
210+++ b/snapcraft.yaml
211@@ -7,9 +7,6 @@ description: |
212 and can be used to verify your snap before upload.
213 confinement: strict
214 base: core18
215-website: https://launchpad.net/review-tools
216-source-code: https://git.launchpad.net/review-tools
217-issues: https://bugs.launchpad.net/review-tools
218
219 environment:
220 LANG: C.UTF-8

Subscribers

People subscribed via source and target branches