Merge ~pfsmorigo/review-tools:check_link_improvements into review-tools:master
- Git
- lp:~pfsmorigo/review-tools
- check_link_improvements
- Merge into master
Status: | Merged |
---|---|
Merged at revision: | 80021aab0553f647060685ec62f862314af53766 |
Proposed branch: | ~pfsmorigo/review-tools:check_link_improvements |
Merge into: | review-tools:master |
Diff against target: |
220 lines (+105/-34) 4 files modified
reviewtools/common.py (+0/-3) reviewtools/sr_lint.py (+56/-10) reviewtools/tests/test_sr_lint.py (+49/-18) snapcraft.yaml (+0/-3) |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Alex Murray | Approve | ||
Review via email:
|
Commit message
Description of the change
I used the ValidateLinks from snapd but had to make some changes. In snapd, the validation relies on an url parser that can check the syntax. For the review tools I'm using a regex for that.
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Paulo Flabiano Smorigo (pfsmorigo) wrote : | # |
I just removed LINK_TYPES. For the regex, I'm willing to remove it and use a python library to validade but, since the system runs on xenial, it's hard to find a good library for it. I can't use python3-validators for example. There is urllib that can parse the url but don't validate it unless you use it for a query and check for the returning error. There is also the requests library but you also needs to do a request in order to check the result. About the regex, why too fragile? Maybe there would be cases that it will fails but than we can manually approve, right?
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Alex Murray (alexmurray) wrote : | # |
The snap store now uses focal not xenial and the review-tools snap is base: core18 so you should be able to use anything in bionic.
I personally don't want to have to manually approve every snap upload for a snap which uses a non-ascii URL or similar - I also don't think this is a good situation for such publishers either - so if we can just use an existing trusted library to parse + validate the URL then that seems like a better solution to me.
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Alex Murray (alexmurray) wrote : | # |
Let's merge this as is since it is very good - we can revisit it later if it turns out there are URLs which get caught by the regex.
Preview Diff
1 | diff --git a/reviewtools/common.py b/reviewtools/common.py | |||
2 | index ce47886..548b8d7 100644 | |||
3 | --- a/reviewtools/common.py | |||
4 | +++ b/reviewtools/common.py | |||
5 | @@ -140,9 +140,6 @@ OS_RELEASE_MAP = { | |||
6 | 140 | } | 140 | } |
7 | 141 | } | 141 | } |
8 | 142 | 142 | ||
9 | 143 | # Link categories used in snap.yaml | ||
10 | 144 | LINK_TYPES = ["website", "source-code", "contact", "donation", "issues"] | ||
11 | 145 | |||
12 | 146 | 143 | ||
13 | 147 | def cleanup_unpack(): | 144 | def cleanup_unpack(): |
14 | 148 | global UNPACK_DIR | 145 | global UNPACK_DIR |
15 | diff --git a/reviewtools/sr_lint.py b/reviewtools/sr_lint.py | |||
16 | index 6cca8d9..819ef5a 100644 | |||
17 | --- a/reviewtools/sr_lint.py | |||
18 | +++ b/reviewtools/sr_lint.py | |||
19 | @@ -20,7 +20,6 @@ from reviewtools.common import ( | |||
20 | 20 | find_external_symlinks, | 20 | find_external_symlinks, |
21 | 21 | STORE_PKGNAME_SNAPV2_MAXLEN, | 21 | STORE_PKGNAME_SNAPV2_MAXLEN, |
22 | 22 | STORE_PKGNAME_SNAPV2_MINLEN, | 22 | STORE_PKGNAME_SNAPV2_MINLEN, |
23 | 23 | LINK_TYPES, | ||
24 | 24 | ) | 23 | ) |
25 | 25 | from reviewtools.overrides import ( | 24 | from reviewtools.overrides import ( |
26 | 26 | redflagged_snap_types_overrides, | 25 | redflagged_snap_types_overrides, |
27 | @@ -348,23 +347,70 @@ class SnapReviewLint(SnapReview): | |||
28 | 348 | if "links" not in self.snap_yaml: | 347 | if "links" not in self.snap_yaml: |
29 | 349 | return | 348 | return |
30 | 350 | 349 | ||
33 | 351 | for val in self.snap_yaml["links"]: | 350 | # Check links is partially based on: |
34 | 352 | if val not in LINK_TYPES: | 351 | # https://github.com/snapcore/snapd/blob/master/snap/validate.go#L1132 |
35 | 352 | |||
36 | 353 | valid_link = re.compile(r"^[a-zA-Z](?:-?[a-zA-Z0-9])*$") | ||
37 | 354 | valid_url = re.compile( | ||
38 | 355 | r"^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$" | ||
39 | 356 | ) | ||
40 | 357 | valid_email = re.compile( | ||
41 | 358 | r"^(mailto:|)[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}$" | ||
42 | 359 | ) | ||
43 | 360 | |||
44 | 361 | for linkskey in self.snap_yaml["links"]: | ||
45 | 362 | if linkskey.strip() == "": | ||
46 | 353 | t = "error" | 363 | t = "error" |
48 | 354 | s = "'unknown field for links in snap.yaml: %s" % val | 364 | s = "links key cannot be empty" |
49 | 355 | self._add_result(t, n, s) | 365 | self._add_result(t, n, s) |
50 | 356 | return | 366 | return |
51 | 357 | 367 | ||
57 | 358 | content = self.snap_yaml["links"][val] | 368 | if not valid_link.match(linkskey): |
58 | 359 | if not isinstance(content, str) and not ( | 369 | t = "error" |
59 | 360 | isinstance(content, list) | 370 | s = "links key is invalid: %s" % linkskey |
60 | 361 | and all(isinstance(item, str) for item in content) | 371 | self._add_result(t, n, s) |
61 | 362 | ): | 372 | return |
62 | 373 | |||
63 | 374 | linksvalues = self.snap_yaml["links"][linkskey] | ||
64 | 375 | |||
65 | 376 | if not isinstance(linksvalues, list): | ||
66 | 377 | t = "error" | ||
67 | 378 | s = "%s is not a list of strings" % linkskey | ||
68 | 379 | self._add_result(t, n, s) | ||
69 | 380 | return | ||
70 | 381 | |||
71 | 382 | if len(linksvalues) == 0: | ||
72 | 363 | t = "error" | 383 | t = "error" |
74 | 364 | s = "'invalid value for %s in snap.yaml" % val | 384 | s = "%s links cannot be specified and empty" % linkskey |
75 | 365 | self._add_result(t, n, s) | 385 | self._add_result(t, n, s) |
76 | 366 | return | 386 | return |
77 | 367 | 387 | ||
78 | 388 | for link in linksvalues: | ||
79 | 389 | if link == "": | ||
80 | 390 | t = "error" | ||
81 | 391 | s = "empty %s link" % linkskey | ||
82 | 392 | self._add_result(t, n, s) | ||
83 | 393 | return | ||
84 | 394 | |||
85 | 395 | if not isinstance(link, str): | ||
86 | 396 | t = "error" | ||
87 | 397 | s = "%s is not a string" % link | ||
88 | 398 | self._add_result(t, n, s) | ||
89 | 399 | return | ||
90 | 400 | |||
91 | 401 | if not ( | ||
92 | 402 | linkskey == "contact" | ||
93 | 403 | and valid_email.match(link) | ||
94 | 404 | or valid_url.match(link) | ||
95 | 405 | ): | ||
96 | 406 | t = "error" | ||
97 | 407 | s = ( | ||
98 | 408 | "%s link must have one of http|https schemes or it must be an email address: %s" | ||
99 | 409 | % (linkskey, link) | ||
100 | 410 | ) | ||
101 | 411 | self._add_result(t, n, s, manual_review=True) | ||
102 | 412 | return | ||
103 | 413 | |||
104 | 368 | def check_unknown_entries(self): | 414 | def check_unknown_entries(self): |
105 | 369 | """Check for any unknown fields""" | 415 | """Check for any unknown fields""" |
106 | 370 | t = "info" | 416 | t = "info" |
107 | diff --git a/reviewtools/tests/test_sr_lint.py b/reviewtools/tests/test_sr_lint.py | |||
108 | index 5d9aba7..27b7533 100644 | |||
109 | --- a/reviewtools/tests/test_sr_lint.py | |||
110 | +++ b/reviewtools/tests/test_sr_lint.py | |||
111 | @@ -976,21 +976,16 @@ class TestSnapReviewLint(sr_tests.TestSnapReview): | |||
112 | 976 | 976 | ||
113 | 977 | def test_check_link(self): | 977 | def test_check_link(self): |
114 | 978 | """Test check_link""" | 978 | """Test check_link""" |
115 | 979 | self.set_test_snap_yaml("links", []) | ||
116 | 980 | c = SnapReviewLint(self.test_name) | ||
117 | 981 | c.check_links() | ||
118 | 982 | r = c.review_report | ||
119 | 983 | expected_counts = {"info": None, "warn": 0, "error": 0} | ||
120 | 984 | self.check_results(r, expected_counts) | ||
121 | 985 | |||
122 | 986 | def test_check_link_valid(self): | ||
123 | 987 | """Test check_link_valid""" | ||
124 | 988 | links = { | 979 | links = { |
130 | 989 | "website": "https://snapcraft.io", | 980 | "website": ["https://snapcraft.io"], |
131 | 990 | "source-code": "https://github.com/snapcore/snapcraft", | 981 | "source-code": ["https://github.com/snapcore/snapcraft"], |
132 | 991 | "contact": ["https://forum.snapcraft.io", "snapcraft@forum.snapcraft.io"], | 982 | "contact": [ |
133 | 992 | "donation": "http://donate.me", | 983 | "https://forum.snapcraft.io", |
134 | 993 | "issues": "https://bugs.launchpad.net/snapcraft/+filebug", | 984 | "mailto:snapcraft@forum.snapcraft.io", |
135 | 985 | "another@email.com", | ||
136 | 986 | ], | ||
137 | 987 | "donation": ["http://donate.me"], | ||
138 | 988 | "issues": ["https://bugs.launchpad.net/snapcraft/+filebug"], | ||
139 | 994 | } | 989 | } |
140 | 995 | self.set_test_snap_yaml("links", links) | 990 | self.set_test_snap_yaml("links", links) |
141 | 996 | c = SnapReviewLint(self.test_name) | 991 | c = SnapReviewLint(self.test_name) |
142 | @@ -1001,15 +996,51 @@ class TestSnapReviewLint(sr_tests.TestSnapReview): | |||
143 | 1001 | 996 | ||
144 | 1002 | def test_check_link_invalid_field(self): | 997 | def test_check_link_invalid_field(self): |
145 | 1003 | """Test check_link_invalid_field""" | 998 | """Test check_link_invalid_field""" |
147 | 1004 | self.set_test_snap_yaml("links", {"invalid_field": ""}) | 999 | self.set_test_snap_yaml("links", {"invalid_field": [""]}) |
148 | 1000 | c = SnapReviewLint(self.test_name) | ||
149 | 1001 | c.check_links() | ||
150 | 1002 | r = c.review_report | ||
151 | 1003 | expected_counts = {"info": None, "warn": 0, "error": 1} | ||
152 | 1004 | self.check_results(r, expected_counts) | ||
153 | 1005 | |||
154 | 1006 | def test_check_link_invalid_url_no_http_prefix(self): | ||
155 | 1007 | """Test check_link_invalid_url_no_http_prefix""" | ||
156 | 1008 | self.set_test_snap_yaml("links", {"website": "www.snapcraft.io"}) | ||
157 | 1005 | c = SnapReviewLint(self.test_name) | 1009 | c = SnapReviewLint(self.test_name) |
158 | 1006 | c.check_links() | 1010 | c.check_links() |
159 | 1007 | r = c.review_report | 1011 | r = c.review_report |
160 | 1008 | expected_counts = {"info": None, "warn": 0, "error": 1} | 1012 | expected_counts = {"info": None, "warn": 0, "error": 1} |
161 | 1009 | self.check_results(r, expected_counts) | 1013 | self.check_results(r, expected_counts) |
162 | 1010 | 1014 | ||
164 | 1011 | def test_check_link_invalid_content_1(self): | 1015 | def test_check_link_invalid_email(self): |
165 | 1012 | """Test check_link_invalid_email""" | 1016 | """Test check_link_invalid_email""" |
166 | 1017 | self.set_test_snap_yaml("links", {"contact": "invalid(at)email.com"}) | ||
167 | 1018 | c = SnapReviewLint(self.test_name) | ||
168 | 1019 | c.check_links() | ||
169 | 1020 | r = c.review_report | ||
170 | 1021 | expected_counts = {"info": None, "warn": 0, "error": 1} | ||
171 | 1022 | self.check_results(r, expected_counts) | ||
172 | 1023 | |||
173 | 1024 | def test_check_link_email_not_in_contacts(self): | ||
174 | 1025 | """Test check_link_email_not_in_contacts""" | ||
175 | 1026 | self.set_test_snap_yaml("links", {"issues": "valid@email.com"}) | ||
176 | 1027 | c = SnapReviewLint(self.test_name) | ||
177 | 1028 | c.check_links() | ||
178 | 1029 | r = c.review_report | ||
179 | 1030 | expected_counts = {"info": None, "warn": 0, "error": 1} | ||
180 | 1031 | self.check_results(r, expected_counts) | ||
181 | 1032 | |||
182 | 1033 | def test_check_link_invalid_content_string(self): | ||
183 | 1034 | """Test check_link_invalid_content_string""" | ||
184 | 1035 | self.set_test_snap_yaml("links", {"website": "http://www.snapcraft.io"}) | ||
185 | 1036 | c = SnapReviewLint(self.test_name) | ||
186 | 1037 | c.check_links() | ||
187 | 1038 | r = c.review_report | ||
188 | 1039 | expected_counts = {"info": None, "warn": 0, "error": 1} | ||
189 | 1040 | self.check_results(r, expected_counts) | ||
190 | 1041 | |||
191 | 1042 | def test_check_link_invalid_content_number(self): | ||
192 | 1043 | """Test check_link_invalid_content_number""" | ||
193 | 1013 | self.set_test_snap_yaml("links", {"contact": 123}) | 1044 | self.set_test_snap_yaml("links", {"contact": 123}) |
194 | 1014 | c = SnapReviewLint(self.test_name) | 1045 | c = SnapReviewLint(self.test_name) |
195 | 1015 | c.check_links() | 1046 | c.check_links() |
196 | @@ -1017,8 +1048,8 @@ class TestSnapReviewLint(sr_tests.TestSnapReview): | |||
197 | 1017 | expected_counts = {"info": None, "warn": 0, "error": 1} | 1048 | expected_counts = {"info": None, "warn": 0, "error": 1} |
198 | 1018 | self.check_results(r, expected_counts) | 1049 | self.check_results(r, expected_counts) |
199 | 1019 | 1050 | ||
202 | 1020 | def test_check_link_invalid_content_2(self): | 1051 | def test_check_link_invalid_content_number_list(self): |
203 | 1021 | """Test check_link_invalid_email""" | 1052 | """Test check_link_invalid_content_number_list""" |
204 | 1022 | self.set_test_snap_yaml("links", {"website": [456, 789]}) | 1053 | self.set_test_snap_yaml("links", {"website": [456, 789]}) |
205 | 1023 | c = SnapReviewLint(self.test_name) | 1054 | c = SnapReviewLint(self.test_name) |
206 | 1024 | c.check_links() | 1055 | c.check_links() |
207 | diff --git a/snapcraft.yaml b/snapcraft.yaml | |||
208 | index b651682..1cc7be2 100644 | |||
209 | --- a/snapcraft.yaml | |||
210 | +++ b/snapcraft.yaml | |||
211 | @@ -7,9 +7,6 @@ description: | | |||
212 | 7 | and can be used to verify your snap before upload. | 7 | and can be used to verify your snap before upload. |
213 | 8 | confinement: strict | 8 | confinement: strict |
214 | 9 | base: core18 | 9 | base: core18 |
215 | 10 | website: https://launchpad.net/review-tools | ||
216 | 11 | source-code: https://git.launchpad.net/review-tools | ||
217 | 12 | issues: https://bugs.launchpad.net/review-tools | ||
218 | 13 | 10 | ||
219 | 14 | environment: | 11 | environment: |
220 | 15 | LANG: C.UTF-8 | 12 | LANG: C.UTF-8 |
Thanks for this - I don't think we need LINK_TYPES anymore and I think you should just use the python standard library to parse / validate the URLs rather than a regex as that will be too fragile.