Merge lp:~stevanr/linaro-license-protection/automate-integration-tests into lp:~linaro-automation/linaro-license-protection/trunk
- automate-integration-tests
- Merge into trunk
Status: | Merged | ||||
---|---|---|---|---|---|
Approved by: | James Tunnicliffe | ||||
Approved revision: | 75 | ||||
Merged at revision: | 71 | ||||
Proposed branch: | lp:~stevanr/linaro-license-protection/automate-integration-tests | ||||
Merge into: | lp:~linaro-automation/linaro-license-protection/trunk | ||||
Diff against target: |
517 lines (+305/-143) 5 files modified
.htaccess (+2/-2) README (+7/-0) testing/filefetcher.py (+0/-129) testing/license_protected_file_downloader.py (+284/-0) testing/test_click_through_license.py (+12/-12) |
||||
To merge this branch: | bzr merge lp:~stevanr/linaro-license-protection/automate-integration-tests | ||||
Related bugs: |
|
||||
Related blueprints: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
James Tunnicliffe (community) | Approve | ||
Данило Шеган | code | Pending | |
Review via email: mp+105209@code.launchpad.net |
Commit message
Description of the change
Update filefetcher to the newest version from James' branch.
Fix https:/
Automate integration tests after deployment to production.
Stevan Radaković (stevanr) wrote : | # |
- 71. By Stevan Radaković
-
Reverting file so changes can be seen.
- 72. By Stevan Radaković
-
Done reverting file so changes can be seen.
Данило Шеган (danilo) wrote : | # |
This would be a good opportunity to add a dependencies section to the 'Setup' section in the README (or if you have a better idea of where it should go, just go for it). Something along the following lines:
Dependencies
............
libapache2-mod-php5
Testing: phpunit, testrepository, python-html2text
Данило Шеган (danilo) wrote : | # |
Also, tests are still not passing with these changes. Have you had a chance to investigate that?
- 73. By Stevan Radaković
-
Fix wrong indentation in _get_license method
- 74. By Stevan Radaković
-
Incorrect parsing of the domain fixed
- 75. By Stevan Radaković
-
Tests updated to use new filefetcher
James Tunnicliffe (dooferlad) wrote : | # |
I am working on the assumption that the tests pass now :-)
This looks fine. Please add the lines to README that Danilo suggested as well, but I don't think there is any reason to re-review with that change, so I will approve this. Of course, if you have other ideas for the set up instructions, then please just check in without that change.
- 76. By Stevan Radaković
-
Add Dependencies section to README file
- 77. By Stevan Radaković
-
Revert accidental commit of __init__.py
Preview Diff
1 | === modified file '.htaccess' |
2 | --- .htaccess 2012-05-02 11:33:12 +0000 |
3 | +++ .htaccess 2012-05-11 12:35:21 +0000 |
4 | @@ -13,12 +13,12 @@ |
5 | ## without port number for use in cookie domain |
6 | RewriteCond %{SERVER_PORT} !^80$ [OR] |
7 | RewriteCond %{SERVER_PORT} !^443$ |
8 | -RewriteCond %{HTTP_HOST} (.*)(\:.*) |
9 | +RewriteCond %{HTTP_HOST} ^([^:]*)$ |
10 | RewriteRule .* - [E=CO_DOMAIN:%1] |
11 | |
12 | RewriteCond %{SERVER_PORT} !^80$ [OR] |
13 | RewriteCond %{SERVER_PORT} !^443$ |
14 | -RewriteCond %{HTTP_HOST} (^.*$) |
15 | +RewriteCond %{HTTP_HOST} ^([^:]*):(.*)$ |
16 | RewriteRule .* - [E=CO_DOMAIN:%1] |
17 | |
18 | ## Let internal hosts through always. |
19 | |
20 | === modified file 'README' |
21 | --- README 2012-05-08 19:51:41 +0000 |
22 | +++ README 2012-05-11 12:35:21 +0000 |
23 | @@ -15,6 +15,13 @@ |
24 | |
25 | Currently, all directories/files containing either 'origen' or 'snowball' in the URL path are protected with appropriate license (Samsung or ST-E) click-through. |
26 | |
27 | +Dependencies |
28 | +............ |
29 | + |
30 | +libapache2-mod-php5 |
31 | + |
32 | +Testing: phpunit, testrepository, python-html2text |
33 | + |
34 | |
35 | Technical details |
36 | ----------------- |
37 | |
38 | === removed file 'testing/filefetcher.py' |
39 | --- testing/filefetcher.py 2012-01-13 11:48:16 +0000 |
40 | +++ testing/filefetcher.py 1970-01-01 00:00:00 +0000 |
41 | @@ -1,129 +0,0 @@ |
42 | -#!/usr/bin/env python |
43 | - |
44 | -# Changes required to address EULA for the origen hwpacks |
45 | - |
46 | -import argparse |
47 | -import os |
48 | -import pycurl |
49 | -import re |
50 | -import urlparse |
51 | - |
52 | - |
53 | -class LicenseProtectedFileFetcher: |
54 | - """Fetch a file from the web that may be protected by a license redirect |
55 | - |
56 | - This is designed to run on snapshots.linaro.org. License HTML file are in |
57 | - the form: |
58 | - |
59 | - <vendor>.html has a link to <vendor>-accept.html |
60 | - |
61 | - If self.get is pointed at a file that has to go through one of these |
62 | - licenses, it should be able to automatically accept the license and |
63 | - download the file. |
64 | - |
65 | - Once a license has been accepted, it will be used for all following |
66 | - downloads. |
67 | - |
68 | - If self.close() is called before the object is deleted, cURL will store |
69 | - the license accept cookie to cookies.txt, so it can be used for later |
70 | - downloads. |
71 | - |
72 | - """ |
73 | - def __init__(self): |
74 | - """Set up cURL""" |
75 | - self.curl = pycurl.Curl() |
76 | - self.curl.setopt(pycurl.FOLLOWLOCATION, 1) |
77 | - self.curl.setopt(pycurl.WRITEFUNCTION, self._write_body) |
78 | - self.curl.setopt(pycurl.HEADERFUNCTION, self._write_header) |
79 | - self.curl.setopt(pycurl.COOKIEFILE, "cookies.txt") |
80 | - self.curl.setopt(pycurl.COOKIEJAR, "cookies.txt") |
81 | - |
82 | - def _get(self, url): |
83 | - """Clear out header and body storage, fetch URL, filling them in.""" |
84 | - self.curl.setopt(pycurl.URL, url) |
85 | - |
86 | - self.body = "" |
87 | - self.header = "" |
88 | - |
89 | - self.curl.perform() |
90 | - |
91 | - def get(self, url, ignore_license=False, accept_license=True): |
92 | - """Fetch the requested URL, ignoring license at all or |
93 | - accepting or declining licenses, returns file body. |
94 | - |
95 | - Fetches the file at url. If a redirect is encountered, it is |
96 | - expected to be to a license that has an accept or decline link. |
97 | - Follow that link, then download original file or nolicense notice. |
98 | - |
99 | - """ |
100 | - self._get(url) |
101 | - |
102 | - if ignore_license: |
103 | - return self.body |
104 | - |
105 | - location = self._get_location() |
106 | - if location: |
107 | - # Off to the races - we have been redirected. |
108 | - # Expect to find a link to self.location with -accepted or |
109 | - # -declined inserted before the .html, |
110 | - # i.e. ste.html -> ste-accepted.html |
111 | - |
112 | - # Get the file from the URL (full path) |
113 | - file = urlparse.urlparse(location).path |
114 | - |
115 | - # Get the file without the rest of the path |
116 | - file = os.path.split(file)[-1] |
117 | - |
118 | - # Look for a link with accepted.html or declined.html |
119 | - # in the page name. Follow it. |
120 | - new_file = None |
121 | - for line in self.body.splitlines(): |
122 | - if accept_license: |
123 | - link_search = re.search("""href=.*?["'](.*?-accepted.html)""", |
124 | - line) |
125 | - else: |
126 | - link_search = re.search("""href=.*?["'](.*?-declined.html)""", |
127 | - line) |
128 | - if link_search: |
129 | - # Have found license decline URL! |
130 | - new_file = link_search.group(1) |
131 | - |
132 | - if new_file: |
133 | - # accept or decline the license... |
134 | - next_url = re.sub(file, new_file, location) |
135 | - self._get(next_url) |
136 | - |
137 | - # The above get *should* take us to the file requested via |
138 | - # a redirect. If we manually need to follow that redirect, |
139 | - # do that now. |
140 | - |
141 | - if accept_license and self._get_location(): |
142 | - # If we haven't been redirected to our original file, |
143 | - # we should be able to just download it now. |
144 | - self._get(url) |
145 | - |
146 | - return self.body |
147 | - |
148 | - def _search_header(self, field): |
149 | - """Search header for the supplied field, return field / None""" |
150 | - for line in self.header.splitlines(): |
151 | - search = re.search(field + ":\s+(.*?)$", line) |
152 | - if search: |
153 | - return search.group(1) |
154 | - return None |
155 | - |
156 | - def _get_location(self): |
157 | - """Return content of Location field in header / None""" |
158 | - return self._search_header("Location") |
159 | - |
160 | - def _write_body(self, buf): |
161 | - """Used by curl as a sink for body content""" |
162 | - self.body += buf |
163 | - |
164 | - def _write_header(self, buf): |
165 | - """Used by curl as a sink for header content""" |
166 | - self.header += buf |
167 | - |
168 | - def close(self): |
169 | - """Wrapper to close curl - this will allow curl to write out cookies""" |
170 | - self.curl.close() |
171 | |
172 | === added file 'testing/license_protected_file_downloader.py' |
173 | --- testing/license_protected_file_downloader.py 1970-01-01 00:00:00 +0000 |
174 | +++ testing/license_protected_file_downloader.py 2012-05-11 12:35:21 +0000 |
175 | @@ -0,0 +1,284 @@ |
176 | +#!/usr/bin/env python |
177 | + |
178 | +import argparse |
179 | +import os |
180 | +import pycurl |
181 | +import re |
182 | +import urlparse |
183 | +import html2text |
184 | +from BeautifulSoup import BeautifulSoup |
185 | + |
186 | +class LicenseProtectedFileFetcher: |
187 | + """Fetch a file from the web that may be protected by a license redirect |
188 | + |
189 | + This is designed to run on snapshots.linaro.org. License HTML file are in |
190 | + the form: |
191 | + |
192 | + <vendor>.html has a link to <vendor>-accept.html |
193 | + |
194 | + If self.get is pointed at a file that has to go through one of these |
195 | + licenses, it should be able to automatically accept the license and |
196 | + download the file. |
197 | + |
198 | + Once a license has been accepted, it will be used for all following |
199 | + downloads. |
200 | + |
201 | + If self.close() is called before the object is deleted, cURL will store |
202 | + the license accept cookie to cookies.txt, so it can be used for later |
203 | + downloads. |
204 | + |
205 | + """ |
206 | + def __init__(self, cookie_file="cookies.txt"): |
207 | + """Set up cURL""" |
208 | + self.curl = pycurl.Curl() |
209 | + self.curl.setopt(pycurl.WRITEFUNCTION, self._write_body) |
210 | + self.curl.setopt(pycurl.HEADERFUNCTION, self._write_header) |
211 | + self.curl.setopt(pycurl.FOLLOWLOCATION, 1) |
212 | + self.curl.setopt(pycurl.COOKIEFILE, cookie_file) |
213 | + self.curl.setopt(pycurl.COOKIEJAR, cookie_file) |
214 | + self.file_out = None |
215 | + |
216 | + def _get(self, url): |
217 | + """Clear out header and body storage, fetch URL, filling them in.""" |
218 | + url = url.encode("ascii") |
219 | + self.curl.setopt(pycurl.URL, url) |
220 | + |
221 | + self.body = "" |
222 | + self.header = "" |
223 | + |
224 | + if self.file_name: |
225 | + self.file_out = open(self.file_name, 'w') |
226 | + else: |
227 | + self.file_out = None |
228 | + |
229 | + self.curl.perform() |
230 | + self._parse_headers(url) |
231 | + |
232 | + if self.file_out: |
233 | + self.file_out.close() |
234 | + |
235 | + def _parse_headers(self, url): |
236 | + header = {} |
237 | + for line in self.header.splitlines(): |
238 | + # Header lines typically are of the form thing: value... |
239 | + test_line = re.search("^(.*?)\s*:\s*(.*)$", line) |
240 | + |
241 | + if test_line: |
242 | + header[test_line.group(1)] = test_line.group(2) |
243 | + |
244 | + # The location attribute is sometimes relative, but we would |
245 | + # like to have it as always absolute... |
246 | + if 'Location' in header: |
247 | + parsed_location = urlparse.urlparse(header["Location"]) |
248 | + |
249 | + # If not an absolute location... |
250 | + if not parsed_location.netloc: |
251 | + parsed_source_url = urlparse.urlparse(url) |
252 | + new_location = ["", "", "", "", ""] |
253 | + |
254 | + new_location[0] = parsed_source_url.scheme |
255 | + new_location[1] = parsed_source_url.netloc |
256 | + new_location[2] = header["Location"] |
257 | + |
258 | + # Update location with absolute URL |
259 | + header["Location"] = urlparse.urlunsplit(new_location) |
260 | + |
261 | + self.header_text = self.header |
262 | + self.header = header |
263 | + |
264 | + def get_headers(self, url): |
265 | + url = url.encode("ascii") |
266 | + self.curl.setopt(pycurl.URL, url) |
267 | + |
268 | + self.body = "" |
269 | + self.header = "" |
270 | + |
271 | + # Setting NOBODY causes CURL to just fetch the header. |
272 | + self.curl.setopt(pycurl.NOBODY, True) |
273 | + self.curl.perform() |
274 | + self.curl.setopt(pycurl.NOBODY, False) |
275 | + |
276 | + self._parse_headers(url) |
277 | + |
278 | + return self.header |
279 | + |
280 | + def get_or_return_license(self, url, file_name=None): |
281 | + """Get file at the requested URL or, if behind a license, return that. |
282 | + |
283 | + If the URL provided does not redirect us to a license, then return the |
284 | + body of that file. If we are redirected to a license click through |
285 | + then return (the license as plain text, url to accept the license). |
286 | + |
287 | + If the user of this function accepts the license, then they should |
288 | + call get_protected_file.""" |
289 | + |
290 | + self.file_name = file_name |
291 | + |
292 | + # Get the license details. If this returns None, the file isn't license |
293 | + # protected and we can just return the file we started to get in the |
294 | + # function (self.body). |
295 | + license_details = self._get_license(url) |
296 | + |
297 | + if license_details: |
298 | + return license_details |
299 | + |
300 | + return self.body |
301 | + |
302 | + def get(self, url, file_name=None, ignore_license=False, accept_license=True): |
303 | + """Fetch the requested URL, accepting licenses |
304 | + |
305 | + Fetches the file at url. If a redirect is encountered, it is |
306 | + expected to be to a license that has an accept link. Follow that link, |
307 | + then download the original file. Returns the fist 1MB of the file |
308 | + (see _write_body). |
309 | + |
310 | + """ |
311 | + |
312 | + self.file_name = file_name |
313 | + if ignore_license: |
314 | + self._get(url) |
315 | + return self.body |
316 | + |
317 | + license_details = self._get_license(url) |
318 | + |
319 | + if license_details: |
320 | + # Found a license. |
321 | + if accept_license: |
322 | + # Accept the license without looking at it and |
323 | + # start fetching the file we originally wanted. |
324 | + accept_url = license_details[1] |
325 | + self.get_protected_file(accept_url, url) |
326 | + else: |
327 | + # We want to decline the license and return the notice. |
328 | + decline_url = license_details[2] |
329 | + self._get(decline_url) |
330 | + |
331 | + else: |
332 | + # If we got here, there wasn't a license protecting the file |
333 | + # so we just fetch it. |
334 | + self._get(url) |
335 | + |
336 | + return self.body |
337 | + |
338 | + def _get_license(self, url): |
339 | + """Return (license, accept URL, decline URL) if found, |
340 | + else return None. |
341 | + |
342 | + """ |
343 | + |
344 | + self.get_headers(url) |
345 | + |
346 | + if "Location" in self.header and self.header["Location"] != url: |
347 | + # We have been redirected to a new location - the license file |
348 | + location = self.header["Location"] |
349 | + |
350 | + # Fetch the license HTML |
351 | + self._get(location) |
352 | + |
353 | + # Get the file from the URL (full path) |
354 | + file = urlparse.urlparse(location).path |
355 | + |
356 | + # Get the file without the rest of the path |
357 | + file = os.path.split(file)[-1] |
358 | + |
359 | + # Look for a link with accepted.html in the page name. Follow it. |
360 | + accept_search, decline_search = None, None |
361 | + for line in self.body.splitlines(): |
362 | + if not accept_search: |
363 | + accept_search = re.search( |
364 | + """href=.*?["'](.*?-accepted.html)""", |
365 | + line) |
366 | + if not decline_search: |
367 | + decline_search = re.search( |
368 | + """href=.*?["'](.*?-declined.html)""", |
369 | + line) |
370 | + |
371 | + if accept_search and decline_search: |
372 | + # Have found license accept URL! |
373 | + new_file = accept_search.group(1) |
374 | + accept_url = re.sub(file, new_file, location) |
375 | + |
376 | + # Found decline URL as well. |
377 | + new_file_decline = decline_search.group(1) |
378 | + decline_url = re.sub(file, new_file_decline, location) |
379 | + |
380 | + # Parse the HTML using BeautifulSoup |
381 | + soup = BeautifulSoup(self.body) |
382 | + |
383 | + # The license is in a div with the ID license-text, so we |
384 | + # use this to pull just the license out of the HTML. |
385 | + html_license = u"" |
386 | + for chunk in soup.findAll(id="license-text"): |
387 | + # Output of chunk.prettify is UTF8, but comes back |
388 | + # as a str, so convert it here. |
389 | + html_license += chunk.prettify().decode("utf-8") |
390 | + |
391 | + text_license = html2text.html2text(html_license) |
392 | + |
393 | + return text_license, accept_url, decline_url |
394 | + |
395 | + return None |
396 | + |
397 | + def get_protected_file(self, accept_url, url): |
398 | + """Gets the file redirected to by the accept_url""" |
399 | + |
400 | + self._get(accept_url) # Accept the license |
401 | + |
402 | + if not("Location" in self.header and self.header["Location"] == url): |
403 | + # If we got here, we don't have the file yet (weren't redirected |
404 | + # to it). Fetch our target file. This should work now that we have |
405 | + # the right cookie. |
406 | + self._get(url) # Download the target file |
407 | + |
408 | + return self.body |
409 | + |
410 | + def _write_body(self, buf): |
411 | + """Used by curl as a sink for body content""" |
412 | + |
413 | + # If we have a target file to write to, write to it |
414 | + if self.file_out: |
415 | + self.file_out.write(buf) |
416 | + |
417 | + # Only buffer first 1MB of body. This should be plenty for anything |
418 | + # we wish to parse internally. |
419 | + if len(self.body) < 1024*1024*1024: |
420 | + # XXX Would be nice to stop keeping the file in RAM at all and |
421 | + # passing large buffers around. Perhaps only keep in RAM if |
422 | + # file_name == None? (used for getting directory listings |
423 | + # normally). |
424 | + self.body += buf |
425 | + |
426 | + def _write_header(self, buf): |
427 | + """Used by curl as a sink for header content""" |
428 | + self.header += buf |
429 | + |
430 | + def register_progress_callback(self, callback): |
431 | + self.curl.setopt(pycurl.NOPROGRESS, 0) |
432 | + self.curl.setopt(pycurl.PROGRESSFUNCTION, callback) |
433 | + |
434 | + def close(self): |
435 | + """Wrapper to close curl - this will allow curl to write out cookies""" |
436 | + self.curl.close() |
437 | + |
438 | +def main(): |
439 | + """Download file specified on command line""" |
440 | + parser = argparse.ArgumentParser(description="Download a file, accepting " |
441 | + "any licenses required to do so.") |
442 | + |
443 | + parser.add_argument('url', metavar="URL", type=str, nargs=1, |
444 | + help="URL of file to download.") |
445 | + |
446 | + args = parser.parse_args() |
447 | + |
448 | + fetcher = LicenseProtectedFileFetcher() |
449 | + |
450 | + # Get file name from URL |
451 | + file_name = os.path.basename(urlparse.urlparse(args.url[0]).path) |
452 | + if not file_name: |
453 | + file_name = "downloaded" |
454 | + fetcher.get(args.url[0], file_name) |
455 | + |
456 | + fetcher.close() |
457 | + |
458 | +if __name__ == "__main__": |
459 | + main() |
460 | |
461 | === modified file 'testing/test_click_through_license.py' |
462 | --- testing/test_click_through_license.py 2012-05-07 08:48:51 +0000 |
463 | +++ testing/test_click_through_license.py 2012-05-11 12:35:21 +0000 |
464 | @@ -9,7 +9,7 @@ |
465 | |
466 | from testtools import TestCase |
467 | from testtools.matchers import Mismatch |
468 | -from filefetcher import LicenseProtectedFileFetcher |
469 | +from license_protected_file_downloader import LicenseProtectedFileFetcher |
470 | |
471 | fetcher = LicenseProtectedFileFetcher() |
472 | cwd = os.getcwd() |
473 | @@ -145,19 +145,19 @@ |
474 | self.assertThat(testfile, Contains(search)) |
475 | |
476 | def test_redirect_to_license_samsung(self): |
477 | - search = "LICENSE AGREEMENT" |
478 | - testfile = fetcher.get(host + samsung_test_file, ignore_license=True) |
479 | - self.assertThat(testfile, Contains(search)) |
480 | + search = "PLEASE READ THE FOLLOWING AGREEMENT CAREFULLY" |
481 | + testfile = fetcher.get_or_return_license(host + samsung_test_file) |
482 | + self.assertThat(testfile[0], Contains(search)) |
483 | |
484 | def test_redirect_to_license_ste(self): |
485 | - search = "LICENSE AGREEMENT" |
486 | - testfile = fetcher.get(host + ste_test_file, ignore_license=True) |
487 | - self.assertThat(testfile, Contains(search)) |
488 | + search = "PLEASE READ THE FOLLOWING AGREEMENT CAREFULLY" |
489 | + testfile = fetcher.get_or_return_license(host + ste_test_file) |
490 | + self.assertThat(testfile[0], Contains(search)) |
491 | |
492 | def test_redirect_to_license_linaro(self): |
493 | - search = "LICENSE AGREEMENT" |
494 | - testfile = fetcher.get(host + linaro_test_file, ignore_license=True) |
495 | - self.assertThat(testfile, Contains(search)) |
496 | + search = "Linaro license." |
497 | + testfile = fetcher.get_or_return_license(host + linaro_test_file) |
498 | + self.assertThat(testfile[0], Contains(search)) |
499 | |
500 | def test_decline_license_samsung(self): |
501 | search = "License has not been accepted" |
502 | @@ -214,13 +214,13 @@ |
503 | def test_license_accepted_samsung(self): |
504 | search = "This is protected with click-through Samsung license." |
505 | os.rename("%s/cookies.samsung" % docroot, "%s/cookies.txt" % docroot) |
506 | - testfile = fetcher.get(host + samsung_test_file, ignore_license=True) |
507 | + testfile = fetcher.get(host + samsung_test_file) |
508 | self.assertThat(testfile, Contains(search)) |
509 | |
510 | def test_license_accepted_ste(self): |
511 | search = "This is protected with click-through ST-E license." |
512 | os.rename("%s/cookies.ste" % docroot, "%s/cookies.txt" % docroot) |
513 | - testfile = fetcher.get(host + ste_test_file, ignore_license=True) |
514 | + testfile = fetcher.get(host + ste_test_file) |
515 | self.assertThat(testfile, Contains(search)) |
516 | |
517 | def test_internal_host_samsung(self): |
Sorry guys, I accidentally did everything in one commit. I reverted file to James' version and pushed my new version again.