1
=== modified file '.htaccess'
2
--- .htaccess	2012-05-02 11:33:12 +0000
3
+++ .htaccess	2012-05-11 12:35:21 +0000
4
@@ -13,12 +13,12 @@
5
13
## without port number for use in cookie domain
13
## without port number for use in cookie domain
6
14
RewriteCond %{SERVER_PORT} !^80$ [OR]
14
RewriteCond %{SERVER_PORT} !^80$ [OR]
7
15
RewriteCond %{SERVER_PORT} !^443$
15
RewriteCond %{SERVER_PORT} !^443$
9
16
RewriteCond %{HTTP_HOST} (.*)(\:.*)
16
RewriteCond %{HTTP_HOST} ^([^:]*)$
10
17
RewriteRule .* - [E=CO_DOMAIN:%1]
17
RewriteRule .* - [E=CO_DOMAIN:%1]
11
18
18
12
19
RewriteCond %{SERVER_PORT} !^80$ [OR]
19
RewriteCond %{SERVER_PORT} !^80$ [OR]
13
20
RewriteCond %{SERVER_PORT} !^443$
20
RewriteCond %{SERVER_PORT} !^443$
15
21
RewriteCond %{HTTP_HOST} (^.*$)
21
RewriteCond %{HTTP_HOST} ^([^:]*):(.*)$
16
22
RewriteRule .* - [E=CO_DOMAIN:%1]
22
RewriteRule .* - [E=CO_DOMAIN:%1]
17
23
23
18
24
## Let internal hosts through always.
24
## Let internal hosts through always.
19
25
25
20
=== modified file 'README'
21
--- README	2012-05-08 19:51:41 +0000
22
+++ README	2012-05-11 12:35:21 +0000
23
@@ -15,6 +15,13 @@
24
15
15
25
16
Currently, all directories/files containing either 'origen' or 'snowball' in the URL path are protected with appropriate license (Samsung or ST-E) click-through.
16
Currently, all directories/files containing either 'origen' or 'snowball' in the URL path are protected with appropriate license (Samsung or ST-E) click-through.
26
17
17
27
18
Dependencies
28
19
............
29
20
30
21
libapache2-mod-php5
31
22
32
23
Testing: phpunit, testrepository, python-html2text
33
24
34
18
25
35
19
Technical details
26
Technical details
36
20
-----------------
27
-----------------
37
21
28
38
=== removed file 'testing/filefetcher.py'
39
--- testing/filefetcher.py	2012-01-13 11:48:16 +0000
40
+++ testing/filefetcher.py	1970-01-01 00:00:00 +0000
41
@@ -1,129 +0,0 @@
42
1
#!/usr/bin/env python
43
2
44
3
# Changes required to address EULA for the origen hwpacks
45
4
46
5
import argparse
47
6
import os
48
7
import pycurl
49
8
import re
50
9
import urlparse
51
10
52
11
53
12
class LicenseProtectedFileFetcher:
    """Fetch a file from the web that may be protected by a license redirect.

    This is designed to run on snapshots.linaro.org. License HTML files are
    in the form:

    <vendor>.html has links to <vendor>-accepted.html and
    <vendor>-declined.html

    If self.get is pointed at a file that has to go through one of these
    licenses, it should be able to automatically accept (or decline) the
    license and download the file (or the "not accepted" notice).

    Once a license has been accepted, it will be used for all following
    downloads.

    If self.close() is called before the object is deleted, cURL will store
    the license accept cookie to cookies.txt, so it can be used for later
    downloads.

    """

    def __init__(self):
        """Set up cURL: follow redirects, capture body/header, keep cookies."""
        self.curl = pycurl.Curl()
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
        self.curl.setopt(pycurl.WRITEFUNCTION, self._write_body)
        self.curl.setopt(pycurl.HEADERFUNCTION, self._write_header)
        self.curl.setopt(pycurl.COOKIEFILE, "cookies.txt")
        self.curl.setopt(pycurl.COOKIEJAR, "cookies.txt")

        # Make sure the sinks exist even before the first _get().
        self.body = ""
        self.header = ""

    def _get(self, url):
        """Clear out header and body storage, fetch URL, filling them in."""
        self.curl.setopt(pycurl.URL, url)

        self.body = ""
        self.header = ""

        self.curl.perform()

    def get(self, url, ignore_license=False, accept_license=True):
        """Fetch the requested URL and return the body that was received.

        url -- the URL to download.
        ignore_license -- if true, return whatever the first request
            produced without looking for a license page.
        accept_license -- if true follow the "-accepted.html" link found on
            the license page, otherwise follow the "-declined.html" link.

        If a redirect is encountered, it is expected to be to a license
        page that has an accept and a decline link. That link is followed;
        after accepting, the original file is fetched again if the accept
        request did not already deliver it.

        """
        self._get(url)

        if ignore_license:
            return self.body

        location = self._get_location()
        if not location:
            # No redirect: self.body already holds the requested file.
            return self.body

        # We have been redirected. Expect the page to link to a sibling of
        # the license page with -accepted or -declined inserted before the
        # .html, i.e. ste.html -> ste-accepted.html
        license_page = os.path.split(urlparse.urlparse(location).path)[-1]

        if accept_license:
            suffix = "-accepted"
        else:
            suffix = "-declined"
        link_pattern = re.compile(
            r"""href=.*?["'](.*?%s\.html)""" % suffix)

        # The last matching link on the page is the one that is followed.
        new_file = None
        for line in self.body.splitlines():
            link_search = link_pattern.search(line)
            if link_search:
                new_file = link_search.group(1)

        if new_file:
            # Accept or decline the license...
            next_url = self._license_action_url(location, license_page,
                                                new_file)
            self._get(next_url)

            # The above get *should* take us to the file requested via a
            # redirect. If a redirect was reported, fetch the original URL
            # again now that the cookie is in place.
            if accept_license and self._get_location():
                self._get(url)

        return self.body

    def _license_action_url(self, location, license_page, new_file):
        """Return location with license_page swapped for new_file.

        The page name is substituted literally (it is not treated as a
        regular expression). If location has no page name at all, new_file
        is resolved relative to location instead.
        """
        if license_page:
            return location.replace(license_page, new_file)
        return urlparse.urljoin(location, new_file)

    def _search_header(self, field):
        """Search header for the supplied field, return its value / None.

        The field name must start the header line and is compared without
        regard to case; the first matching line wins.
        """
        pattern = re.compile(r"^" + re.escape(field) + r":\s*(.*?)$",
                             re.IGNORECASE)
        for line in self.header.splitlines():
            found = pattern.search(line)
            if found:
                return found.group(1)
        return None

    def _get_location(self):
        """Return content of Location field in header / None"""
        return self._search_header("Location")

    def _write_body(self, buf):
        """Used by curl as a sink for body content"""
        self.body += buf

    def _write_header(self, buf):
        """Used by curl as a sink for header content"""
        self.header += buf

    def close(self):
        """Wrapper to close curl - this will allow curl to write out cookies"""
        self.curl.close()
171
130
0
172
=== added file 'testing/license_protected_file_downloader.py'
173
--- testing/license_protected_file_downloader.py	1970-01-01 00:00:00 +0000
174
+++ testing/license_protected_file_downloader.py	2012-05-11 12:35:21 +0000
175
@@ -0,0 +1,284 @@
176
1
#!/usr/bin/env python
177
2
178
3
import argparse
179
4
import os
180
5
import pycurl
181
6
import re
182
7
import urlparse
183
8
import html2text
184
9
from BeautifulSoup import BeautifulSoup
185
10
186
11
class LicenseProtectedFileFetcher:
    """Fetch a file from the web that may be protected by a license redirect.

    This is designed to run on snapshots.linaro.org. License HTML files are
    in the form:

    <vendor>.html has links to <vendor>-accepted.html and
    <vendor>-declined.html

    If self.get is pointed at a file that has to go through one of these
    licenses, it should be able to automatically accept the license and
    download the file.

    Once a license has been accepted, it will be used for all following
    downloads.

    If self.close() is called before the object is deleted, cURL will store
    the license accept cookie to the cookie file (cookies.txt by default),
    so it can be used for later downloads.

    """

    # Stop buffering a response body in memory once this many bytes have
    # been collected (1MB). Data is still written to the target file.
    BODY_BUFFER_LIMIT = 1024 * 1024

    # "Name: value" header line; the status line has no colon and is skipped.
    _HEADER_LINE = re.compile(r"^(.*?)\s*:\s*(.*)$")

    # Links on the license page that accept / decline the license.
    _ACCEPT_LINK = re.compile(r"""href=.*?["'](.*?-accepted\.html)""")
    _DECLINE_LINK = re.compile(r"""href=.*?["'](.*?-declined\.html)""")

    def __init__(self, cookie_file="cookies.txt"):
        """Set up cURL.

        cookie_file -- file cURL reads cookies from and writes them to.
        """
        self.curl = pycurl.Curl()
        self.curl.setopt(pycurl.WRITEFUNCTION, self._write_body)
        self.curl.setopt(pycurl.HEADERFUNCTION, self._write_header)
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
        self.curl.setopt(pycurl.COOKIEFILE, cookie_file)
        self.curl.setopt(pycurl.COOKIEJAR, cookie_file)

        # State used by the cURL callbacks and by _get(); initialised here
        # so that every public method is safe to call first.
        self.file_out = None
        self.file_name = None
        self.body = ""
        self.header = ""
        self.header_text = ""

    def _get(self, url):
        """Clear out header and body storage, fetch URL, filling them in.

        If self.file_name is set, the body is also written to that file,
        which is truncated at the start of every call.
        """
        url = url.encode("ascii")
        self.curl.setopt(pycurl.URL, url)

        self.body = ""
        self.header = ""

        if self.file_name:
            # Binary mode: the payload must reach the disk unmodified.
            self.file_out = open(self.file_name, 'wb')
        else:
            self.file_out = None

        try:
            self.curl.perform()
        finally:
            # Never leave a dangling (or closed) handle behind, even when
            # the transfer fails; later header-only requests reuse the
            # same body callback.
            if self.file_out:
                self.file_out.close()
                self.file_out = None

        self._parse_headers(url)

    def _parse_headers(self, url):
        """Turn the raw header text in self.header into a dictionary.

        url -- the URL that was requested; a relative Location header is
            resolved against it so that Location is always absolute.

        Afterwards self.header is the dictionary and self.header_text holds
        the raw text. If a field occurs more than once the last value wins.
        """
        header = {}
        for line in self.header.splitlines():
            # Header lines typically are of the form thing: value...
            test_line = self._HEADER_LINE.search(line)

            if test_line:
                header[test_line.group(1)] = test_line.group(2)

        # The location attribute is sometimes relative, but we would
        # like to have it as always absolute...
        if 'Location' in header:
            location = header["Location"]
            if not urlparse.urlparse(location).netloc:
                header["Location"] = urlparse.urljoin(url, location)

        self.header_text = self.header
        self.header = header

    def get_headers(self, url):
        """Fetch only the headers for url and return them as a dictionary."""
        url = url.encode("ascii")
        self.curl.setopt(pycurl.URL, url)

        self.body = ""
        self.header = ""

        # Setting NOBODY causes CURL to just fetch the header.
        self.curl.setopt(pycurl.NOBODY, True)
        try:
            self.curl.perform()
        finally:
            # Restore normal behaviour even if the request failed, so the
            # next _get() downloads a body again.
            self.curl.setopt(pycurl.NOBODY, False)

        self._parse_headers(url)

        return self.header

    def get_or_return_license(self, url, file_name=None):
        """Get file at the requested URL or, if behind a license, return that.

        If the URL provided does not redirect us to a license, then return
        the body collected by that request. If we are redirected to a
        license click through then return the tuple (the license as plain
        text, URL to accept the license, URL to decline the license).

        If the user of this function accepts the license, then they should
        call get_protected_file."""

        self.file_name = file_name

        # Get the license details. If this returns None, no license page
        # was recognised and we return whatever body the request left in
        # self.body.
        license_details = self._get_license(url)

        if license_details:
            return license_details

        return self.body

    def get(self, url, file_name=None, ignore_license=False,
            accept_license=True):
        """Fetch the requested URL, accepting or declining licenses.

        url -- the URL to download.
        file_name -- if given, the downloaded data is written to this file.
        ignore_license -- if true, just fetch url and return the body.
        accept_license -- if true accept a license that is found, otherwise
            follow its decline link.

        Fetches the file at url. If a redirect is encountered, it is
        expected to be to a license that has an accept link. Follow that
        link, then download the original file. Returns the first 1MB of the
        body (see _write_body).

        """

        self.file_name = file_name
        if ignore_license:
            self._get(url)
            return self.body

        license_details = self._get_license(url)

        if license_details:
            # Found a license.
            if accept_license:
                # Accept the license without looking at it and
                # start fetching the file we originally wanted.
                accept_url = license_details[1]
                self.get_protected_file(accept_url, url)
            else:
                # We want to decline the license and return the notice.
                decline_url = license_details[2]
                self._get(decline_url)

        else:
            # If we got here, there wasn't a license protecting the file
            # so we just fetch it.
            self._get(url)

        return self.body

    def _license_action_url(self, location, license_page, new_file):
        """Return location with license_page swapped for new_file.

        The page name is substituted literally (it is not treated as a
        regular expression). If location has no page name at all, new_file
        is resolved relative to location instead.
        """
        if license_page:
            return location.replace(license_page, new_file)
        return urlparse.urljoin(location, new_file)

    def _get_license(self, url):
        """Return (license, accept URL, decline URL) if found,
        else return None.

        """

        self.get_headers(url)

        if "Location" in self.header and self.header["Location"] != url:
            # We have been redirected to a new location - the license file
            location = self.header["Location"]

            # Fetch the license HTML
            self._get(location)

            # Name of the license page, without the rest of the path
            license_page = os.path.split(urlparse.urlparse(location).path)[-1]

            # Look for the first links with accepted.html / declined.html
            # in the page name.
            accept_search, decline_search = None, None
            for line in self.body.splitlines():
                if not accept_search:
                    accept_search = self._ACCEPT_LINK.search(line)
                if not decline_search:
                    decline_search = self._DECLINE_LINK.search(line)
                if accept_search and decline_search:
                    break

            if accept_search and decline_search:
                # Have found license accept URL!
                accept_url = self._license_action_url(
                    location, license_page, accept_search.group(1))

                # Found decline URL as well.
                decline_url = self._license_action_url(
                    location, license_page, decline_search.group(1))

                # Parse the HTML using BeautifulSoup
                soup = BeautifulSoup(self.body)

                # The license is in a div with the ID license-text, so we
                # use this to pull just the license out of the HTML.
                html_license = u""
                for chunk in soup.findAll(id="license-text"):
                    # Output of chunk.prettify is UTF8, but comes back
                    # as a str, so convert it here.
                    html_license += chunk.prettify().decode("utf-8")

                text_license = html2text.html2text(html_license)

                return text_license, accept_url, decline_url

        return None

    def get_protected_file(self, accept_url, url):
        """Accept the license via accept_url, then return the body of url."""

        self._get(accept_url)  # Accept the license

        if not("Location" in self.header and self.header["Location"] == url):
            # If we got here, we don't have the file yet (weren't redirected
            # to it). Fetch our target file. This should work now that we have
            # the right cookie.
            self._get(url)  # Download the target file

        return self.body

    def _write_body(self, buf):
        """Used by curl as a sink for body content"""

        # If we have a target file to write to, write to it
        if self.file_out:
            self.file_out.write(buf)

        # Only buffer first 1MB of body. This should be plenty for anything
        # we wish to parse internally. The check is made before appending,
        # so the buffer may overshoot the limit by at most one chunk.
        if len(self.body) < self.BODY_BUFFER_LIMIT:
            # XXX Would be nice to stop keeping the file in RAM at all and
            # passing large buffers around. Perhaps only keep in RAM if
            # file_name == None? (used for getting directory listings
            # normally).
            self.body += buf

    def _write_header(self, buf):
        """Used by curl as a sink for header content"""
        self.header += buf

    def register_progress_callback(self, callback):
        """Enable cURL progress reporting and route it to callback."""
        self.curl.setopt(pycurl.NOPROGRESS, 0)
        self.curl.setopt(pycurl.PROGRESSFUNCTION, callback)

    def close(self):
        """Wrapper to close curl - this will allow curl to write out cookies"""
        self.curl.close()
437
262
438
263
def main():
    """Download the file specified on the command line.

    The file is saved in the current directory under the last component of
    the URL path, or as "downloaded" when the path has no file name. Any
    license protecting the file is accepted automatically.
    """
    parser = argparse.ArgumentParser(description="Download a file, accepting "
                                    "any licenses required to do so.")

    parser.add_argument('url', metavar="URL", type=str, nargs=1,
                        help="URL of file to download.")

    args = parser.parse_args()
    url = args.url[0]  # nargs=1 yields a one-element list

    # Get file name from URL
    file_name = os.path.basename(urlparse.urlparse(url).path)
    if not file_name:
        file_name = "downloaded"

    fetcher = LicenseProtectedFileFetcher()
    try:
        fetcher.get(url, file_name)
    finally:
        # Always close the handle so cURL writes out the cookie jar, even
        # when the download fails.
        fetcher.close()
457
282
458
283
if __name__ == "__main__":
459
284
    main()
460
0
285
461
=== modified file 'testing/test_click_through_license.py'
462
--- testing/test_click_through_license.py	2012-05-07 08:48:51 +0000
463
+++ testing/test_click_through_license.py	2012-05-11 12:35:21 +0000
464
@@ -9,7 +9,7 @@
465
9
9
466
10
from testtools import TestCase
10
from testtools import TestCase
467
11
from testtools.matchers import Mismatch
11
from testtools.matchers import Mismatch
469
12
from filefetcher import LicenseProtectedFileFetcher
12
from license_protected_file_downloader import LicenseProtectedFileFetcher
470
13
13
471
14
fetcher = LicenseProtectedFileFetcher()
14
fetcher = LicenseProtectedFileFetcher()
472
15
cwd = os.getcwd()
15
cwd = os.getcwd()
473
@@ -145,19 +145,19 @@
474
145
        self.assertThat(testfile, Contains(search))
145
        self.assertThat(testfile, Contains(search))
475
146
146
476
147
    def test_redirect_to_license_samsung(self):
147
    def test_redirect_to_license_samsung(self):
480
148
        search = "LICENSE AGREEMENT"
148
        search = "PLEASE READ THE FOLLOWING AGREEMENT CAREFULLY"
481
149
        testfile = fetcher.get(host + samsung_test_file, ignore_license=True)
149
        testfile = fetcher.get_or_return_license(host + samsung_test_file)
482
150
        self.assertThat(testfile, Contains(search))
150
        self.assertThat(testfile[0], Contains(search))
483
151
151
484
152
    def test_redirect_to_license_ste(self):
152
    def test_redirect_to_license_ste(self):
488
153
        search = "LICENSE AGREEMENT"
153
        search = "PLEASE READ THE FOLLOWING AGREEMENT CAREFULLY"
489
154
        testfile = fetcher.get(host + ste_test_file, ignore_license=True)
154
        testfile = fetcher.get_or_return_license(host + ste_test_file)
490
155
        self.assertThat(testfile, Contains(search))
155
        self.assertThat(testfile[0], Contains(search))
491
156
156
492
157
    def test_redirect_to_license_linaro(self):
157
    def test_redirect_to_license_linaro(self):
496
158
        search = "LICENSE AGREEMENT"
158
        search = "Linaro license."
497
159
        testfile = fetcher.get(host + linaro_test_file, ignore_license=True)
159
        testfile = fetcher.get_or_return_license(host + linaro_test_file)
498
160
        self.assertThat(testfile, Contains(search))
160
        self.assertThat(testfile[0], Contains(search))
499
161
161
500
162
    def test_decline_license_samsung(self):
162
    def test_decline_license_samsung(self):
501
163
        search = "License has not been accepted"
163
        search = "License has not been accepted"
502
@@ -214,13 +214,13 @@
503
214
    def test_license_accepted_samsung(self):
214
    def test_license_accepted_samsung(self):
504
215
        search = "This is protected with click-through Samsung license."
215
        search = "This is protected with click-through Samsung license."
505
216
        os.rename("%s/cookies.samsung" % docroot, "%s/cookies.txt" % docroot)
216
        os.rename("%s/cookies.samsung" % docroot, "%s/cookies.txt" % docroot)
507
217
        testfile = fetcher.get(host + samsung_test_file, ignore_license=True)
217
        testfile = fetcher.get(host + samsung_test_file)
508
218
        self.assertThat(testfile, Contains(search))
218
        self.assertThat(testfile, Contains(search))
509
219
219
510
220
    def test_license_accepted_ste(self):
220
    def test_license_accepted_ste(self):
511
221
        search = "This is protected with click-through ST-E license."
221
        search = "This is protected with click-through ST-E license."
512
222
        os.rename("%s/cookies.ste" % docroot, "%s/cookies.txt" % docroot)
222
        os.rename("%s/cookies.ste" % docroot, "%s/cookies.txt" % docroot)
514
223
        testfile = fetcher.get(host + ste_test_file, ignore_license=True)
223
        testfile = fetcher.get(host + ste_test_file)
515
224
        self.assertThat(testfile, Contains(search))
224
        self.assertThat(testfile, Contains(search))
516
225
225
517
226
    def test_internal_host_samsung(self):
226
    def test_internal_host_samsung(self):
Reviewer	Review Type	Date Requested	Status
James Tunnicliffe (community)		2012-05-09	Approve on 2012-05-11
Данило Шеган	code	2012-05-09	Pending
Review via email: mp+105209@code.launchpad.net