Merge lp:~jelmer/brz/split-http into lp:brz/3.1
- split-http
- Merge into 3.1
Proposed by
Jelmer Vernooij
Status: | Merged |
---|---|
Approved by: | Jelmer Vernooij |
Approved revision: | no longer in the source branch. |
Merge reported by: | The Breezy Bot |
Merged at revision: | not available |
Proposed branch: | lp:~jelmer/brz/split-http |
Merge into: | lp:brz/3.1 |
Diff against target: |
5544 lines (+2668/-2624) 11 files modified
breezy/bzr/tests/test_bzrdir.py (+1/-1) breezy/bzr/tests/test_smart_transport.py (+2/-2) breezy/plugins/fossil/__init__.py (+1/-1) breezy/tests/__init__.py (+1/-1) breezy/tests/test_http.py (+24/-17) breezy/tests/test_http_response.py (+3/-3) breezy/tests/test_selftest.py (+1/-1) breezy/tests/test_transport.py (+2/-1) breezy/transport/__init__.py (+4/-4) breezy/transport/http/__init__.py (+3/-2593) breezy/transport/http/urllib.py (+2626/-0) |
To merge this branch: | bzr merge lp:~jelmer/brz/split-http |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Martin Packman | Approve | ||
Review via email: mp+395620@code.launchpad.net |
Commit message
Description of the change
Move urllib-specific bits into a separate module.
To post a comment you must log in.
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote : | # |
Running landing tests failed
https:/ [NOTE: URL truncated in page export — original link to the CI failure log is not recoverable from this copy]
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote : | # |
Running landing tests failed
https:/ [NOTE: URL truncated in page export — original link to the CI failure log is not recoverable from this copy]
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'breezy/bzr/tests/test_bzrdir.py' |
2 | --- breezy/bzr/tests/test_bzrdir.py 2020-07-26 15:29:07 +0000 |
3 | +++ breezy/bzr/tests/test_bzrdir.py 2020-12-27 18:07:47 +0000 |
4 | @@ -68,7 +68,7 @@ |
5 | memory, |
6 | pathfilter, |
7 | ) |
8 | -from ...transport.http import HttpTransport |
9 | +from ...transport.http.urllib import HttpTransport |
10 | from ...transport.nosmart import NoSmartTransportDecorator |
11 | from ...transport.readonly import ReadonlyTransportDecorator |
12 | from .. import knitrepo, knitpack_repo |
13 | |
14 | === modified file 'breezy/bzr/tests/test_smart_transport.py' |
15 | --- breezy/bzr/tests/test_smart_transport.py 2020-06-10 02:56:53 +0000 |
16 | +++ breezy/bzr/tests/test_smart_transport.py 2020-12-27 18:07:47 +0000 |
17 | @@ -59,12 +59,12 @@ |
18 | int2byte, |
19 | ) |
20 | from ...transport import ( |
21 | - http, |
22 | local, |
23 | memory, |
24 | remote, |
25 | ssh, |
26 | ) |
27 | +from ...transport.http import urllib |
28 | from . import ( |
29 | test_smart, |
30 | ) |
31 | @@ -4276,7 +4276,7 @@ |
32 | |
33 | def test_smart_http_medium_request_accept_bytes(self): |
34 | medium = FakeHTTPMedium() |
35 | - request = http.SmartClientHTTPMediumRequest(medium) |
36 | + request = urllib.SmartClientHTTPMediumRequest(medium) |
37 | request.accept_bytes(b'abc') |
38 | request.accept_bytes(b'def') |
39 | self.assertEqual(None, medium.written_request) |
40 | |
41 | === modified file 'breezy/plugins/fossil/__init__.py' |
42 | --- breezy/plugins/fossil/__init__.py 2020-07-29 22:12:53 +0000 |
43 | +++ breezy/plugins/fossil/__init__.py 2020-12-27 18:07:47 +0000 |
44 | @@ -71,7 +71,7 @@ |
45 | |
46 | @classmethod |
47 | def probe_transport(klass, transport): |
48 | - from breezy.transport.http import HttpTransport |
49 | + from breezy.transport.http.urllib import HttpTransport |
50 | if not isinstance(transport, HttpTransport): |
51 | raise errors.NotBranchError(path=transport.base) |
52 | response = transport.request( |
53 | |
54 | === modified file 'breezy/tests/__init__.py' |
55 | --- breezy/tests/__init__.py 2020-07-28 01:28:20 +0000 |
56 | +++ breezy/tests/__init__.py 2020-12-27 18:07:47 +0000 |
57 | @@ -4208,7 +4208,7 @@ |
58 | 'breezy.tests', |
59 | 'breezy.tests.fixtures', |
60 | 'breezy.timestamp', |
61 | - 'breezy.transport.http', |
62 | + 'breezy.transport.http.urllib', |
63 | 'breezy.version_info_formats.format_custom', |
64 | ] |
65 | |
66 | |
67 | === modified file 'breezy/tests/test_http.py' |
68 | --- breezy/tests/test_http.py 2019-05-19 22:33:19 +0000 |
69 | +++ breezy/tests/test_http.py 2020-12-27 18:07:47 +0000 |
70 | @@ -62,11 +62,18 @@ |
71 | multiply_scenarios, |
72 | ) |
73 | from ..transport import ( |
74 | - http, |
75 | remote, |
76 | ) |
77 | -from ..transport.http import ( |
78 | +from ..transport.http import urllib |
79 | +from ..transport.http.urllib import ( |
80 | + AbstractAuthHandler, |
81 | + BasicAuthHandler, |
82 | HttpTransport, |
83 | + HTTPAuthHandler, |
84 | + HTTPConnection, |
85 | + HTTPSConnection, |
86 | + ProxyHandler, |
87 | + Request, |
88 | ) |
89 | |
90 | |
91 | @@ -225,7 +232,7 @@ |
92 | |
93 | def parse_header(self, header, auth_handler_class=None): |
94 | if auth_handler_class is None: |
95 | - auth_handler_class = http.AbstractAuthHandler |
96 | + auth_handler_class = AbstractAuthHandler |
97 | self.auth_handler = auth_handler_class() |
98 | return self.auth_handler._parse_auth_header(header) |
99 | |
100 | @@ -246,7 +253,7 @@ |
101 | self.assertEqual('realm="Thou should not pass"', remainder) |
102 | |
103 | def test_build_basic_header_with_long_creds(self): |
104 | - handler = http.BasicAuthHandler() |
105 | + handler = BasicAuthHandler() |
106 | user = 'user' * 10 # length 40 |
107 | password = 'password' * 5 # length 40 |
108 | header = handler.build_auth_header( |
109 | @@ -258,7 +265,7 @@ |
110 | def test_basic_extract_realm(self): |
111 | scheme, remainder = self.parse_header( |
112 | 'Basic realm="Thou should not pass"', |
113 | - http.BasicAuthHandler) |
114 | + BasicAuthHandler) |
115 | match, realm = self.auth_handler.extract_realm(remainder) |
116 | self.assertTrue(match is not None) |
117 | self.assertEqual(u'Thou should not pass', realm) |
118 | @@ -514,7 +521,7 @@ |
119 | offsets = [(start, end - start + 1) for start, end in ranges] |
120 | coalesce = transport.Transport._coalesce_offsets |
121 | coalesced = list(coalesce(offsets, limit=0, fudge_factor=0)) |
122 | - range_header = http.HttpTransport._range_header |
123 | + range_header = HttpTransport._range_header |
124 | self.assertEqual(value, range_header(coalesced, tail)) |
125 | |
126 | def test_range_header_single(self): |
127 | @@ -1129,13 +1136,13 @@ |
128 | """ |
129 | |
130 | def _proxied_request(self): |
131 | - handler = http.ProxyHandler() |
132 | - request = http.Request('GET', 'http://baz/buzzle') |
133 | + handler = ProxyHandler() |
134 | + request = Request('GET', 'http://baz/buzzle') |
135 | handler.set_proxy(request, 'http') |
136 | return request |
137 | |
138 | def assertEvaluateProxyBypass(self, expected, host, no_proxy): |
139 | - handler = http.ProxyHandler() |
140 | + handler = ProxyHandler() |
141 | self.assertEqual(expected, |
142 | handler.evaluate_proxy_bypass(host, no_proxy)) |
143 | |
144 | @@ -1327,24 +1334,24 @@ |
145 | self.get_new_transport().get('a').read()) |
146 | |
147 | |
148 | -class RedirectedRequest(http.Request): |
149 | +class RedirectedRequest(Request): |
150 | """Request following redirections. """ |
151 | |
152 | - init_orig = http.Request.__init__ |
153 | + init_orig = Request.__init__ |
154 | |
155 | def __init__(self, method, url, *args, **kwargs): |
156 | """Constructor. |
157 | |
158 | """ |
159 | # Since the tests using this class will replace |
160 | - # http.Request, we can't just call the base class __init__ |
161 | + # Request, we can't just call the base class __init__ |
162 | # or we'll loop. |
163 | RedirectedRequest.init_orig(self, method, url, *args, **kwargs) |
164 | self.follow_redirections = True |
165 | |
166 | |
167 | def install_redirected_request(test): |
168 | - test.overrideAttr(http, 'Request', RedirectedRequest) |
169 | + test.overrideAttr(urllib, 'Request', RedirectedRequest) |
170 | |
171 | |
172 | def cleanup_http_redirection_connections(test): |
173 | @@ -1361,13 +1368,13 @@ |
174 | test.http_connect_orig(connection) |
175 | test.addCleanup(socket_disconnect, connection.sock) |
176 | test.http_connect_orig = test.overrideAttr( |
177 | - http.HTTPConnection, 'connect', connect) |
178 | + HTTPConnection, 'connect', connect) |
179 | |
180 | def connect(connection): |
181 | test.https_connect_orig(connection) |
182 | test.addCleanup(socket_disconnect, connection.sock) |
183 | test.https_connect_orig = test.overrideAttr( |
184 | - http.HTTPSConnection, 'connect', connect) |
185 | + HTTPSConnection, 'connect', connect) |
186 | |
187 | |
188 | class TestHTTPSilentRedirections(http_utils.TestCaseWithRedirectedWebserver): |
189 | @@ -1375,7 +1382,7 @@ |
190 | |
191 | http implementations do not redirect silently anymore (they |
192 | do not redirect at all in fact). The mechanism is still in |
193 | - place at the http.Request level and these tests |
194 | + place at the Request level and these tests |
195 | exercise it. |
196 | """ |
197 | |
198 | @@ -1499,7 +1506,7 @@ |
199 | password = 'foo' |
200 | _setup_authentication_config(scheme='http', host='localhost', |
201 | user=user, password=password) |
202 | - handler = http.HTTPAuthHandler() |
203 | + handler = HTTPAuthHandler() |
204 | got_pass = handler.get_user_password(dict( |
205 | user='joe', |
206 | protocol='http', |
207 | |
208 | === modified file 'breezy/tests/test_http_response.py' |
209 | --- breezy/tests/test_http_response.py 2020-06-09 17:24:27 +0000 |
210 | +++ breezy/tests/test_http_response.py 2020-12-27 18:07:47 +0000 |
211 | @@ -57,7 +57,7 @@ |
212 | ) |
213 | from ..transport.http import ( |
214 | response, |
215 | - HTTPConnection, |
216 | + urllib, |
217 | ) |
218 | from .file_utils import ( |
219 | FakeReadFile, |
220 | @@ -74,10 +74,10 @@ |
221 | return self.readfile |
222 | |
223 | |
224 | -class FakeHTTPConnection(HTTPConnection): |
225 | +class FakeHTTPConnection(urllib.HTTPConnection): |
226 | |
227 | def __init__(self, sock): |
228 | - HTTPConnection.__init__(self, 'localhost') |
229 | + urllib.HTTPConnection.__init__(self, 'localhost') |
230 | # Set the socket to bypass the connection |
231 | self.sock = sock |
232 | |
233 | |
234 | === modified file 'breezy/tests/test_selftest.py' |
235 | --- breezy/tests/test_selftest.py 2020-06-21 02:15:25 +0000 |
236 | +++ breezy/tests/test_selftest.py 2020-12-27 18:07:47 +0000 |
237 | @@ -692,7 +692,7 @@ |
238 | |
239 | def test_get_readonly_url_http(self): |
240 | from .http_server import HttpServer |
241 | - from ..transport.http import HttpTransport |
242 | + from ..transport.http.urllib import HttpTransport |
243 | self.transport_server = test_server.LocalURLServer |
244 | self.transport_readonly_server = HttpServer |
245 | # calling get_readonly_transport() gives us a HTTP server instance. |
246 | |
247 | === modified file 'breezy/tests/test_transport.py' |
248 | --- breezy/tests/test_transport.py 2019-03-03 19:49:20 +0000 |
249 | +++ breezy/tests/test_transport.py 2020-12-27 18:07:47 +0000 |
250 | @@ -40,6 +40,7 @@ |
251 | pathfilter, |
252 | readonly, |
253 | ) |
254 | +from ..transport.http import urllib |
255 | import breezy.transport.trace |
256 | from . import ( |
257 | features, |
258 | @@ -1104,6 +1105,6 @@ |
259 | |
260 | def test_truncation(self): |
261 | fake_html = "<p>something!\n" * 1000 |
262 | - result = http.unhtml_roughly(fake_html) |
263 | + result = urllib.unhtml_roughly(fake_html) |
264 | self.assertEqual(len(result), 1000) |
265 | self.assertStartsWith(result, " something!") |
266 | |
267 | === modified file 'breezy/transport/__init__.py' |
268 | --- breezy/transport/__init__.py 2020-06-21 02:15:25 +0000 |
269 | +++ breezy/transport/__init__.py 2020-12-27 18:07:47 +0000 |
270 | @@ -1661,12 +1661,12 @@ |
271 | register_transport_proto('http+urllib://', |
272 | # help="Read-only access of branches exported on the web." |
273 | register_netloc=True) |
274 | -register_lazy_transport('http+urllib://', 'breezy.transport.http', |
275 | +register_lazy_transport('http+urllib://', 'breezy.transport.http.urllib', |
276 | 'HttpTransport') |
277 | register_transport_proto('https+urllib://', |
278 | # help="Read-only access of branches exported on the web using SSL." |
279 | register_netloc=True) |
280 | -register_lazy_transport('https+urllib://', 'breezy.transport.http', |
281 | +register_lazy_transport('https+urllib://', 'breezy.transport.http.urllib', |
282 | 'HttpTransport') |
283 | # Default http transports (last declared wins (if it can be imported)) |
284 | register_transport_proto('http://', |
285 | @@ -1674,9 +1674,9 @@ |
286 | register_transport_proto('https://', |
287 | help="Read-only access of branches exported on the web using SSL.") |
288 | # The default http implementation is urllib |
289 | -register_lazy_transport('http://', 'breezy.transport.http', |
290 | +register_lazy_transport('http://', 'breezy.transport.http.urllib', |
291 | 'HttpTransport') |
292 | -register_lazy_transport('https://', 'breezy.transport.http', |
293 | +register_lazy_transport('https://', 'breezy.transport.http.urllib', |
294 | 'HttpTransport') |
295 | |
296 | register_transport_proto( |
297 | |
298 | === modified file 'breezy/transport/http/__init__.py' |
299 | --- breezy/transport/http/__init__.py 2020-07-30 21:37:51 +0000 |
300 | +++ breezy/transport/http/__init__.py 2020-12-27 18:07:47 +0000 |
301 | @@ -21,67 +21,15 @@ |
302 | |
303 | from __future__ import absolute_import |
304 | |
305 | -DEBUG = 0 |
306 | |
307 | -import base64 |
308 | -import cgi |
309 | -import errno |
310 | import os |
311 | -import re |
312 | -import socket |
313 | import ssl |
314 | import sys |
315 | -import time |
316 | -import urllib |
317 | -import weakref |
318 | - |
319 | -try: |
320 | - import http.client as http_client |
321 | -except ImportError: |
322 | - import httplib as http_client |
323 | -try: |
324 | - import urllib.request as urllib_request |
325 | -except ImportError: # python < 3 |
326 | - import urllib2 as urllib_request |
327 | -try: |
328 | - from urllib.parse import urljoin, splitport, splittype, splithost, urlencode |
329 | -except ImportError: |
330 | - from urlparse import urljoin |
331 | - from urllib import splitport, splittype, splithost, urlencode |
332 | - |
333 | -# TODO: handle_response should be integrated into the http/__init__.py |
334 | -from .response import handle_response |
335 | - |
336 | -# FIXME: Oversimplifying, two kind of exceptions should be |
337 | -# raised, once a request is issued: URLError before we have been |
338 | -# able to process the response, HTTPError after that. Process the |
339 | -# response means we are able to leave the socket clean, so if we |
340 | -# are not able to do that, we should close the connection. The |
341 | -# actual code more or less do that, tests should be written to |
342 | -# ensure that. |
343 | - |
344 | -from ... import __version__ as breezy_version |
345 | + |
346 | + |
347 | from ... import ( |
348 | + version_string as breezy_version, |
349 | config, |
350 | - debug, |
351 | - errors, |
352 | - lazy_import, |
353 | - osutils, |
354 | - trace, |
355 | - transport, |
356 | - ui, |
357 | - urlutils, |
358 | -) |
359 | -from ...bzr.smart import medium |
360 | -from ...sixish import ( |
361 | - PY3, |
362 | - reraise, |
363 | - text_type, |
364 | -) |
365 | -from ...trace import mutter |
366 | -from ...transport import ( |
367 | - ConnectedTransport, |
368 | - UnusableRedirect, |
369 | ) |
370 | |
371 | |
372 | @@ -181,2541 +129,3 @@ |
373 | * none: Certificates ignored |
374 | * required: Certificates required and validated |
375 | """) |
376 | - |
377 | -checked_kerberos = False |
378 | -kerberos = None |
379 | - |
380 | - |
381 | -class addinfourl(urllib_request.addinfourl): |
382 | - '''Replacement addinfourl class compatible with python-2.7's xmlrpclib |
383 | - |
384 | - In python-2.7, xmlrpclib expects that the response object that it receives |
385 | - has a getheader method. http_client.HTTPResponse provides this but |
386 | - urllib_request.addinfourl does not. Add the necessary functions here, ported to |
387 | - use the internal data structures of addinfourl. |
388 | - ''' |
389 | - |
390 | - def getheader(self, name, default=None): |
391 | - if self.headers is None: |
392 | - raise http_client.ResponseNotReady() |
393 | - return self.headers.getheader(name, default) |
394 | - |
395 | - def getheaders(self): |
396 | - if self.headers is None: |
397 | - raise http_client.ResponseNotReady() |
398 | - return list(self.headers.items()) |
399 | - |
400 | - |
401 | -class _ReportingFileSocket(object): |
402 | - |
403 | - def __init__(self, filesock, report_activity=None): |
404 | - self.filesock = filesock |
405 | - self._report_activity = report_activity |
406 | - |
407 | - def report_activity(self, size, direction): |
408 | - if self._report_activity: |
409 | - self._report_activity(size, direction) |
410 | - |
411 | - def read(self, size=1): |
412 | - s = self.filesock.read(size) |
413 | - self.report_activity(len(s), 'read') |
414 | - return s |
415 | - |
416 | - def readline(self, size=-1): |
417 | - s = self.filesock.readline(size) |
418 | - self.report_activity(len(s), 'read') |
419 | - return s |
420 | - |
421 | - def readinto(self, b): |
422 | - s = self.filesock.readinto(b) |
423 | - self.report_activity(s, 'read') |
424 | - return s |
425 | - |
426 | - def __getattr__(self, name): |
427 | - return getattr(self.filesock, name) |
428 | - |
429 | - |
430 | -class _ReportingSocket(object): |
431 | - |
432 | - def __init__(self, sock, report_activity=None): |
433 | - self.sock = sock |
434 | - self._report_activity = report_activity |
435 | - |
436 | - def report_activity(self, size, direction): |
437 | - if self._report_activity: |
438 | - self._report_activity(size, direction) |
439 | - |
440 | - def sendall(self, s, *args): |
441 | - self.sock.sendall(s, *args) |
442 | - self.report_activity(len(s), 'write') |
443 | - |
444 | - def recv(self, *args): |
445 | - s = self.sock.recv(*args) |
446 | - self.report_activity(len(s), 'read') |
447 | - return s |
448 | - |
449 | - def makefile(self, mode='r', bufsize=-1): |
450 | - # http_client creates a fileobject that doesn't do buffering, which |
451 | - # makes fp.readline() very expensive because it only reads one byte |
452 | - # at a time. So we wrap the socket in an object that forces |
453 | - # sock.makefile to make a buffered file. |
454 | - fsock = self.sock.makefile(mode, 65536) |
455 | - # And wrap that into a reporting kind of fileobject |
456 | - return _ReportingFileSocket(fsock, self._report_activity) |
457 | - |
458 | - def __getattr__(self, name): |
459 | - return getattr(self.sock, name) |
460 | - |
461 | - |
462 | -# We define our own Response class to keep our http_client pipe clean |
463 | -class Response(http_client.HTTPResponse): |
464 | - """Custom HTTPResponse, to avoid the need to decorate. |
465 | - |
466 | - http_client prefers to decorate the returned objects, rather |
467 | - than using a custom object. |
468 | - """ |
469 | - |
470 | - # Some responses have bodies in which we have no interest |
471 | - _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501] |
472 | - |
473 | - # in finish() below, we may have to discard several MB in the worst |
474 | - # case. To avoid buffering that much, we read and discard by chunks |
475 | - # instead. The underlying file is either a socket or a StringIO, so reading |
476 | - # 8k chunks should be fine. |
477 | - _discarded_buf_size = 8192 |
478 | - |
479 | - if PY3: |
480 | - def __init__(self, sock, debuglevel=0, method=None, url=None): |
481 | - self.url = url |
482 | - super(Response, self).__init__( |
483 | - sock, debuglevel=debuglevel, method=method, url=url) |
484 | - |
485 | - def begin(self): |
486 | - """Begin to read the response from the server. |
487 | - |
488 | - http_client assumes that some responses get no content and do |
489 | - not even attempt to read the body in that case, leaving |
490 | - the body in the socket, blocking the next request. Let's |
491 | - try to workaround that. |
492 | - """ |
493 | - http_client.HTTPResponse.begin(self) |
494 | - if self.status in self._body_ignored_responses: |
495 | - if self.debuglevel >= 2: |
496 | - print("For status: [%s], will ready body, length: %s" % ( |
497 | - self.status, self.length)) |
498 | - if not (self.length is None or self.will_close): |
499 | - # In some cases, we just can't read the body not |
500 | - # even try or we may encounter a 104, 'Connection |
501 | - # reset by peer' error if there is indeed no body |
502 | - # and the server closed the connection just after |
503 | - # having issued the response headers (even if the |
504 | - # headers indicate a Content-Type...) |
505 | - body = self.read(self.length) |
506 | - if self.debuglevel >= 9: |
507 | - # This one can be huge and is generally not interesting |
508 | - print("Consumed body: [%s]" % body) |
509 | - self.close() |
510 | - elif self.status == 200: |
511 | - # Whatever the request is, it went ok, so we surely don't want to |
512 | - # close the connection. Some cases are not correctly detected by |
513 | - # http_client.HTTPConnection.getresponse (called by |
514 | - # http_client.HTTPResponse.begin). The CONNECT response for the https |
515 | - # through proxy case is one. Note: the 'will_close' below refers |
516 | - # to the "true" socket between us and the server, whereas the |
517 | - # 'close()' above refers to the copy of that socket created by |
518 | - # http_client for the response itself. So, in the if above we close the |
519 | - # socket to indicate that we are done with the response whereas |
520 | - # below we keep the socket with the server opened. |
521 | - self.will_close = False |
522 | - |
523 | - def finish(self): |
524 | - """Finish reading the body. |
525 | - |
526 | - In some cases, the client may have left some bytes to read in the |
527 | - body. That will block the next request to succeed if we use a |
528 | - persistent connection. If we don't use a persistent connection, well, |
529 | - nothing will block the next request since a new connection will be |
530 | - issued anyway. |
531 | - |
532 | - :return: the number of bytes left on the socket (may be None) |
533 | - """ |
534 | - pending = None |
535 | - if not self.isclosed(): |
536 | - # Make sure nothing was left to be read on the socket |
537 | - pending = 0 |
538 | - data = True |
539 | - while data and self.length: |
540 | - # read() will update self.length |
541 | - data = self.read(min(self.length, self._discarded_buf_size)) |
542 | - pending += len(data) |
543 | - if pending: |
544 | - trace.mutter("%s bytes left on the HTTP socket", pending) |
545 | - self.close() |
546 | - return pending |
547 | - |
548 | - |
549 | -# Not inheriting from 'object' because http_client.HTTPConnection doesn't. |
550 | -class AbstractHTTPConnection: |
551 | - """A custom HTTP(S) Connection, which can reset itself on a bad response""" |
552 | - |
553 | - response_class = Response |
554 | - |
555 | - # When we detect a server responding with the whole file to range requests, |
556 | - # we want to warn. But not below a given thresold. |
557 | - _range_warning_thresold = 1024 * 1024 |
558 | - |
559 | - def __init__(self, report_activity=None): |
560 | - self._response = None |
561 | - self._report_activity = report_activity |
562 | - self._ranges_received_whole_file = None |
563 | - |
564 | - def _mutter_connect(self): |
565 | - netloc = '%s:%s' % (self.host, self.port) |
566 | - if self.proxied_host is not None: |
567 | - netloc += '(proxy for %s)' % self.proxied_host |
568 | - trace.mutter('* About to connect() to %s' % netloc) |
569 | - |
570 | - def getresponse(self): |
571 | - """Capture the response to be able to cleanup""" |
572 | - self._response = http_client.HTTPConnection.getresponse(self) |
573 | - return self._response |
574 | - |
575 | - def cleanup_pipe(self): |
576 | - """Read the remaining bytes of the last response if any.""" |
577 | - if self._response is not None: |
578 | - try: |
579 | - pending = self._response.finish() |
580 | - # Warn the user (once) |
581 | - if (self._ranges_received_whole_file is None |
582 | - and self._response.status == 200 |
583 | - and pending |
584 | - and pending > self._range_warning_thresold): |
585 | - self._ranges_received_whole_file = True |
586 | - trace.warning( |
587 | - 'Got a 200 response when asking for multiple ranges,' |
588 | - ' does your server at %s:%s support range requests?', |
589 | - self.host, self.port) |
590 | - except socket.error as e: |
591 | - # It's conceivable that the socket is in a bad state here |
592 | - # (including some test cases) and in this case, it doesn't need |
593 | - # cleaning anymore, so no need to fail, we just get rid of the |
594 | - # socket and let callers reconnect |
595 | - if (len(e.args) == 0 |
596 | - or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)): |
597 | - raise |
598 | - self.close() |
599 | - self._response = None |
600 | - # Preserve our preciousss |
601 | - sock = self.sock |
602 | - self.sock = None |
603 | - # Let http_client.HTTPConnection do its housekeeping |
604 | - self.close() |
605 | - # Restore our preciousss |
606 | - self.sock = sock |
607 | - |
608 | - def _wrap_socket_for_reporting(self, sock): |
609 | - """Wrap the socket before anybody use it.""" |
610 | - self.sock = _ReportingSocket(sock, self._report_activity) |
611 | - |
612 | - |
613 | -class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection): |
614 | - |
615 | - # XXX: Needs refactoring at the caller level. |
616 | - def __init__(self, host, port=None, proxied_host=None, |
617 | - report_activity=None, ca_certs=None): |
618 | - AbstractHTTPConnection.__init__(self, report_activity=report_activity) |
619 | - if PY3: |
620 | - http_client.HTTPConnection.__init__(self, host, port) |
621 | - else: |
622 | - # Use strict=True since we don't support HTTP/0.9 |
623 | - http_client.HTTPConnection.__init__(self, host, port, strict=True) |
624 | - self.proxied_host = proxied_host |
625 | - # ca_certs is ignored, it's only relevant for https |
626 | - |
627 | - def connect(self): |
628 | - if 'http' in debug.debug_flags: |
629 | - self._mutter_connect() |
630 | - http_client.HTTPConnection.connect(self) |
631 | - self._wrap_socket_for_reporting(self.sock) |
632 | - |
633 | - |
634 | -class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection): |
635 | - |
636 | - def __init__(self, host, port=None, key_file=None, cert_file=None, |
637 | - proxied_host=None, |
638 | - report_activity=None, ca_certs=None): |
639 | - AbstractHTTPConnection.__init__(self, report_activity=report_activity) |
640 | - if PY3: |
641 | - http_client.HTTPSConnection.__init__( |
642 | - self, host, port, key_file, cert_file) |
643 | - else: |
644 | - # Use strict=True since we don't support HTTP/0.9 |
645 | - http_client.HTTPSConnection.__init__(self, host, port, |
646 | - key_file, cert_file, strict=True) |
647 | - self.proxied_host = proxied_host |
648 | - self.ca_certs = ca_certs |
649 | - |
650 | - def connect(self): |
651 | - if 'http' in debug.debug_flags: |
652 | - self._mutter_connect() |
653 | - http_client.HTTPConnection.connect(self) |
654 | - self._wrap_socket_for_reporting(self.sock) |
655 | - if self.proxied_host is None: |
656 | - self.connect_to_origin() |
657 | - |
658 | - def connect_to_origin(self): |
659 | - # FIXME JRV 2011-12-18: Use location config here? |
660 | - config_stack = config.GlobalStack() |
661 | - cert_reqs = config_stack.get('ssl.cert_reqs') |
662 | - if self.proxied_host is not None: |
663 | - host = self.proxied_host.split(":", 1)[0] |
664 | - else: |
665 | - host = self.host |
666 | - if cert_reqs == ssl.CERT_NONE: |
667 | - ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host) |
668 | - ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert') |
669 | - ca_certs = None |
670 | - else: |
671 | - if self.ca_certs is None: |
672 | - ca_certs = config_stack.get('ssl.ca_certs') |
673 | - else: |
674 | - ca_certs = self.ca_certs |
675 | - if ca_certs is None: |
676 | - trace.warning( |
677 | - "No valid trusted SSL CA certificates file set. See " |
678 | - "'brz help ssl.ca_certs' for more information on setting " |
679 | - "trusted CAs.") |
680 | - try: |
681 | - ssl_context = ssl.create_default_context( |
682 | - purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs) |
683 | - ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE |
684 | - if self.cert_file: |
685 | - ssl_context.load_cert_chain( |
686 | - keyfile=self.key_file, certfile=self.cert_file) |
687 | - ssl_context.verify_mode = cert_reqs |
688 | - ssl_sock = ssl_context.wrap_socket( |
689 | - self.sock, server_hostname=self.host) |
690 | - except ssl.SSLError: |
691 | - trace.note( |
692 | - "\n" |
693 | - "See `brz help ssl.ca_certs` for how to specify trusted CA" |
694 | - "certificates.\n" |
695 | - "Pass -Ossl.cert_reqs=none to disable certificate " |
696 | - "verification entirely.\n") |
697 | - raise |
698 | - # Wrap the ssl socket before anybody use it |
699 | - self._wrap_socket_for_reporting(ssl_sock) |
700 | - |
701 | - |
702 | -class Request(urllib_request.Request): |
703 | - """A custom Request object. |
704 | - |
705 | - urllib_request determines the request method heuristically (based on |
706 | - the presence or absence of data). We set the method |
707 | - statically. |
708 | - |
709 | - The Request object tracks: |
710 | - - the connection the request will be made on. |
711 | - - the authentication parameters needed to preventively set |
712 | - the authentication header once a first authentication have |
713 | - been made. |
714 | - """ |
715 | - |
716 | - def __init__(self, method, url, data=None, headers={}, |
717 | - origin_req_host=None, unverifiable=False, |
718 | - connection=None, parent=None): |
719 | - urllib_request.Request.__init__( |
720 | - self, url, data, headers, |
721 | - origin_req_host, unverifiable) |
722 | - self.method = method |
723 | - self.connection = connection |
724 | - # To handle redirections |
725 | - self.parent = parent |
726 | - self.redirected_to = None |
727 | - # Unless told otherwise, redirections are not followed |
728 | - self.follow_redirections = False |
729 | - # auth and proxy_auth are dicts containing, at least |
730 | - # (scheme, host, port, realm, user, password, protocol, path). |
731 | - # The dict entries are mostly handled by the AuthHandler. |
732 | - # Some authentication schemes may add more entries. |
733 | - self.auth = {} |
734 | - self.proxy_auth = {} |
735 | - self.proxied_host = None |
736 | - |
737 | - def get_method(self): |
738 | - return self.method |
739 | - |
740 | - def set_proxy(self, proxy, type): |
741 | - """Set the proxy and remember the proxied host.""" |
742 | - if PY3: |
743 | - host, port = splitport(self.host) |
744 | - else: |
745 | - host, port = splitport(self.get_host()) |
746 | - if port is None: |
747 | - # We need to set the default port ourselves way before it gets set |
748 | - # in the HTTP[S]Connection object at build time. |
749 | - if self.type == 'https': |
750 | - conn_class = HTTPSConnection |
751 | - else: |
752 | - conn_class = HTTPConnection |
753 | - port = conn_class.default_port |
754 | - self.proxied_host = '%s:%s' % (host, port) |
755 | - urllib_request.Request.set_proxy(self, proxy, type) |
756 | - # When urllib_request makes a https request with our wrapper code and a proxy, |
757 | - # it sets Host to the https proxy, not the host we want to talk to. |
758 | - # I'm fairly sure this is our fault, but what is the cause is an open |
759 | - # question. -- Robert Collins May 8 2010. |
760 | - self.add_unredirected_header('Host', self.proxied_host) |
761 | - |
762 | - |
763 | -class _ConnectRequest(Request): |
764 | - |
765 | - def __init__(self, request): |
766 | - """Constructor |
767 | - |
768 | - :param request: the first request sent to the proxied host, already |
769 | - processed by the opener (i.e. proxied_host is already set). |
770 | - """ |
771 | - # We give a fake url and redefine selector or urllib_request will be |
772 | - # confused |
773 | - Request.__init__(self, 'CONNECT', request.get_full_url(), |
774 | - connection=request.connection) |
775 | - if request.proxied_host is None: |
776 | - raise AssertionError() |
777 | - self.proxied_host = request.proxied_host |
778 | - |
779 | - @property |
780 | - def selector(self): |
781 | - return self.proxied_host |
782 | - |
783 | - def get_selector(self): |
784 | - return self.selector |
785 | - |
786 | - def set_proxy(self, proxy, type): |
787 | - """Set the proxy without remembering the proxied host. |
788 | - |
789 | - We already know the proxied host by definition, the CONNECT request |
790 | - occurs only when the connection goes through a proxy. The usual |
791 | - processing (masquerade the request so that the connection is done to |
792 | - the proxy while the request is targeted at another host) does not apply |
793 | - here. In fact, the connection is already established with proxy and we |
794 | - just want to enable the SSL tunneling. |
795 | - """ |
796 | - urllib_request.Request.set_proxy(self, proxy, type) |
797 | - |
798 | - |
799 | -class ConnectionHandler(urllib_request.BaseHandler): |
800 | - """Provides connection-sharing by pre-processing requests. |
801 | - |
802 | - urllib_request provides no way to access the HTTPConnection object |
803 | - internally used. But we need it in order to achieve |
804 | - connection sharing. So, we add it to the request just before |
805 | - it is processed, and then we override the do_open method for |
806 | - http[s] requests in AbstractHTTPHandler. |
807 | - """ |
808 | - |
809 | - handler_order = 1000 # after all pre-processings |
810 | - |
811 | - def __init__(self, report_activity=None, ca_certs=None): |
812 | - self._report_activity = report_activity |
813 | - self.ca_certs = ca_certs |
814 | - |
815 | - def create_connection(self, request, http_connection_class): |
816 | - host = request.host |
817 | - if not host: |
818 | - # Just a bit of paranoia here, this should have been |
819 | - # handled in the higher levels |
820 | - raise urlutils.InvalidURL(request.get_full_url(), 'no host given.') |
821 | - |
822 | - # We create a connection (but it will not connect until the first |
823 | - # request is made) |
824 | - try: |
825 | - connection = http_connection_class( |
826 | - host, proxied_host=request.proxied_host, |
827 | - report_activity=self._report_activity, |
828 | - ca_certs=self.ca_certs) |
829 | - except http_client.InvalidURL as exception: |
830 | - # There is only one occurrence of InvalidURL in http_client |
831 | - raise urlutils.InvalidURL(request.get_full_url(), |
832 | - extra='nonnumeric port') |
833 | - |
834 | - return connection |
835 | - |
836 | - def capture_connection(self, request, http_connection_class): |
837 | - """Capture or inject the request connection. |
838 | - |
839 | - Two cases: |
840 | - - the request has no connection: create a new one,
841 | - |
842 | - - the request has a connection: this one has been used
843 | - already, let's capture it, so that we can give it to |
844 | - another transport to be reused. We don't do that |
845 | - ourselves: the Transport object get the connection from |
846 | - a first request and then propagate it, from request to |
847 | - request or to cloned transports. |
848 | - """ |
849 | - connection = request.connection |
850 | - if connection is None: |
851 | - # Create a new one |
852 | - connection = self.create_connection(request, http_connection_class) |
853 | - request.connection = connection |
854 | - |
855 | - # All connections will pass here, propagate debug level |
856 | - connection.set_debuglevel(DEBUG) |
857 | - return request |
858 | - |
859 | - def http_request(self, request): |
860 | - return self.capture_connection(request, HTTPConnection) |
861 | - |
862 | - def https_request(self, request): |
863 | - return self.capture_connection(request, HTTPSConnection) |
864 | - |
865 | - |
866 | -class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler): |
867 | - """A custom handler for HTTP(S) requests. |
868 | - |
869 | - We override urllib_request.AbstractHTTPHandler to get a better
870 | - control of the connection, the ability to implement new |
871 | - request types and return a response able to cope with |
872 | - persistent connections. |
873 | - """ |
874 | - |
875 | - # We change our order to be before urllib_request HTTP[S]Handlers |
876 | - # and be chosen instead of them (the first http_open called |
877 | - # wins). |
878 | - handler_order = 400 |
879 | - |
880 | - _default_headers = {'Pragma': 'no-cache', |
881 | - 'Cache-control': 'max-age=0', |
882 | - 'Connection': 'Keep-Alive', |
883 | - 'User-agent': default_user_agent(), |
884 | - 'Accept': '*/*', |
885 | - } |
886 | - |
887 | - def __init__(self): |
888 | - urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG) |
889 | - |
890 | - def http_request(self, request): |
891 | - """Common headers setting""" |
892 | - |
893 | - for name, value in self._default_headers.items(): |
894 | - if name not in request.headers: |
895 | - request.headers[name] = value |
896 | - # FIXME: We may have to add the Content-Length header if |
897 | - # we have data to send. |
898 | - return request |
899 | - |
900 | - def retry_or_raise(self, http_class, request, first_try): |
901 | - """Retry the request (once) or raise the exception. |
902 | - |
903 | - urllib_request raises exception of application level kind, we |
904 | - just have to translate them. |
905 | - |
906 | - http_client can raise exceptions of transport level (badly |
907 | - formatted dialog, loss of connection or socket level
908 | - problems). In that case we should issue the request again |
909 | - (http_client will close and reopen a new connection if |
910 | - needed). |
911 | - """ |
912 | - # When an exception occurs, we give back the original |
913 | - # Traceback or the bugs are hard to diagnose. |
914 | - exc_type, exc_val, exc_tb = sys.exc_info() |
915 | - if exc_type == socket.gaierror: |
916 | - # No need to retry, that will not help |
917 | - if PY3: |
918 | - origin_req_host = request.origin_req_host |
919 | - else: |
920 | - origin_req_host = request.get_origin_req_host() |
921 | - raise errors.ConnectionError("Couldn't resolve host '%s'" |
922 | - % origin_req_host, |
923 | - orig_error=exc_val) |
924 | - elif isinstance(exc_val, http_client.ImproperConnectionState): |
925 | - # The http_client pipeline is in incorrect state, it's a bug in our |
926 | - # implementation. |
927 | - reraise(exc_type, exc_val, exc_tb) |
928 | - else: |
929 | - if first_try: |
930 | - if self._debuglevel >= 2: |
931 | - print('Received exception: [%r]' % exc_val) |
932 | - print(' On connection: [%r]' % request.connection) |
933 | - method = request.get_method() |
934 | - url = request.get_full_url() |
935 | - print(' Will retry, %s %r' % (method, url)) |
936 | - request.connection.close() |
937 | - response = self.do_open(http_class, request, False) |
938 | - else: |
939 | - if self._debuglevel >= 2: |
940 | - print('Received second exception: [%r]' % exc_val) |
941 | - print(' On connection: [%r]' % request.connection) |
942 | - if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol): |
943 | - # http_client.BadStatusLine and |
944 | - # http_client.UnknownProtocol indicates that a |
945 | - # bogus server was encountered or a bad |
946 | - # connection (i.e. transient errors) is |
947 | - # experimented, we have already retried once |
948 | - # for that request so we raise the exception. |
949 | - my_exception = errors.InvalidHttpResponse( |
950 | - request.get_full_url(), |
951 | - 'Bad status line received', |
952 | - orig_error=exc_val) |
953 | - elif (isinstance(exc_val, socket.error) and len(exc_val.args) |
954 | - and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)): |
955 | - # 10053 == WSAECONNABORTED |
956 | - # 10054 == WSAECONNRESET |
957 | - raise errors.ConnectionReset( |
958 | - "Connection lost while sending request.") |
959 | - else: |
960 | - # All other exceptions are considered connection related.
961 | - |
962 | - # socket errors generally occur for reasons
963 | - # far outside our scope, so closing the |
964 | - # connection and retrying is the best we can |
965 | - # do. |
966 | - if PY3: |
967 | - selector = request.selector |
968 | - else: |
969 | - selector = request.get_selector() |
970 | - my_exception = errors.ConnectionError( |
971 | - msg='while sending %s %s:' % (request.get_method(), |
972 | - selector), |
973 | - orig_error=exc_val) |
974 | - |
975 | - if self._debuglevel >= 2: |
976 | - print('On connection: [%r]' % request.connection) |
977 | - method = request.get_method() |
978 | - url = request.get_full_url() |
979 | - print(' Failed again, %s %r' % (method, url)) |
980 | - print(' Will raise: [%r]' % my_exception) |
981 | - reraise(type(my_exception), my_exception, exc_tb) |
982 | - return response |
983 | - |
984 | - def do_open(self, http_class, request, first_try=True): |
985 | - """See urllib_request.AbstractHTTPHandler.do_open for the general idea. |
986 | - |
987 | - The request will be retried once if it fails. |
988 | - """ |
989 | - connection = request.connection |
990 | - if connection is None: |
991 | - raise AssertionError( |
992 | - 'Cannot process a request without a connection') |
993 | - |
994 | - # Get all the headers |
995 | - headers = {} |
996 | - headers.update(request.header_items()) |
997 | - headers.update(request.unredirected_hdrs) |
998 | - # Some servers or proxies will choke on headers not properly |
999 | - # cased. http_client/urllib/urllib_request all use capitalize to get canonical |
1000 | - # header names, but only python2.5 urllib_request use title() to fix them just |
1001 | - # before sending the request. And not all versions of python 2.5 do |
1002 | - # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it |
1003 | - # ourself below. |
1004 | - headers = {name.title(): val for name, val in headers.items()} |
1005 | - |
1006 | - try: |
1007 | - method = request.get_method() |
1008 | - if PY3: |
1009 | - url = request.selector |
1010 | - else: |
1011 | - url = request.get_selector() |
1012 | - if sys.version_info[:2] >= (3, 6): |
1013 | - connection._send_request(method, url, |
1014 | - # FIXME: implements 100-continue |
1015 | - # None, # We don't send the body yet |
1016 | - request.data, |
1017 | - headers, encode_chunked=False) |
1018 | - else: |
1019 | - connection._send_request(method, url, |
1020 | - # FIXME: implements 100-continue |
1021 | - # None, # We don't send the body yet |
1022 | - request.data, |
1023 | - headers) |
1024 | - if 'http' in debug.debug_flags: |
1025 | - trace.mutter('> %s %s' % (method, url)) |
1026 | - hdrs = [] |
1027 | - for k, v in headers.items(): |
1028 | - # People are often told to paste -Dhttp output to help |
1029 | - # debug. Don't compromise credentials. |
1030 | - if k in ('Authorization', 'Proxy-Authorization'): |
1031 | - v = '<masked>' |
1032 | - hdrs.append('%s: %s' % (k, v)) |
1033 | - trace.mutter('> ' + '\n> '.join(hdrs) + '\n') |
1034 | - if self._debuglevel >= 1: |
1035 | - print('Request sent: [%r] from (%s)' |
1036 | - % (request, request.connection.sock.getsockname())) |
1037 | - response = connection.getresponse() |
1038 | - convert_to_addinfourl = True |
1039 | - except (ssl.SSLError, ssl.CertificateError): |
1040 | - # Something is wrong with either the certificate or the hostname, |
1041 | - # re-trying won't help |
1042 | - raise |
1043 | - except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol, |
1044 | - socket.error, http_client.HTTPException): |
1045 | - response = self.retry_or_raise(http_class, request, first_try) |
1046 | - convert_to_addinfourl = False |
1047 | - |
1048 | - if PY3: |
1049 | - response.msg = response.reason |
1050 | - return response |
1051 | - |
1052 | -# FIXME: HTTPConnection does not fully support 100-continue (the |
1053 | -# server responses are just ignored) |
1054 | - |
1055 | -# if code == 100: |
1056 | -# mutter('Will send the body') |
1057 | -# # We can send the body now |
1058 | -# body = request.data |
1059 | -# if body is None: |
1060 | -# raise URLError("No data given") |
1061 | -# connection.send(body) |
1062 | -# response = connection.getresponse() |
1063 | - |
1064 | - if self._debuglevel >= 2: |
1065 | - print('Receives response: %r' % response) |
1066 | - print(' For: %r(%r)' % (request.get_method(), |
1067 | - request.get_full_url())) |
1068 | - |
1069 | - if convert_to_addinfourl: |
1070 | - # Shamelessly copied from urllib_request |
1071 | - req = request |
1072 | - r = response |
1073 | - r.recv = r.read |
1074 | - fp = socket._fileobject(r, bufsize=65536) |
1075 | - resp = addinfourl(fp, r.msg, req.get_full_url()) |
1076 | - resp.code = r.status |
1077 | - resp.msg = r.reason |
1078 | - resp.version = r.version |
1079 | - if self._debuglevel >= 2: |
1080 | - print('Create addinfourl: %r' % resp) |
1081 | - print(' For: %r(%r)' % (request.get_method(), |
1082 | - request.get_full_url())) |
1083 | - if 'http' in debug.debug_flags: |
1084 | - version = 'HTTP/%d.%d' |
1085 | - try: |
1086 | - version = version % (resp.version / 10, |
1087 | - resp.version % 10) |
1088 | - except: |
1089 | - version = 'HTTP/%r' % resp.version |
1090 | - trace.mutter('< %s %s %s' % (version, resp.code, |
1091 | - resp.msg)) |
1092 | - # Use the raw header lines instead of treating resp.info() as a |
1093 | - # dict since we may miss duplicated headers otherwise. |
1094 | - hdrs = [h.rstrip('\r\n') for h in resp.info().headers] |
1095 | - trace.mutter('< ' + '\n< '.join(hdrs) + '\n') |
1096 | - else: |
1097 | - resp = response |
1098 | - return resp |
1099 | - |
1100 | - |
1101 | -class HTTPHandler(AbstractHTTPHandler): |
1102 | - """A custom handler that just thunks into HTTPConnection""" |
1103 | - |
1104 | - def http_open(self, request): |
1105 | - return self.do_open(HTTPConnection, request) |
1106 | - |
1107 | - |
1108 | -class HTTPSHandler(AbstractHTTPHandler): |
1109 | - """A custom handler that just thunks into HTTPSConnection""" |
1110 | - |
1111 | - https_request = AbstractHTTPHandler.http_request |
1112 | - |
1113 | - def https_open(self, request): |
1114 | - connection = request.connection |
1115 | - if connection.sock is None and \ |
1116 | - connection.proxied_host is not None and \ |
1117 | - request.get_method() != 'CONNECT': # Don't loop |
1118 | - # FIXME: We need a gazillion connection tests here, but we still |
1119 | - # miss a https server :-( : |
1120 | - # - with and without proxy |
1121 | - # - with and without certificate |
1122 | - # - with self-signed certificate |
1123 | - # - with and without authentication |
1124 | - # - with good and bad credentials (especially the proxy auth around |
1125 | - # CONNECT) |
1126 | - # - with basic and digest schemes |
1127 | - # - reconnection on errors |
1128 | - # - connection persistence behaviour (including reconnection) |
1129 | - |
1130 | - # We are about to connect for the first time via a proxy, we must |
1131 | - # issue a CONNECT request first to establish the encrypted link |
1132 | - connect = _ConnectRequest(request) |
1133 | - response = self.parent.open(connect) |
1134 | - if response.code != 200: |
1135 | - raise errors.ConnectionError("Can't connect to %s via proxy %s" % ( |
1136 | - connect.proxied_host, self.host)) |
1137 | - # Housekeeping |
1138 | - connection.cleanup_pipe() |
1139 | - # Establish the connection encryption |
1140 | - connection.connect_to_origin() |
1141 | - # Propagate the connection to the original request |
1142 | - request.connection = connection |
1143 | - return self.do_open(HTTPSConnection, request) |
1144 | - |
1145 | - |
1146 | -class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler): |
1147 | - """Handles redirect requests. |
1148 | - |
1149 | - We have to implement our own scheme because we use a specific |
1150 | - Request object and because we want to implement a specific |
1151 | - policy. |
1152 | - """ |
1153 | - _debuglevel = DEBUG |
1154 | - # RFC2616 says that only read requests should be redirected |
1155 | - # without interacting with the user. But Breezy uses some |
1156 | - # shortcuts to optimize against roundtrips which can lead to
1157 | - # write requests being issued before read requests of |
1158 | - # containing dirs can be redirected. So we redirect write |
1159 | - # requests in the same way which seems to respect the spirit |
1160 | - # of the RFC if not its letter. |
1161 | - |
1162 | - def redirect_request(self, req, fp, code, msg, headers, newurl): |
1163 | - """See urllib_request.HTTPRedirectHandler.redirect_request""" |
1164 | - # We would have preferred to update the request instead |
1165 | - # of creating a new one, but the urllib_request.Request object |
1166 | - # has a too complicated creation process to provide a |
1167 | - # simple enough equivalent update process. Instead, when |
1168 | - # redirecting, we only update the following request in |
1169 | - # the redirect chain with a reference to the parent |
1170 | - # request . |
1171 | - |
1172 | - # Some codes make no sense in our context and are treated |
1173 | - # as errors: |
1174 | - |
1175 | - # 300: Multiple choices for different representations of |
1176 | - # the URI. Using that mechanism with Breezy will violate the
1177 | - # protocol neutrality of Transport. |
1178 | - |
1179 | - # 304: Not modified (SHOULD only occur with conditional
1180 | - # GETs which are not used by our implementation) |
1181 | - |
1182 | - # 305: Use proxy. I can't imagine this one occurring in |
1183 | - # our context-- vila/20060909 |
1184 | - |
1185 | - # 306: Unused (if the RFC says so...) |
1186 | - |
1187 | - # If the code is 302 and the request is HEAD, some may |
1188 | - # think that it is a sufficient hint that the file exists
1189 | - # and that we MAY avoid following the redirections. But |
1190 | - # if we want to be sure, we MUST follow them. |
1191 | - |
1192 | - if PY3: |
1193 | - origin_req_host = req.origin_req_host |
1194 | - else: |
1195 | - origin_req_host = req.get_origin_req_host() |
1196 | - |
1197 | - if code in (301, 302, 303, 307, 308): |
1198 | - return Request(req.get_method(), newurl, |
1199 | - headers=req.headers, |
1200 | - origin_req_host=origin_req_host, |
1201 | - unverifiable=True, |
1202 | - # TODO: It will be nice to be able to |
1203 | - # detect virtual hosts sharing the same |
1204 | - # IP address, that will allow us to |
1205 | - # share the same connection... |
1206 | - connection=None, |
1207 | - parent=req, |
1208 | - ) |
1209 | - else: |
1210 | - raise urllib_request.HTTPError( |
1211 | - req.get_full_url(), code, msg, headers, fp) |
1212 | - |
1213 | - def http_error_302(self, req, fp, code, msg, headers): |
1214 | - """Requests the redirected to URI. |
1215 | - |
1216 | - Copied from urllib_request to be able to clean the pipe of the associated |
1217 | - connection, *before* issuing the redirected request but *after* having |
1218 | - eventually raised an error. |
1219 | - """ |
1220 | - # Some servers (incorrectly) return multiple Location headers |
1221 | - # (so probably same goes for URI). Use first header. |
1222 | - |
1223 | - # TODO: Once we get rid of addinfourl objects, the |
1224 | - # following will need to be updated to use correct case |
1225 | - # for headers. |
1226 | - if 'location' in headers: |
1227 | - newurl = headers.get('location') |
1228 | - elif 'uri' in headers: |
1229 | - newurl = headers.get('uri') |
1230 | - else: |
1231 | - return |
1232 | - |
1233 | - newurl = urljoin(req.get_full_url(), newurl) |
1234 | - |
1235 | - if self._debuglevel >= 1: |
1236 | - print('Redirected to: %s (followed: %r)' % (newurl, |
1237 | - req.follow_redirections)) |
1238 | - if req.follow_redirections is False: |
1239 | - req.redirected_to = newurl |
1240 | - return fp |
1241 | - |
1242 | - # This call succeeds or raise an error. urllib_request returns |
1243 | - # if redirect_request returns None, but our |
1244 | - # redirect_request never returns None. |
1245 | - redirected_req = self.redirect_request(req, fp, code, msg, headers, |
1246 | - newurl) |
1247 | - |
1248 | - # loop detection |
1249 | - # .redirect_dict has a key url if url was previously visited. |
1250 | - if hasattr(req, 'redirect_dict'): |
1251 | - visited = redirected_req.redirect_dict = req.redirect_dict |
1252 | - if (visited.get(newurl, 0) >= self.max_repeats or |
1253 | - len(visited) >= self.max_redirections): |
1254 | - raise urllib_request.HTTPError(req.get_full_url(), code, |
1255 | - self.inf_msg + msg, headers, fp) |
1256 | - else: |
1257 | - visited = redirected_req.redirect_dict = req.redirect_dict = {} |
1258 | - visited[newurl] = visited.get(newurl, 0) + 1 |
1259 | - |
1260 | - # We can close the fp now that we are sure that we won't |
1261 | - # use it with HTTPError. |
1262 | - fp.close() |
1263 | - # We have all we need already in the response |
1264 | - req.connection.cleanup_pipe() |
1265 | - |
1266 | - return self.parent.open(redirected_req) |
1267 | - |
1268 | - http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302 |
1269 | - |
1270 | - |
1271 | -class ProxyHandler(urllib_request.ProxyHandler): |
1272 | - """Handles proxy setting. |
1273 | - |
1274 | - Copied and modified from urllib_request to be able to modify the request during |
1275 | - the request pre-processing instead of modifying it at _open time. As we |
1276 | - capture (or create) the connection object during request processing, _open |
1277 | - time was too late. |
1278 | - |
1279 | - The main task is to modify the request so that the connection is done to |
1280 | - the proxy while the request still refers to the destination host. |
1281 | - |
1282 | - Note: the proxy handling *may* modify the protocol used; the request may be |
1283 | - against an https server proxied through an http proxy. So, https_request |
1284 | - will be called, but later it's really http_open that will be called. This |
1285 | - explains why we don't have to call self.parent.open as the urllib_request did. |
1286 | - """ |
1287 | - |
1288 | - # Proxies must be in front |
1289 | - handler_order = 100 |
1290 | - _debuglevel = DEBUG |
1291 | - |
1292 | - def __init__(self, proxies=None): |
1293 | - urllib_request.ProxyHandler.__init__(self, proxies) |
1294 | - # First, let's get rid of urllib_request implementation |
1295 | - for type, proxy in self.proxies.items(): |
1296 | - if self._debuglevel >= 3: |
1297 | - print('Will unbind %s_open for %r' % (type, proxy)) |
1298 | - delattr(self, '%s_open' % type) |
1299 | - |
1300 | - def bind_scheme_request(proxy, scheme): |
1301 | - if proxy is None: |
1302 | - return |
1303 | - scheme_request = scheme + '_request' |
1304 | - if self._debuglevel >= 3: |
1305 | - print('Will bind %s for %r' % (scheme_request, proxy)) |
1306 | - setattr(self, scheme_request, |
1307 | - lambda request: self.set_proxy(request, scheme)) |
1308 | - # We are interested only by the http[s] proxies |
1309 | - http_proxy = self.get_proxy_env_var('http') |
1310 | - bind_scheme_request(http_proxy, 'http') |
1311 | - https_proxy = self.get_proxy_env_var('https') |
1312 | - bind_scheme_request(https_proxy, 'https') |
1313 | - |
1314 | - def get_proxy_env_var(self, name, default_to='all'): |
1315 | - """Get a proxy env var. |
1316 | - |
1317 | - Note that we indirectly rely on |
1318 | - urllib.getproxies_environment taking into account the |
1319 | - uppercased values for proxy variables. |
1320 | - """ |
1321 | - try: |
1322 | - return self.proxies[name.lower()] |
1323 | - except KeyError: |
1324 | - if default_to is not None: |
1325 | - # Try to get the alternate environment variable |
1326 | - try: |
1327 | - return self.proxies[default_to] |
1328 | - except KeyError: |
1329 | - pass |
1330 | - return None |
1331 | - |
1332 | - def proxy_bypass(self, host): |
1333 | - """Check if host should be proxied or not. |
1334 | - |
1335 | - :returns: True to skip the proxy, False otherwise. |
1336 | - """ |
1337 | - no_proxy = self.get_proxy_env_var('no', default_to=None) |
1338 | - bypass = self.evaluate_proxy_bypass(host, no_proxy) |
1339 | - if bypass is None: |
1340 | - # Nevertheless, there are platform-specific ways to |
1341 | - # ignore proxies... |
1342 | - return urllib_request.proxy_bypass(host) |
1343 | - else: |
1344 | - return bypass |
1345 | - |
1346 | - def evaluate_proxy_bypass(self, host, no_proxy): |
1347 | - """Check the host against a comma-separated no_proxy list as a string. |
1348 | - |
1349 | - :param host: ``host:port`` being requested |
1350 | - |
1351 | - :param no_proxy: comma-separated list of hosts to access directly. |
1352 | - |
1353 | - :returns: True to skip the proxy, False not to, or None to |
1354 | - leave it to urllib. |
1355 | - """ |
1356 | - if no_proxy is None: |
1357 | - # All hosts are proxied |
1358 | - return False |
1359 | - hhost, hport = splitport(host) |
1360 | - # Does host match any of the domains mentioned in |
1361 | - # no_proxy ? The rules about what is authorized in no_proxy |
1362 | - # are fuzzy (to say the least). We try to allow most |
1363 | - # commonly seen values. |
1364 | - for domain in no_proxy.split(','): |
1365 | - domain = domain.strip() |
1366 | - if domain == '': |
1367 | - continue |
1368 | - dhost, dport = splitport(domain) |
1369 | - if hport == dport or dport is None: |
1370 | - # Protect glob chars |
1371 | - dhost = dhost.replace(".", r"\.") |
1372 | - dhost = dhost.replace("*", r".*") |
1373 | - dhost = dhost.replace("?", r".") |
1374 | - if re.match(dhost, hhost, re.IGNORECASE): |
1375 | - return True |
1376 | - # Nothing explicitly avoids the host
1377 | - return None |
1378 | - |
1379 | - def set_proxy(self, request, type): |
1380 | - if PY3: |
1381 | - host = request.host |
1382 | - else: |
1383 | - host = request.get_host() |
1384 | - if self.proxy_bypass(host): |
1385 | - return request |
1386 | - |
1387 | - proxy = self.get_proxy_env_var(type) |
1388 | - if self._debuglevel >= 3: |
1389 | - print('set_proxy %s_request for %r' % (type, proxy)) |
1390 | - # FIXME: python 2.5 urlparse provides a better _parse_proxy which can |
1391 | - # grok user:password@host:port as well as |
1392 | - # http://user:password@host:port |
1393 | - |
1394 | - parsed_url = transport.ConnectedTransport._split_url(proxy) |
1395 | - if not parsed_url.host: |
1396 | - raise urlutils.InvalidURL(proxy, 'No host component') |
1397 | - |
1398 | - if request.proxy_auth == {}: |
1399 | - # No proxy auth parameter are available, we are handling the first |
1400 | - # proxied request, initialize. scheme (the authentication scheme)
1401 | - # and realm will be set by the AuthHandler |
1402 | - request.proxy_auth = { |
1403 | - 'host': parsed_url.host, |
1404 | - 'port': parsed_url.port, |
1405 | - 'user': parsed_url.user, |
1406 | - 'password': parsed_url.password, |
1407 | - 'protocol': parsed_url.scheme, |
1408 | - # We ignore path since we connect to a proxy |
1409 | - 'path': None} |
1410 | - if parsed_url.port is None: |
1411 | - phost = parsed_url.host |
1412 | - else: |
1413 | - phost = parsed_url.host + ':%d' % parsed_url.port |
1414 | - request.set_proxy(phost, type) |
1415 | - if self._debuglevel >= 3: |
1416 | - print('set_proxy: proxy set to %s://%s' % (type, phost)) |
1417 | - return request |
1418 | - |
1419 | - |
1420 | -class AbstractAuthHandler(urllib_request.BaseHandler): |
1421 | - """A custom abstract authentication handler for all http authentications. |
1422 | - |
1423 | - Provides the meat to handle authentication errors and |
1424 | - preventively set authentication headers after the first |
1425 | - successful authentication. |
1426 | - |
1427 | - This can be used for http and proxy, as well as for basic, negotiate and |
1428 | - digest authentications. |
1429 | - |
1430 | - This provides a unified interface for all authentication handlers
1431 | - (urllib_request provides far too many with different policies). |
1432 | - |
1433 | - The interaction between this handler and the urllib_request |
1434 | - framework is not obvious, it works as follow: |
1435 | - |
1436 | - opener.open(request) is called: |
1437 | - |
1438 | - - that may trigger http_request which will add an authentication header |
1439 | - (self.build_header) if enough info is available. |
1440 | - |
1441 | - - the request is sent to the server, |
1442 | - |
1443 | - - if an authentication error is received self.auth_required is called, |
1444 | - we acquire the authentication info in the error headers and call |
1445 | - self.auth_match to check that we are able to try the |
1446 | - authentication and complete the authentication parameters, |
1447 | - |
1448 | - - we call parent.open(request), that may trigger http_request |
1449 | - and will add a header (self.build_header), but here we have |
1450 | - all the required info (keep in mind that the request and |
1451 | - authentication used in the recursive calls are really (and must be) |
1452 | - the *same* objects). |
1453 | - |
1454 | - - if the call returns a response, the authentication has been
1455 | - successful and the request authentication parameters have been updated. |
1456 | - """ |
1457 | - |
1458 | - scheme = None |
1459 | - """The scheme as it appears in the server header (lower cased)""" |
1460 | - |
1461 | - _max_retry = 3 |
1462 | - """We don't want to retry authenticating endlessly""" |
1463 | - |
1464 | - requires_username = True |
1465 | - """Whether the auth mechanism requires a username.""" |
1466 | - |
1467 | - # The following attributes should be defined by daughter |
1468 | - # classes: |
1469 | - # - auth_required_header: the header received from the server |
1470 | - # - auth_header: the header sent in the request |
1471 | - |
1472 | - def __init__(self): |
1473 | - # We want to know when we enter into an try/fail cycle of |
1474 | - # authentications so we initialize to None to indicate that we aren't |
1475 | - # in such a cycle by default. |
1476 | - self._retry_count = None |
1477 | - |
1478 | - def _parse_auth_header(self, server_header): |
1479 | - """Parse the authentication header. |
1480 | - |
1481 | - :param server_header: The value of the header sent by the server |
1482 | - describing the authentication request.
1483 | - |
1484 | - :return: A tuple (scheme, remainder) scheme being the first word in the |
1485 | - given header (lower cased), remainder may be None. |
1486 | - """ |
1487 | - try: |
1488 | - scheme, remainder = server_header.split(None, 1) |
1489 | - except ValueError: |
1490 | - scheme = server_header |
1491 | - remainder = None |
1492 | - return (scheme.lower(), remainder) |
1493 | - |
1494 | - def update_auth(self, auth, key, value): |
1495 | - """Update a value in auth marking the auth as modified if needed""" |
1496 | - old_value = auth.get(key, None) |
1497 | - if old_value != value: |
1498 | - auth[key] = value |
1499 | - auth['modified'] = True |
1500 | - |
1501 | - def auth_required(self, request, headers): |
1502 | - """Retry the request if the auth scheme is ours. |
1503 | - |
1504 | - :param request: The request needing authentication. |
1505 | - :param headers: The headers for the authentication error response. |
1506 | - :return: None or the response for the authenticated request. |
1507 | - """ |
1508 | - # Don't try to authenticate endlessly |
1509 | - if self._retry_count is None: |
1510 | - # The retries being recursive calls, None identifies the first retry
1511 | - self._retry_count = 1 |
1512 | - else: |
1513 | - self._retry_count += 1 |
1514 | - if self._retry_count > self._max_retry: |
1515 | - # Let's be ready for next round |
1516 | - self._retry_count = None |
1517 | - return None |
1518 | - if PY3: |
1519 | - server_headers = headers.get_all(self.auth_required_header) |
1520 | - else: |
1521 | - server_headers = headers.getheaders(self.auth_required_header) |
1522 | - if not server_headers: |
1523 | - # The http error MUST have the associated |
1524 | - # header. This must never happen in production code. |
1525 | - trace.mutter('%s not found', self.auth_required_header) |
1526 | - return None |
1527 | - |
1528 | - auth = self.get_auth(request) |
1529 | - auth['modified'] = False |
1530 | - # Put some common info in auth if the caller didn't |
1531 | - if auth.get('path', None) is None: |
1532 | - parsed_url = urlutils.URL.from_string(request.get_full_url()) |
1533 | - self.update_auth(auth, 'protocol', parsed_url.scheme) |
1534 | - self.update_auth(auth, 'host', parsed_url.host) |
1535 | - self.update_auth(auth, 'port', parsed_url.port) |
1536 | - self.update_auth(auth, 'path', parsed_url.path) |
1537 | - # FIXME: the auth handler should be selected at a single place instead |
1538 | - # of letting all handlers try to match all headers, but the current |
1539 | - # design doesn't allow a simple implementation. |
1540 | - for server_header in server_headers: |
1541 | - # Several schemes can be proposed by the server, try to match each |
1542 | - # one in turn |
1543 | - matching_handler = self.auth_match(server_header, auth) |
1544 | - if matching_handler: |
1545 | - # auth_match may have modified auth (by adding the |
1546 | - # password or changing the realm, for example) |
1547 | - if (request.get_header(self.auth_header, None) is not None |
1548 | - and not auth['modified']): |
1549 | - # We already tried that, give up |
1550 | - return None |
1551 | - |
1552 | - # Only the most secure scheme proposed by the server should be |
1553 | - # used, since the handlers use 'handler_order' to describe that |
1554 | - # property, the first handler tried takes precedence, the |
1555 | - # others should not attempt to authenticate if the best one |
1556 | - # failed. |
1557 | - best_scheme = auth.get('best_scheme', None) |
1558 | - if best_scheme is None: |
1559 | - # At that point, if current handler should doesn't succeed |
1560 | - # the credentials are wrong (or incomplete), but we know |
1561 | - # that the associated scheme should be used. |
1562 | - best_scheme = auth['best_scheme'] = self.scheme |
1563 | - if best_scheme != self.scheme: |
1564 | - continue |
1565 | - |
1566 | - if self.requires_username and auth.get('user', None) is None: |
1567 | - # Without a known user, we can't authenticate |
1568 | - return None |
1569 | - |
1570 | - # Housekeeping |
1571 | - request.connection.cleanup_pipe() |
1572 | - # Retry the request with an authentication header added |
1573 | - response = self.parent.open(request) |
1574 | - if response: |
1575 | - self.auth_successful(request, response) |
1576 | - return response |
1577 | - # We are not qualified to handle the authentication. |
1578 | - # Note: the authentication error handling will try all |
1579 | - # available handlers. If one of them authenticates |
1580 | - # successfully, a response will be returned. If none of |
1581 | - # them succeeds, None will be returned and the error |
1582 | - # handler will raise the 401 'Unauthorized' or the 407 |
1583 | - # 'Proxy Authentication Required' error. |
1584 | - return None |
1585 | - |
1586 | - def add_auth_header(self, request, header): |
1587 | - """Add the authentication header to the request""" |
1588 | - request.add_unredirected_header(self.auth_header, header) |
1589 | - |
1590 | - def auth_match(self, header, auth): |
1591 | - """Check that we are able to handle that authentication scheme. |
1592 | - |
1593 | - The request authentication parameters may need to be |
1594 | - updated with info from the server. Some of these |
1595 | - parameters, when combined, are considered to be the |
1596 | - authentication key, if one of them change the |
1597 | - authentication result may change. 'user' and 'password' |
1598 | - are exampls, but some auth schemes may have others |
1599 | - (digest's nonce is an example, digest's nonce_count is a |
1600 | - *counter-example*). Such parameters must be updated by |
1601 | - using the update_auth() method. |
1602 | - |
1603 | - :param header: The authentication header sent by the server. |
1604 | - :param auth: The auth parameters already known. They may be |
1605 | - updated. |
1606 | - :returns: True if we can try to handle the authentication. |
1607 | - """ |
1608 | - raise NotImplementedError(self.auth_match) |
1609 | - |
1610 | - def build_auth_header(self, auth, request): |
1611 | - """Build the value of the header used to authenticate. |
1612 | - |
1613 | - :param auth: The auth parameters needed to build the header. |
1614 | - :param request: The request needing authentication. |
1615 | - |
1616 | - :return: None or header. |
1617 | - """ |
1618 | - raise NotImplementedError(self.build_auth_header) |
1619 | - |
1620 | - def auth_successful(self, request, response): |
1621 | - """The authentification was successful for the request. |
1622 | - |
1623 | - Additional infos may be available in the response. |
1624 | - |
1625 | - :param request: The succesfully authenticated request. |
1626 | - :param response: The server response (may contain auth info). |
1627 | - """ |
1628 | - # It may happen that we need to reconnect later, let's be ready |
1629 | - self._retry_count = None |
1630 | - |
1631 | - def get_user_password(self, auth): |
1632 | - """Ask user for a password if none is already available. |
1633 | - |
1634 | - :param auth: authentication info gathered so far (from the initial url |
1635 | - and then during dialog with the server). |
1636 | - """ |
1637 | - auth_conf = config.AuthenticationConfig() |
1638 | - user = auth.get('user', None) |
1639 | - password = auth.get('password', None) |
1640 | - realm = auth['realm'] |
1641 | - port = auth.get('port', None) |
1642 | - |
1643 | - if user is None: |
1644 | - user = auth_conf.get_user(auth['protocol'], auth['host'], |
1645 | - port=port, path=auth['path'], |
1646 | - realm=realm, ask=True, |
1647 | - prompt=self.build_username_prompt(auth)) |
1648 | - if user is not None and password is None: |
1649 | - password = auth_conf.get_password( |
1650 | - auth['protocol'], auth['host'], user, |
1651 | - port=port, |
1652 | - path=auth['path'], realm=realm, |
1653 | - prompt=self.build_password_prompt(auth)) |
1654 | - |
1655 | - return user, password |
1656 | - |
1657 | - def _build_password_prompt(self, auth): |
1658 | - """Build a prompt taking the protocol used into account. |
1659 | - |
1660 | - The AuthHandler is used by http and https, we want that information in |
1661 | - the prompt, so we build the prompt from the authentication dict which |
1662 | - contains all the needed parts. |
1663 | - |
1664 | - Also, http and proxy AuthHandlers present different prompts to the |
1665 | - user. The daughter classes should implements a public |
1666 | - build_password_prompt using this method. |
1667 | - """ |
1668 | - prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s' |
1669 | - realm = auth['realm'] |
1670 | - if realm is not None: |
1671 | - prompt += u", Realm: '%s'" % realm |
1672 | - prompt += u' password' |
1673 | - return prompt |
1674 | - |
1675 | - def _build_username_prompt(self, auth): |
1676 | - """Build a prompt taking the protocol used into account. |
1677 | - |
1678 | - The AuthHandler is used by http and https, we want that information in |
1679 | - the prompt, so we build the prompt from the authentication dict which |
1680 | - contains all the needed parts. |
1681 | - |
1682 | - Also, http and proxy AuthHandlers present different prompts to the |
1683 | - user. The daughter classes should implements a public |
1684 | - build_username_prompt using this method. |
1685 | - """ |
1686 | - prompt = u'%s' % auth['protocol'].upper() + u' %(host)s' |
1687 | - realm = auth['realm'] |
1688 | - if realm is not None: |
1689 | - prompt += u", Realm: '%s'" % realm |
1690 | - prompt += u' username' |
1691 | - return prompt |
1692 | - |
1693 | - def http_request(self, request): |
1694 | - """Insert an authentication header if information is available""" |
1695 | - auth = self.get_auth(request) |
1696 | - if self.auth_params_reusable(auth): |
1697 | - self.add_auth_header( |
1698 | - request, self.build_auth_header(auth, request)) |
1699 | - return request |
1700 | - |
1701 | - https_request = http_request # FIXME: Need test |
1702 | - |
1703 | - |
1704 | -class NegotiateAuthHandler(AbstractAuthHandler): |
1705 | - """A authentication handler that handles WWW-Authenticate: Negotiate. |
1706 | - |
1707 | - At the moment this handler supports just Kerberos. In the future, |
1708 | - NTLM support may also be added. |
1709 | - """ |
1710 | - |
1711 | - scheme = 'negotiate' |
1712 | - handler_order = 480 |
1713 | - requires_username = False |
1714 | - |
1715 | - def auth_match(self, header, auth): |
1716 | - scheme, raw_auth = self._parse_auth_header(header) |
1717 | - if scheme != self.scheme: |
1718 | - return False |
1719 | - self.update_auth(auth, 'scheme', scheme) |
1720 | - resp = self._auth_match_kerberos(auth) |
1721 | - if resp is None: |
1722 | - return False |
1723 | - # Optionally should try to authenticate using NTLM here |
1724 | - self.update_auth(auth, 'negotiate_response', resp) |
1725 | - return True |
1726 | - |
1727 | - def _auth_match_kerberos(self, auth): |
1728 | - """Try to create a GSSAPI response for authenticating against a host.""" |
1729 | - global kerberos, checked_kerberos |
1730 | - if kerberos is None and not checked_kerberos: |
1731 | - try: |
1732 | - import kerberos |
1733 | - except ImportError: |
1734 | - kerberos = None |
1735 | - checked_kerberos = True |
1736 | - if kerberos is None: |
1737 | - return None |
1738 | - ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth) |
1739 | - if ret < 1: |
1740 | - trace.warning('Unable to create GSSAPI context for %s: %d', |
1741 | - auth['host'], ret) |
1742 | - return None |
1743 | - ret = kerberos.authGSSClientStep(vc, "") |
1744 | - if ret < 0: |
1745 | - trace.mutter('authGSSClientStep failed: %d', ret) |
1746 | - return None |
1747 | - return kerberos.authGSSClientResponse(vc) |
1748 | - |
1749 | - def build_auth_header(self, auth, request): |
1750 | - return "Negotiate %s" % auth['negotiate_response'] |
1751 | - |
1752 | - def auth_params_reusable(self, auth): |
1753 | - # If the auth scheme is known, it means a previous |
1754 | - # authentication was successful, all information is |
1755 | - # available, no further checks are needed. |
1756 | - return (auth.get('scheme', None) == 'negotiate' and |
1757 | - auth.get('negotiate_response', None) is not None) |
1758 | - |
1759 | - |
1760 | -class BasicAuthHandler(AbstractAuthHandler): |
1761 | - """A custom basic authentication handler.""" |
1762 | - |
1763 | - scheme = 'basic' |
1764 | - handler_order = 500 |
1765 | - auth_regexp = re.compile('realm="([^"]*)"', re.I) |
1766 | - |
1767 | - def build_auth_header(self, auth, request): |
1768 | - raw = '%s:%s' % (auth['user'], auth['password']) |
1769 | - auth_header = 'Basic ' + \ |
1770 | - base64.b64encode(raw.encode('utf-8')).decode('ascii') |
1771 | - return auth_header |
1772 | - |
1773 | - def extract_realm(self, header_value): |
1774 | - match = self.auth_regexp.search(header_value) |
1775 | - realm = None |
1776 | - if match: |
1777 | - realm = match.group(1) |
1778 | - return match, realm |
1779 | - |
1780 | - def auth_match(self, header, auth): |
1781 | - scheme, raw_auth = self._parse_auth_header(header) |
1782 | - if scheme != self.scheme: |
1783 | - return False |
1784 | - |
1785 | - match, realm = self.extract_realm(raw_auth) |
1786 | - if match: |
1787 | - # Put useful info into auth |
1788 | - self.update_auth(auth, 'scheme', scheme) |
1789 | - self.update_auth(auth, 'realm', realm) |
1790 | - if (auth.get('user', None) is None |
1791 | - or auth.get('password', None) is None): |
1792 | - user, password = self.get_user_password(auth) |
1793 | - self.update_auth(auth, 'user', user) |
1794 | - self.update_auth(auth, 'password', password) |
1795 | - return match is not None |
1796 | - |
1797 | - def auth_params_reusable(self, auth): |
1798 | - # If the auth scheme is known, it means a previous |
1799 | - # authentication was successful, all information is |
1800 | - # available, no further checks are needed. |
1801 | - return auth.get('scheme', None) == 'basic' |
1802 | - |
1803 | - |
1804 | -def get_digest_algorithm_impls(algorithm): |
1805 | - H = None |
1806 | - KD = None |
1807 | - if algorithm == 'MD5': |
1808 | - def H(x): return osutils.md5(x).hexdigest() |
1809 | - elif algorithm == 'SHA': |
1810 | - H = osutils.sha_string |
1811 | - if H is not None: |
1812 | - def KD(secret, data): return H( |
1813 | - ("%s:%s" % (secret, data)).encode('utf-8')) |
1814 | - return H, KD |
1815 | - |
1816 | - |
1817 | -def get_new_cnonce(nonce, nonce_count): |
1818 | - raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(), |
1819 | - osutils.rand_chars(8)) |
1820 | - return osutils.sha_string(raw.encode('utf-8'))[:16] |
1821 | - |
1822 | - |
1823 | -class DigestAuthHandler(AbstractAuthHandler): |
1824 | - """A custom digest authentication handler.""" |
1825 | - |
1826 | - scheme = 'digest' |
1827 | - # Before basic as digest is a bit more secure and should be preferred |
1828 | - handler_order = 490 |
1829 | - |
1830 | - def auth_params_reusable(self, auth): |
1831 | - # If the auth scheme is known, it means a previous |
1832 | - # authentication was successful, all information is |
1833 | - # available, no further checks are needed. |
1834 | - return auth.get('scheme', None) == 'digest' |
1835 | - |
1836 | - def auth_match(self, header, auth): |
1837 | - scheme, raw_auth = self._parse_auth_header(header) |
1838 | - if scheme != self.scheme: |
1839 | - return False |
1840 | - |
1841 | - # Put the requested authentication info into a dict |
1842 | - req_auth = urllib_request.parse_keqv_list( |
1843 | - urllib_request.parse_http_list(raw_auth)) |
1844 | - |
1845 | - # Check that we can handle that authentication |
1846 | - qop = req_auth.get('qop', None) |
1847 | - if qop != 'auth': # No auth-int so far |
1848 | - return False |
1849 | - |
1850 | - H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5')) |
1851 | - if H is None: |
1852 | - return False |
1853 | - |
1854 | - realm = req_auth.get('realm', None) |
1855 | - # Put useful info into auth |
1856 | - self.update_auth(auth, 'scheme', scheme) |
1857 | - self.update_auth(auth, 'realm', realm) |
1858 | - if auth.get('user', None) is None or auth.get('password', None) is None: |
1859 | - user, password = self.get_user_password(auth) |
1860 | - self.update_auth(auth, 'user', user) |
1861 | - self.update_auth(auth, 'password', password) |
1862 | - |
1863 | - try: |
1864 | - if req_auth.get('algorithm', None) is not None: |
1865 | - self.update_auth(auth, 'algorithm', req_auth.get('algorithm')) |
1866 | - nonce = req_auth['nonce'] |
1867 | - if auth.get('nonce', None) != nonce: |
1868 | - # A new nonce, never used |
1869 | - self.update_auth(auth, 'nonce_count', 0) |
1870 | - self.update_auth(auth, 'nonce', nonce) |
1871 | - self.update_auth(auth, 'qop', qop) |
1872 | - auth['opaque'] = req_auth.get('opaque', None) |
1873 | - except KeyError: |
1874 | - # Some required field is not there |
1875 | - return False |
1876 | - |
1877 | - return True |
1878 | - |
1879 | - def build_auth_header(self, auth, request): |
1880 | - if PY3: |
1881 | - selector = request.selector |
1882 | - else: |
1883 | - selector = request.get_selector() |
1884 | - url_scheme, url_selector = splittype(selector) |
1885 | - sel_host, uri = splithost(url_selector) |
1886 | - |
1887 | - A1 = ('%s:%s:%s' % |
1888 | - (auth['user'], auth['realm'], auth['password'])).encode('utf-8') |
1889 | - A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8') |
1890 | - |
1891 | - nonce = auth['nonce'] |
1892 | - qop = auth['qop'] |
1893 | - |
1894 | - nonce_count = auth['nonce_count'] + 1 |
1895 | - ncvalue = '%08x' % nonce_count |
1896 | - cnonce = get_new_cnonce(nonce, nonce_count) |
1897 | - |
1898 | - H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5')) |
1899 | - nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2)) |
1900 | - request_digest = KD(H(A1), nonce_data) |
1901 | - |
1902 | - header = 'Digest ' |
1903 | - header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'], |
1904 | - auth['realm'], |
1905 | - nonce) |
1906 | - header += ', uri="%s"' % uri |
1907 | - header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue) |
1908 | - header += ', qop="%s"' % qop |
1909 | - header += ', response="%s"' % request_digest |
1910 | - # Append the optional fields |
1911 | - opaque = auth.get('opaque', None) |
1912 | - if opaque: |
1913 | - header += ', opaque="%s"' % opaque |
1914 | - if auth.get('algorithm', None): |
1915 | - header += ', algorithm="%s"' % auth.get('algorithm') |
1916 | - |
1917 | - # We have used the nonce once more, update the count |
1918 | - auth['nonce_count'] = nonce_count |
1919 | - |
1920 | - return header |
1921 | - |
1922 | - |
1923 | -class HTTPAuthHandler(AbstractAuthHandler): |
1924 | - """Custom http authentication handler. |
1925 | - |
1926 | - Send the authentication preventively to avoid the roundtrip |
1927 | - associated with the 401 error and keep the revelant info in |
1928 | - the auth request attribute. |
1929 | - """ |
1930 | - |
1931 | - auth_required_header = 'www-authenticate' |
1932 | - auth_header = 'Authorization' |
1933 | - |
1934 | - def get_auth(self, request): |
1935 | - """Get the auth params from the request""" |
1936 | - return request.auth |
1937 | - |
1938 | - def set_auth(self, request, auth): |
1939 | - """Set the auth params for the request""" |
1940 | - request.auth = auth |
1941 | - |
1942 | - def build_password_prompt(self, auth): |
1943 | - return self._build_password_prompt(auth) |
1944 | - |
1945 | - def build_username_prompt(self, auth): |
1946 | - return self._build_username_prompt(auth) |
1947 | - |
1948 | - def http_error_401(self, req, fp, code, msg, headers): |
1949 | - return self.auth_required(req, headers) |
1950 | - |
1951 | - |
1952 | -class ProxyAuthHandler(AbstractAuthHandler): |
1953 | - """Custom proxy authentication handler. |
1954 | - |
1955 | - Send the authentication preventively to avoid the roundtrip |
1956 | - associated with the 407 error and keep the revelant info in |
1957 | - the proxy_auth request attribute.. |
1958 | - """ |
1959 | - |
1960 | - auth_required_header = 'proxy-authenticate' |
1961 | - # FIXME: the correct capitalization is Proxy-Authorization, |
1962 | - # but python-2.4 urllib_request.Request insist on using capitalize() |
1963 | - # instead of title(). |
1964 | - auth_header = 'Proxy-authorization' |
1965 | - |
1966 | - def get_auth(self, request): |
1967 | - """Get the auth params from the request""" |
1968 | - return request.proxy_auth |
1969 | - |
1970 | - def set_auth(self, request, auth): |
1971 | - """Set the auth params for the request""" |
1972 | - request.proxy_auth = auth |
1973 | - |
1974 | - def build_password_prompt(self, auth): |
1975 | - prompt = self._build_password_prompt(auth) |
1976 | - prompt = u'Proxy ' + prompt |
1977 | - return prompt |
1978 | - |
1979 | - def build_username_prompt(self, auth): |
1980 | - prompt = self._build_username_prompt(auth) |
1981 | - prompt = u'Proxy ' + prompt |
1982 | - return prompt |
1983 | - |
1984 | - def http_error_407(self, req, fp, code, msg, headers): |
1985 | - return self.auth_required(req, headers) |
1986 | - |
1987 | - |
1988 | -class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler): |
1989 | - """Custom http basic authentication handler""" |
1990 | - |
1991 | - |
1992 | -class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler): |
1993 | - """Custom proxy basic authentication handler""" |
1994 | - |
1995 | - |
1996 | -class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler): |
1997 | - """Custom http basic authentication handler""" |
1998 | - |
1999 | - |
2000 | -class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler): |
2001 | - """Custom proxy basic authentication handler""" |
2002 | - |
2003 | - |
2004 | -class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler): |
2005 | - """Custom http negotiate authentication handler""" |
2006 | - |
2007 | - |
2008 | -class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler): |
2009 | - """Custom proxy negotiate authentication handler""" |
2010 | - |
2011 | - |
2012 | -class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor): |
2013 | - """Process HTTP error responses. |
2014 | - |
2015 | - We don't really process the errors, quite the contrary |
2016 | - instead, we leave our Transport handle them. |
2017 | - """ |
2018 | - |
2019 | - accepted_errors = [200, # Ok |
2020 | - 201, |
2021 | - 202, |
2022 | - 204, |
2023 | - 206, # Partial content |
2024 | - 400, |
2025 | - 403, |
2026 | - 404, # Not found |
2027 | - 405, # Method not allowed |
2028 | - 406, # Not Acceptable |
2029 | - 409, # Conflict |
2030 | - 416, # Range not satisfiable |
2031 | - 422, # Unprocessible entity |
2032 | - 501, # Not implemented |
2033 | - ] |
2034 | - """The error codes the caller will handle. |
2035 | - |
2036 | - This can be specialized in the request on a case-by case basis, but the |
2037 | - common cases are covered here. |
2038 | - """ |
2039 | - |
2040 | - def http_response(self, request, response): |
2041 | - code, msg, hdrs = response.code, response.msg, response.info() |
2042 | - |
2043 | - if code not in self.accepted_errors: |
2044 | - response = self.parent.error('http', request, response, |
2045 | - code, msg, hdrs) |
2046 | - return response |
2047 | - |
2048 | - https_response = http_response |
2049 | - |
2050 | - |
2051 | -class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler): |
2052 | - """Translate common errors into Breezy Exceptions""" |
2053 | - |
2054 | - def http_error_default(self, req, fp, code, msg, hdrs): |
2055 | - if code == 403: |
2056 | - raise errors.TransportError( |
2057 | - 'Server refuses to fulfill the request (403 Forbidden)' |
2058 | - ' for %s' % req.get_full_url()) |
2059 | - else: |
2060 | - raise errors.UnexpectedHttpStatus( |
2061 | - req.get_full_url(), code, |
2062 | - 'Unable to handle http code: %s' % msg) |
2063 | - |
2064 | - |
2065 | -class Opener(object): |
2066 | - """A wrapper around urllib_request.build_opener |
2067 | - |
2068 | - Daughter classes can override to build their own specific opener |
2069 | - """ |
2070 | - # TODO: Provides hooks for daughter classes. |
2071 | - |
2072 | - def __init__(self, |
2073 | - connection=ConnectionHandler, |
2074 | - redirect=HTTPRedirectHandler, |
2075 | - error=HTTPErrorProcessor, |
2076 | - report_activity=None, |
2077 | - ca_certs=None): |
2078 | - self._opener = urllib_request.build_opener( |
2079 | - connection(report_activity=report_activity, ca_certs=ca_certs), |
2080 | - redirect, error, |
2081 | - ProxyHandler(), |
2082 | - HTTPBasicAuthHandler(), |
2083 | - HTTPDigestAuthHandler(), |
2084 | - HTTPNegotiateAuthHandler(), |
2085 | - ProxyBasicAuthHandler(), |
2086 | - ProxyDigestAuthHandler(), |
2087 | - ProxyNegotiateAuthHandler(), |
2088 | - HTTPHandler, |
2089 | - HTTPSHandler, |
2090 | - HTTPDefaultErrorHandler, |
2091 | - ) |
2092 | - |
2093 | - self.open = self._opener.open |
2094 | - if DEBUG >= 9: |
2095 | - # When dealing with handler order, it's easy to mess |
2096 | - # things up, the following will help understand which |
2097 | - # handler is used, when and for what. |
2098 | - import pprint |
2099 | - pprint.pprint(self._opener.__dict__) |
2100 | - |
2101 | - |
2102 | -class HttpTransport(ConnectedTransport): |
2103 | - """HTTP Client implementations. |
2104 | - |
2105 | - The protocol can be given as e.g. http+urllib://host/ to use a particular |
2106 | - implementation. |
2107 | - """ |
2108 | - |
2109 | - # _unqualified_scheme: "http" or "https" |
2110 | - # _scheme: may have "+pycurl", etc |
2111 | - |
2112 | - # In order to debug we have to issue our traces in sync with |
2113 | - # httplib, which use print :( |
2114 | - _debuglevel = 0 |
2115 | - |
2116 | - def __init__(self, base, _from_transport=None, ca_certs=None): |
2117 | - """Set the base path where files will be stored.""" |
2118 | - proto_match = re.match(r'^(https?)(\+\w+)?://', base) |
2119 | - if not proto_match: |
2120 | - raise AssertionError("not a http url: %r" % base) |
2121 | - self._unqualified_scheme = proto_match.group(1) |
2122 | - super(HttpTransport, self).__init__( |
2123 | - base, _from_transport=_from_transport) |
2124 | - self._medium = None |
2125 | - # range hint is handled dynamically throughout the life |
2126 | - # of the transport object. We start by trying multi-range |
2127 | - # requests and if the server returns bogus results, we |
2128 | - # retry with single range requests and, finally, we |
2129 | - # forget about range if the server really can't |
2130 | - # understand. Once acquired, this piece of info is |
2131 | - # propagated to clones. |
2132 | - if _from_transport is not None: |
2133 | - self._range_hint = _from_transport._range_hint |
2134 | - self._opener = _from_transport._opener |
2135 | - else: |
2136 | - self._range_hint = 'multi' |
2137 | - self._opener = Opener( |
2138 | - report_activity=self._report_activity, ca_certs=ca_certs) |
2139 | - |
2140 | - def request(self, method, url, fields=None, headers=None, **urlopen_kw): |
2141 | - body = urlopen_kw.pop('body', None) |
2142 | - if fields is not None: |
2143 | - data = urlencode(fields).encode() |
2144 | - if body is not None: |
2145 | - raise ValueError( |
2146 | - 'body and fields are mutually exclusive') |
2147 | - else: |
2148 | - data = body |
2149 | - if headers is None: |
2150 | - headers = {} |
2151 | - request = Request(method, url, data, headers) |
2152 | - request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0) |
2153 | - if urlopen_kw: |
2154 | - raise NotImplementedError( |
2155 | - 'unknown arguments: %r' % urlopen_kw.keys()) |
2156 | - connection = self._get_connection() |
2157 | - if connection is not None: |
2158 | - # Give back shared info |
2159 | - request.connection = connection |
2160 | - (auth, proxy_auth) = self._get_credentials() |
2161 | - # Clean the httplib.HTTPConnection pipeline in case the previous |
2162 | - # request couldn't do it |
2163 | - connection.cleanup_pipe() |
2164 | - else: |
2165 | - # First request, initialize credentials. |
2166 | - # scheme and realm will be set by the _urllib2_wrappers.AuthHandler |
2167 | - auth = self._create_auth() |
2168 | - # Proxy initialization will be done by the first proxied request |
2169 | - proxy_auth = dict() |
2170 | - # Ensure authentication info is provided |
2171 | - request.auth = auth |
2172 | - request.proxy_auth = proxy_auth |
2173 | - |
2174 | - if self._debuglevel > 0: |
2175 | - print('perform: %s base: %s, url: %s' % (request.method, self.base, |
2176 | - request.get_full_url())) |
2177 | - response = self._opener.open(request) |
2178 | - if self._get_connection() is not request.connection: |
2179 | - # First connection or reconnection |
2180 | - self._set_connection(request.connection, |
2181 | - (request.auth, request.proxy_auth)) |
2182 | - else: |
2183 | - # http may change the credentials while keeping the |
2184 | - # connection opened |
2185 | - self._update_credentials((request.auth, request.proxy_auth)) |
2186 | - |
2187 | - code = response.code |
2188 | - if (request.follow_redirections is False |
2189 | - and code in (301, 302, 303, 307, 308)): |
2190 | - raise errors.RedirectRequested(request.get_full_url(), |
2191 | - request.redirected_to, |
2192 | - is_permanent=(code in (301, 308))) |
2193 | - |
2194 | - if request.redirected_to is not None: |
2195 | - trace.mutter('redirected from: %s to: %s' % (request.get_full_url(), |
2196 | - request.redirected_to)) |
2197 | - |
2198 | - class Urllib3LikeResponse(object): |
2199 | - |
2200 | - def __init__(self, actual): |
2201 | - self._actual = actual |
2202 | - self._data = None |
2203 | - |
2204 | - def getheader(self, name, default=None): |
2205 | - if self._actual.headers is None: |
2206 | - raise http_client.ResponseNotReady() |
2207 | - if PY3: |
2208 | - return self._actual.headers.get(name, default) |
2209 | - else: |
2210 | - return self._actual.headers.getheader(name, default) |
2211 | - |
2212 | - def getheaders(self): |
2213 | - if self._actual.headers is None: |
2214 | - raise http_client.ResponseNotReady() |
2215 | - return list(self._actual.headers.items()) |
2216 | - |
2217 | - @property |
2218 | - def status(self): |
2219 | - return self._actual.code |
2220 | - |
2221 | - @property |
2222 | - def reason(self): |
2223 | - return self._actual.reason |
2224 | - |
2225 | - @property |
2226 | - def data(self): |
2227 | - if self._data is None: |
2228 | - self._data = self._actual.read() |
2229 | - return self._data |
2230 | - |
2231 | - @property |
2232 | - def text(self): |
2233 | - if self.status == 204: |
2234 | - return None |
2235 | - charset = cgi.parse_header( |
2236 | - self._actual.headers['Content-Type'])[1].get('charset') |
2237 | - if charset: |
2238 | - return self.data.decode(charset) |
2239 | - else: |
2240 | - return self.data.decode() |
2241 | - |
2242 | - def read(self, amt=None): |
2243 | - return self._actual.read(amt) |
2244 | - |
2245 | - def readlines(self): |
2246 | - return self._actual.readlines() |
2247 | - |
2248 | - def readline(self, size=-1): |
2249 | - return self._actual.readline(size) |
2250 | - |
2251 | - return Urllib3LikeResponse(response) |
2252 | - |
2253 | - def disconnect(self): |
2254 | - connection = self._get_connection() |
2255 | - if connection is not None: |
2256 | - connection.close() |
2257 | - |
2258 | - def has(self, relpath): |
2259 | - """Does the target location exist? |
2260 | - """ |
2261 | - response = self._head(relpath) |
2262 | - |
2263 | - code = response.status |
2264 | - if code == 200: # "ok", |
2265 | - return True |
2266 | - else: |
2267 | - return False |
2268 | - |
2269 | - def get(self, relpath): |
2270 | - """Get the file at the given relative path. |
2271 | - |
2272 | - :param relpath: The relative path to the file |
2273 | - """ |
2274 | - code, response_file = self._get(relpath, None) |
2275 | - return response_file |
2276 | - |
2277 | - def _get(self, relpath, offsets, tail_amount=0): |
2278 | - """Get a file, or part of a file. |
2279 | - |
2280 | - :param relpath: Path relative to transport base URL |
2281 | - :param offsets: None to get the whole file; |
2282 | - or a list of _CoalescedOffset to fetch parts of a file. |
2283 | - :param tail_amount: The amount to get from the end of the file. |
2284 | - |
2285 | - :returns: (http_code, result_file) |
2286 | - """ |
2287 | - abspath = self._remote_path(relpath) |
2288 | - headers = {} |
2289 | - if offsets or tail_amount: |
2290 | - range_header = self._attempted_range_header(offsets, tail_amount) |
2291 | - if range_header is not None: |
2292 | - bytes = 'bytes=' + range_header |
2293 | - headers = {'Range': bytes} |
2294 | - else: |
2295 | - range_header = None |
2296 | - |
2297 | - response = self.request('GET', abspath, headers=headers) |
2298 | - |
2299 | - if response.status == 404: # not found |
2300 | - raise errors.NoSuchFile(abspath) |
2301 | - elif response.status == 416: |
2302 | - # We don't know which, but one of the ranges we specified was |
2303 | - # wrong. |
2304 | - raise errors.InvalidHttpRange(abspath, range_header, |
2305 | - 'Server return code %d' % response.status) |
2306 | - elif response.status == 400: |
2307 | - if range_header: |
2308 | - # We don't know which, but one of the ranges we specified was |
2309 | - # wrong. |
2310 | - raise errors.InvalidHttpRange( |
2311 | - abspath, range_header, |
2312 | - 'Server return code %d' % response.status) |
2313 | - else: |
2314 | - raise errors.BadHttpRequest(abspath, response.reason) |
2315 | - elif response.status not in (200, 206): |
2316 | - raise errors.UnexpectedHttpStatus(abspath, response.status) |
2317 | - |
2318 | - data = handle_response( |
2319 | - abspath, response.status, response.getheader, response) |
2320 | - return response.status, data |
2321 | - |
2322 | - def _remote_path(self, relpath): |
2323 | - """See ConnectedTransport._remote_path. |
2324 | - |
2325 | - user and passwords are not embedded in the path provided to the server. |
2326 | - """ |
2327 | - url = self._parsed_url.clone(relpath) |
2328 | - url.user = url.quoted_user = None |
2329 | - url.password = url.quoted_password = None |
2330 | - url.scheme = self._unqualified_scheme |
2331 | - return str(url) |
2332 | - |
2333 | - def _create_auth(self): |
2334 | - """Returns a dict containing the credentials provided at build time.""" |
2335 | - auth = dict(host=self._parsed_url.host, port=self._parsed_url.port, |
2336 | - user=self._parsed_url.user, password=self._parsed_url.password, |
2337 | - protocol=self._unqualified_scheme, |
2338 | - path=self._parsed_url.path) |
2339 | - return auth |
2340 | - |
2341 | - def get_smart_medium(self): |
2342 | - """See Transport.get_smart_medium.""" |
2343 | - if self._medium is None: |
2344 | - # Since medium holds some state (smart server probing at least), we |
2345 | - # need to keep it around. Note that this is needed because medium |
2346 | - # has the same 'base' attribute as the transport so it can't be |
2347 | - # shared between transports having different bases. |
2348 | - self._medium = SmartClientHTTPMedium(self) |
2349 | - return self._medium |
2350 | - |
2351 | - def _degrade_range_hint(self, relpath, ranges): |
2352 | - if self._range_hint == 'multi': |
2353 | - self._range_hint = 'single' |
2354 | - mutter('Retry "%s" with single range request' % relpath) |
2355 | - elif self._range_hint == 'single': |
2356 | - self._range_hint = None |
2357 | - mutter('Retry "%s" without ranges' % relpath) |
2358 | - else: |
2359 | - # We tried all the tricks, but nothing worked, caller must reraise. |
2360 | - return False |
2361 | - return True |
2362 | - |
2363 | - # _coalesce_offsets is a helper for readv, it try to combine ranges without |
2364 | - # degrading readv performances. _bytes_to_read_before_seek is the value |
2365 | - # used for the limit parameter and has been tuned for other transports. For |
2366 | - # HTTP, the name is inappropriate but the parameter is still useful and |
2367 | - # helps reduce the number of chunks in the response. The overhead for a |
2368 | - # chunk (headers, length, footer around the data itself is variable but |
2369 | - # around 50 bytes. We use 128 to reduce the range specifiers that appear in |
2370 | - # the header, some servers (notably Apache) enforce a maximum length for a |
2371 | - # header and issue a '400: Bad request' error when too much ranges are |
2372 | - # specified. |
2373 | - _bytes_to_read_before_seek = 128 |
2374 | - # No limit on the offset number that get combined into one, we are trying |
2375 | - # to avoid downloading the whole file. |
2376 | - _max_readv_combine = 0 |
2377 | - # By default Apache has a limit of ~400 ranges before replying with a 400 |
2378 | - # Bad Request. So we go underneath that amount to be safe. |
2379 | - _max_get_ranges = 200 |
2380 | - # We impose no limit on the range size. But see _pycurl.py for a different |
2381 | - # use. |
2382 | - _get_max_size = 0 |
2383 | - |
2384 | - def _readv(self, relpath, offsets): |
2385 | - """Get parts of the file at the given relative path. |
2386 | - |
2387 | - :param offsets: A list of (offset, size) tuples. |
2388 | - :param return: A list or generator of (offset, data) tuples |
2389 | - """ |
2390 | - # offsets may be a generator, we will iterate it several times, so |
2391 | - # build a list |
2392 | - offsets = list(offsets) |
2393 | - |
2394 | - try_again = True |
2395 | - retried_offset = None |
2396 | - while try_again: |
2397 | - try_again = False |
2398 | - |
2399 | - # Coalesce the offsets to minimize the GET requests issued |
2400 | - sorted_offsets = sorted(offsets) |
2401 | - coalesced = self._coalesce_offsets( |
2402 | - sorted_offsets, limit=self._max_readv_combine, |
2403 | - fudge_factor=self._bytes_to_read_before_seek, |
2404 | - max_size=self._get_max_size) |
2405 | - |
2406 | - # Turn it into a list, we will iterate it several times |
2407 | - coalesced = list(coalesced) |
2408 | - if 'http' in debug.debug_flags: |
2409 | - mutter('http readv of %s offsets => %s collapsed %s', |
2410 | - relpath, len(offsets), len(coalesced)) |
2411 | - |
2412 | - # Cache the data read, but only until it's been used |
2413 | - data_map = {} |
2414 | - # We will iterate on the data received from the GET requests and |
2415 | - # serve the corresponding offsets respecting the initial order. We |
2416 | - # need an offset iterator for that. |
2417 | - iter_offsets = iter(offsets) |
2418 | - try: |
2419 | - cur_offset_and_size = next(iter_offsets) |
2420 | - except StopIteration: |
2421 | - return |
2422 | - |
2423 | - try: |
2424 | - for cur_coal, rfile in self._coalesce_readv(relpath, coalesced): |
2425 | - # Split the received chunk |
2426 | - for offset, size in cur_coal.ranges: |
2427 | - start = cur_coal.start + offset |
2428 | - rfile.seek(start, os.SEEK_SET) |
2429 | - data = rfile.read(size) |
2430 | - data_len = len(data) |
2431 | - if data_len != size: |
2432 | - raise errors.ShortReadvError(relpath, start, size, |
2433 | - actual=data_len) |
2434 | - if (start, size) == cur_offset_and_size: |
2435 | - # The offset requested are sorted as the coalesced |
2436 | - # ones, no need to cache. Win ! |
2437 | - yield cur_offset_and_size[0], data |
2438 | - try: |
2439 | - cur_offset_and_size = next(iter_offsets) |
2440 | - except StopIteration: |
2441 | - return |
2442 | - else: |
2443 | - # Different sorting. We need to cache. |
2444 | - data_map[(start, size)] = data |
2445 | - |
2446 | - # Yield everything we can |
2447 | - while cur_offset_and_size in data_map: |
2448 | - # Clean the cached data since we use it |
2449 | - # XXX: will break if offsets contains duplicates -- |
2450 | - # vila20071129 |
2451 | - this_data = data_map.pop(cur_offset_and_size) |
2452 | - yield cur_offset_and_size[0], this_data |
2453 | - try: |
2454 | - cur_offset_and_size = next(iter_offsets) |
2455 | - except StopIteration: |
2456 | - return |
2457 | - |
2458 | - except (errors.ShortReadvError, errors.InvalidRange, |
2459 | - errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e: |
2460 | - mutter('Exception %r: %s during http._readv', e, e) |
2461 | - if (not isinstance(e, errors.ShortReadvError) |
2462 | - or retried_offset == cur_offset_and_size): |
2463 | - # We don't degrade the range hint for ShortReadvError since |
2464 | - # they do not indicate a problem with the server ability to |
2465 | - # handle ranges. Except when we fail to get back a required |
2466 | - # offset twice in a row. In that case, falling back to |
2467 | - # single range or whole file should help. |
2468 | - if not self._degrade_range_hint(relpath, coalesced): |
2469 | - raise |
2470 | - # Some offsets may have been already processed, so we retry |
2471 | - # only the unsuccessful ones. |
2472 | - offsets = [cur_offset_and_size] + [o for o in iter_offsets] |
2473 | - retried_offset = cur_offset_and_size |
2474 | - try_again = True |
2475 | - |
2476 | - def _coalesce_readv(self, relpath, coalesced): |
2477 | - """Issue several GET requests to satisfy the coalesced offsets""" |
2478 | - |
2479 | - def get_and_yield(relpath, coalesced): |
2480 | - if coalesced: |
2481 | - # Note that the _get below may raise |
2482 | - # errors.InvalidHttpRange. It's the caller's responsibility to |
2483 | - # decide how to retry since it may provide different coalesced |
2484 | - # offsets. |
2485 | - code, rfile = self._get(relpath, coalesced) |
2486 | - for coal in coalesced: |
2487 | - yield coal, rfile |
2488 | - |
2489 | - if self._range_hint is None: |
2490 | - # Download whole file |
2491 | - for c, rfile in get_and_yield(relpath, coalesced): |
2492 | - yield c, rfile |
2493 | - else: |
2494 | - total = len(coalesced) |
2495 | - if self._range_hint == 'multi': |
2496 | - max_ranges = self._max_get_ranges |
2497 | - elif self._range_hint == 'single': |
2498 | - max_ranges = total |
2499 | - else: |
2500 | - raise AssertionError("Unknown _range_hint %r" |
2501 | - % (self._range_hint,)) |
2502 | - # TODO: Some web servers may ignore the range requests and return |
2503 | - # the whole file, we may want to detect that and avoid further |
2504 | - # requests. |
2505 | - # Hint: test_readv_multiple_get_requests will fail once we do that |
2506 | - cumul = 0 |
2507 | - ranges = [] |
2508 | - for coal in coalesced: |
2509 | - if ((self._get_max_size > 0 |
2510 | - and cumul + coal.length > self._get_max_size) or |
2511 | - len(ranges) >= max_ranges): |
2512 | - # Get that much and yield |
2513 | - for c, rfile in get_and_yield(relpath, ranges): |
2514 | - yield c, rfile |
2515 | - # Restart with the current offset |
2516 | - ranges = [coal] |
2517 | - cumul = coal.length |
2518 | - else: |
2519 | - ranges.append(coal) |
2520 | - cumul += coal.length |
2521 | - # Get the rest and yield |
2522 | - for c, rfile in get_and_yield(relpath, ranges): |
2523 | - yield c, rfile |
2524 | - |
2525 | - def recommended_page_size(self): |
2526 | - """See Transport.recommended_page_size(). |
2527 | - |
2528 | - For HTTP we suggest a large page size to reduce the overhead |
2529 | - introduced by latency. |
2530 | - """ |
2531 | - return 64 * 1024 |
2532 | - |
2533 | - def _post(self, body_bytes): |
2534 | - """POST body_bytes to .bzr/smart on this transport. |
2535 | - |
2536 | - :returns: (response code, response body file-like object). |
2537 | - """ |
2538 | - # TODO: Requiring all the body_bytes to be available at the beginning of |
2539 | - # the POST may require large client buffers. It would be nice to have |
2540 | - # an interface that allows streaming via POST when possible (and |
2541 | - # degrades to a local buffer when not). |
2542 | - abspath = self._remote_path('.bzr/smart') |
2543 | - response = self.request( |
2544 | - 'POST', abspath, body=body_bytes, |
2545 | - headers={'Content-Type': 'application/octet-stream'}) |
2546 | - if response.status not in (200, 403): |
2547 | - raise errors.UnexpectedHttpStatus(abspath, response.status) |
2548 | - code = response.status |
2549 | - data = handle_response( |
2550 | - abspath, code, response.getheader, response) |
2551 | - return code, data |
2552 | - |
2553 | - def _head(self, relpath): |
2554 | - """Request the HEAD of a file. |
2555 | - |
2556 | - Performs the request and leaves callers handle the results. |
2557 | - """ |
2558 | - abspath = self._remote_path(relpath) |
2559 | - response = self.request('HEAD', abspath) |
2560 | - if response.status not in (200, 404): |
2561 | - raise errors.UnexpectedHttpStatus(abspath, response.status) |
2562 | - |
2563 | - return response |
2564 | - |
2565 | - raise NotImplementedError(self._post) |
2566 | - |
2567 | - def put_file(self, relpath, f, mode=None): |
2568 | - """Copy the file-like object into the location. |
2569 | - |
2570 | - :param relpath: Location to put the contents, relative to base. |
2571 | - :param f: File-like object. |
2572 | - """ |
2573 | - raise errors.TransportNotPossible('http PUT not supported') |
2574 | - |
2575 | - def mkdir(self, relpath, mode=None): |
2576 | - """Create a directory at the given path.""" |
2577 | - raise errors.TransportNotPossible('http does not support mkdir()') |
2578 | - |
2579 | - def rmdir(self, relpath): |
2580 | - """See Transport.rmdir.""" |
2581 | - raise errors.TransportNotPossible('http does not support rmdir()') |
2582 | - |
2583 | - def append_file(self, relpath, f, mode=None): |
2584 | - """Append the text in the file-like object into the final |
2585 | - location. |
2586 | - """ |
2587 | - raise errors.TransportNotPossible('http does not support append()') |
2588 | - |
2589 | - def copy(self, rel_from, rel_to): |
2590 | - """Copy the item at rel_from to the location at rel_to""" |
2591 | - raise errors.TransportNotPossible('http does not support copy()') |
2592 | - |
2593 | - def copy_to(self, relpaths, other, mode=None, pb=None): |
2594 | - """Copy a set of entries from self into another Transport. |
2595 | - |
2596 | - :param relpaths: A list/generator of entries to be copied. |
2597 | - |
2598 | - TODO: if other is LocalTransport, is it possible to |
2599 | - do better than put(get())? |
2600 | - """ |
2601 | - # At this point HttpTransport might be able to check and see if |
2602 | - # the remote location is the same, and rather than download, and |
2603 | - # then upload, it could just issue a remote copy_this command. |
2604 | - if isinstance(other, HttpTransport): |
2605 | - raise errors.TransportNotPossible( |
2606 | - 'http cannot be the target of copy_to()') |
2607 | - else: |
2608 | - return super(HttpTransport, self).\ |
2609 | - copy_to(relpaths, other, mode=mode, pb=pb) |
2610 | - |
2611 | - def move(self, rel_from, rel_to): |
2612 | - """Move the item at rel_from to the location at rel_to""" |
2613 | - raise errors.TransportNotPossible('http does not support move()') |
2614 | - |
2615 | - def delete(self, relpath): |
2616 | - """Delete the item at relpath""" |
2617 | - raise errors.TransportNotPossible('http does not support delete()') |
2618 | - |
2619 | - def external_url(self): |
2620 | - """See breezy.transport.Transport.external_url.""" |
2621 | - # HTTP URL's are externally usable as long as they don't mention their |
2622 | - # implementation qualifier |
2623 | - url = self._parsed_url.clone() |
2624 | - url.scheme = self._unqualified_scheme |
2625 | - return str(url) |
2626 | - |
2627 | - def is_readonly(self): |
2628 | - """See Transport.is_readonly.""" |
2629 | - return True |
2630 | - |
2631 | - def listable(self): |
2632 | - """See Transport.listable.""" |
2633 | - return False |
2634 | - |
2635 | - def stat(self, relpath): |
2636 | - """Return the stat information for a file. |
2637 | - """ |
2638 | - raise errors.TransportNotPossible('http does not support stat()') |
2639 | - |
2640 | - def lock_read(self, relpath): |
2641 | - """Lock the given file for shared (read) access. |
2642 | - :return: A lock object, which should be passed to Transport.unlock() |
2643 | - """ |
2644 | - # The old RemoteBranch ignore lock for reading, so we will |
2645 | - # continue that tradition and return a bogus lock object. |
2646 | - class BogusLock(object): |
2647 | - def __init__(self, path): |
2648 | - self.path = path |
2649 | - |
2650 | - def unlock(self): |
2651 | - pass |
2652 | - return BogusLock(relpath) |
2653 | - |
2654 | - def lock_write(self, relpath): |
2655 | - """Lock the given file for exclusive (write) access. |
2656 | - WARNING: many transports do not support this, so trying avoid using it |
2657 | - |
2658 | - :return: A lock object, which should be passed to Transport.unlock() |
2659 | - """ |
2660 | - raise errors.TransportNotPossible('http does not support lock_write()') |
2661 | - |
2662 | - def _attempted_range_header(self, offsets, tail_amount): |
2663 | - """Prepare a HTTP Range header at a level the server should accept. |
2664 | - |
2665 | - :return: the range header representing offsets/tail_amount or None if |
2666 | - no header can be built. |
2667 | - """ |
2668 | - |
2669 | - if self._range_hint == 'multi': |
2670 | - # Generate the header describing all offsets |
2671 | - return self._range_header(offsets, tail_amount) |
2672 | - elif self._range_hint == 'single': |
2673 | - # Combine all the requested ranges into a single |
2674 | - # encompassing one |
2675 | - if len(offsets) > 0: |
2676 | - if tail_amount not in (0, None): |
2677 | - # Nothing we can do here to combine ranges with tail_amount |
2678 | - # in a single range, just returns None. The whole file |
2679 | - # should be downloaded. |
2680 | - return None |
2681 | - else: |
2682 | - start = offsets[0].start |
2683 | - last = offsets[-1] |
2684 | - end = last.start + last.length - 1 |
2685 | - whole = self._coalesce_offsets([(start, end - start + 1)], |
2686 | - limit=0, fudge_factor=0) |
2687 | - return self._range_header(list(whole), 0) |
2688 | - else: |
2689 | - # Only tail_amount, requested, leave range_header |
2690 | - # do its work |
2691 | - return self._range_header(offsets, tail_amount) |
2692 | - else: |
2693 | - return None |
2694 | - |
2695 | - @staticmethod |
2696 | - def _range_header(ranges, tail_amount): |
2697 | - """Turn a list of bytes ranges into a HTTP Range header value. |
2698 | - |
2699 | - :param ranges: A list of _CoalescedOffset |
2700 | - :param tail_amount: The amount to get from the end of the file. |
2701 | - |
2702 | - :return: HTTP range header string. |
2703 | - |
2704 | - At least a non-empty ranges *or* a tail_amount must be |
2705 | - provided. |
2706 | - """ |
2707 | - strings = [] |
2708 | - for offset in ranges: |
2709 | - strings.append('%d-%d' % (offset.start, |
2710 | - offset.start + offset.length - 1)) |
2711 | - |
2712 | - if tail_amount: |
2713 | - strings.append('-%d' % tail_amount) |
2714 | - |
2715 | - return ','.join(strings) |
2716 | - |
2717 | - def _redirected_to(self, source, target): |
2718 | - """Returns a transport suitable to re-issue a redirected request. |
2719 | - |
2720 | - :param source: The source url as returned by the server. |
2721 | - :param target: The target url as returned by the server. |
2722 | - |
2723 | - The redirection can be handled only if the relpath involved is not |
2724 | - renamed by the redirection. |
2725 | - |
2726 | - :returns: A transport |
2727 | - :raise UnusableRedirect: when the URL can not be reinterpreted |
2728 | - """ |
2729 | - parsed_source = self._split_url(source) |
2730 | - parsed_target = self._split_url(target) |
2731 | - pl = len(self._parsed_url.path) |
2732 | - # determine the excess tail - the relative path that was in |
2733 | - # the original request but not part of this transports' URL. |
2734 | - excess_tail = parsed_source.path[pl:].strip("/") |
2735 | - if not parsed_target.path.endswith(excess_tail): |
2736 | - # The final part of the url has been renamed, we can't handle the |
2737 | - # redirection. |
2738 | - raise UnusableRedirect( |
2739 | - source, target, "final part of the url was renamed") |
2740 | - |
2741 | - target_path = parsed_target.path |
2742 | - if excess_tail: |
2743 | - # Drop the tail that was in the redirect but not part of |
2744 | - # the path of this transport. |
2745 | - target_path = target_path[:-len(excess_tail)] |
2746 | - |
2747 | - if parsed_target.scheme in ('http', 'https'): |
2748 | - # Same protocol family (i.e. http[s]), we will preserve the same |
2749 | - # http client implementation when a redirection occurs from one to |
2750 | - # the other (otherwise users may be surprised that bzr switches |
2751 | - # from one implementation to the other, and devs may suffer |
2752 | - # debugging it). |
2753 | - if (parsed_target.scheme == self._unqualified_scheme |
2754 | - and parsed_target.host == self._parsed_url.host |
2755 | - and parsed_target.port == self._parsed_url.port |
2756 | - and (parsed_target.user is None or |
2757 | - parsed_target.user == self._parsed_url.user)): |
2758 | - # If a user is specified, it should match, we don't care about |
2759 | - # passwords, wrong passwords will be rejected anyway. |
2760 | - return self.clone(target_path) |
2761 | - else: |
2762 | - # Rebuild the url preserving the scheme qualification and the |
2763 | - # credentials (if they don't apply, the redirected to server |
2764 | - # will tell us, but if they do apply, we avoid prompting the |
2765 | - # user) |
2766 | - redir_scheme = parsed_target.scheme |
2767 | - new_url = self._unsplit_url(redir_scheme, |
2768 | - self._parsed_url.user, |
2769 | - self._parsed_url.password, |
2770 | - parsed_target.host, parsed_target.port, |
2771 | - target_path) |
2772 | - return transport.get_transport_from_url(new_url) |
2773 | - else: |
2774 | - # Redirected to a different protocol |
2775 | - new_url = self._unsplit_url(parsed_target.scheme, |
2776 | - parsed_target.user, |
2777 | - parsed_target.password, |
2778 | - parsed_target.host, parsed_target.port, |
2779 | - target_path) |
2780 | - return transport.get_transport_from_url(new_url) |
2781 | - |
2782 | - def _options(self, relpath): |
2783 | - abspath = self._remote_path(relpath) |
2784 | - resp = self.request('OPTIONS', abspath) |
2785 | - if resp.status == 404: |
2786 | - raise errors.NoSuchFile(abspath) |
2787 | - if resp.status in (403, 405): |
2788 | - raise errors.InvalidHttpResponse( |
2789 | - abspath, |
2790 | - "OPTIONS not supported or forbidden for remote URL") |
2791 | - return resp.getheaders() |
2792 | - |
2793 | - |
2794 | -# TODO: May be better located in smart/medium.py with the other |
2795 | -# SmartMedium classes |
2796 | -class SmartClientHTTPMedium(medium.SmartClientMedium): |
2797 | - |
2798 | - def __init__(self, http_transport): |
2799 | - super(SmartClientHTTPMedium, self).__init__(http_transport.base) |
2800 | - # We don't want to create a circular reference between the http |
2801 | - # transport and its associated medium. Since the transport will live |
2802 | - # longer than the medium, the medium keep only a weak reference to its |
2803 | - # transport. |
2804 | - self._http_transport_ref = weakref.ref(http_transport) |
2805 | - |
2806 | - def get_request(self): |
2807 | - return SmartClientHTTPMediumRequest(self) |
2808 | - |
2809 | - def should_probe(self): |
2810 | - return True |
2811 | - |
2812 | - def remote_path_from_transport(self, transport): |
2813 | - # Strip the optional 'bzr+' prefix from transport so it will have the |
2814 | - # same scheme as self. |
2815 | - transport_base = transport.base |
2816 | - if transport_base.startswith('bzr+'): |
2817 | - transport_base = transport_base[4:] |
2818 | - rel_url = urlutils.relative_url(self.base, transport_base) |
2819 | - return urlutils.unquote(rel_url) |
2820 | - |
2821 | - def send_http_smart_request(self, bytes): |
2822 | - try: |
2823 | - # Get back the http_transport hold by the weak reference |
2824 | - t = self._http_transport_ref() |
2825 | - code, body_filelike = t._post(bytes) |
2826 | - if code != 200: |
2827 | - raise errors.UnexpectedHttpStatus( |
2828 | - t._remote_path('.bzr/smart'), code) |
2829 | - except (errors.InvalidHttpResponse, errors.ConnectionReset) as e: |
2830 | - raise errors.SmartProtocolError(str(e)) |
2831 | - return body_filelike |
2832 | - |
2833 | - def _report_activity(self, bytes, direction): |
2834 | - """See SmartMedium._report_activity. |
2835 | - |
2836 | - Does nothing; the underlying plain HTTP transport will report the |
2837 | - activity that this medium would report. |
2838 | - """ |
2839 | - pass |
2840 | - |
2841 | - def disconnect(self): |
2842 | - """See SmartClientMedium.disconnect().""" |
2843 | - t = self._http_transport_ref() |
2844 | - t.disconnect() |
2845 | - |
2846 | - |
2847 | -# TODO: May be better located in smart/medium.py with the other |
2848 | -# SmartMediumRequest classes |
2849 | -class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest): |
2850 | - """A SmartClientMediumRequest that works with an HTTP medium.""" |
2851 | - |
2852 | - def __init__(self, client_medium): |
2853 | - medium.SmartClientMediumRequest.__init__(self, client_medium) |
2854 | - self._buffer = b'' |
2855 | - |
2856 | - def _accept_bytes(self, bytes): |
2857 | - self._buffer += bytes |
2858 | - |
2859 | - def _finished_writing(self): |
2860 | - data = self._medium.send_http_smart_request(self._buffer) |
2861 | - self._response_body = data |
2862 | - |
2863 | - def _read_bytes(self, count): |
2864 | - """See SmartClientMediumRequest._read_bytes.""" |
2865 | - return self._response_body.read(count) |
2866 | - |
2867 | - def _read_line(self): |
2868 | - line, excess = medium._get_line(self._response_body.read) |
2869 | - if excess != b'': |
2870 | - raise AssertionError( |
2871 | - '_get_line returned excess bytes, but this mediumrequest ' |
2872 | - 'cannot handle excess. (%r)' % (excess,)) |
2873 | - return line |
2874 | - |
2875 | - def _finished_reading(self): |
2876 | - """See SmartClientMediumRequest._finished_reading.""" |
2877 | - pass |
2878 | - |
2879 | - |
2880 | -def unhtml_roughly(maybe_html, length_limit=1000): |
2881 | - """Very approximate html->text translation, for presenting error bodies. |
2882 | - |
2883 | - :param length_limit: Truncate the result to this many characters. |
2884 | - |
2885 | - >>> unhtml_roughly("<b>bad</b> things happened\\n") |
2886 | - ' bad things happened ' |
2887 | - """ |
2888 | - return re.subn(r"(<[^>]*>|\n| )", " ", maybe_html)[0][:length_limit] |
2889 | - |
2890 | - |
2891 | -def get_test_permutations(): |
2892 | - """Return the permutations to be used in testing.""" |
2893 | - from breezy.tests import ( |
2894 | - features, |
2895 | - http_server, |
2896 | - ) |
2897 | - permutations = [(HttpTransport, http_server.HttpServer), ] |
2898 | - if features.HTTPSServerFeature.available(): |
2899 | - from breezy.tests import ( |
2900 | - https_server, |
2901 | - ssl_certs, |
2902 | - ) |
2903 | - |
2904 | - class HTTPS_transport(HttpTransport): |
2905 | - |
2906 | - def __init__(self, base, _from_transport=None): |
2907 | - super(HTTPS_transport, self).__init__( |
2908 | - base, _from_transport=_from_transport, |
2909 | - ca_certs=ssl_certs.build_path('ca.crt')) |
2910 | - |
2911 | - permutations.append((HTTPS_transport, |
2912 | - https_server.HTTPSServer)) |
2913 | - return permutations |
2914 | |
2915 | === added file 'breezy/transport/http/urllib.py' |
2916 | --- breezy/transport/http/urllib.py 1970-01-01 00:00:00 +0000 |
2917 | +++ breezy/transport/http/urllib.py 2020-12-27 18:07:47 +0000 |
2918 | @@ -0,0 +1,2626 @@ |
2919 | +# Copyright (C) 2005-2010 Canonical Ltd |
2920 | +# |
2921 | +# This program is free software; you can redistribute it and/or modify |
2922 | +# it under the terms of the GNU General Public License as published by |
2923 | +# the Free Software Foundation; either version 2 of the License, or |
2924 | +# (at your option) any later version. |
2925 | +# |
2926 | +# This program is distributed in the hope that it will be useful, |
2927 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
2928 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
2929 | +# GNU General Public License for more details. |
2930 | +# |
2931 | +# You should have received a copy of the GNU General Public License |
2932 | +# along with this program; if not, write to the Free Software |
2933 | +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2934 | + |
2935 | +"""Base implementation of Transport over http using urllib. |
2936 | + |
2937 | +There are separate implementation modules for each http client implementation. |
2938 | +""" |
2939 | + |
2940 | +from __future__ import absolute_import |
2941 | + |
2942 | +DEBUG = 0 |
2943 | + |
2944 | +import base64 |
2945 | +import cgi |
2946 | +import errno |
2947 | +import os |
2948 | +import re |
2949 | +import socket |
2950 | +import ssl |
2951 | +import sys |
2952 | +import time |
2953 | +import urllib |
2954 | +import weakref |
2955 | + |
2956 | +try: |
2957 | + import http.client as http_client |
2958 | +except ImportError: |
2959 | + import httplib as http_client |
2960 | +try: |
2961 | + import urllib.request as urllib_request |
2962 | +except ImportError: # python < 3 |
2963 | + import urllib2 as urllib_request |
2964 | +try: |
2965 | + from urllib.parse import urljoin, splitport, splittype, splithost, urlencode |
2966 | +except ImportError: |
2967 | + from urlparse import urljoin |
2968 | + from urllib import splitport, splittype, splithost, urlencode |
2969 | + |
2970 | +# TODO: handle_response should be integrated into the http/__init__.py |
2971 | +from .response import handle_response |
2972 | + |
2973 | +# FIXME: Oversimplifying, two kind of exceptions should be |
2974 | +# raised, once a request is issued: URLError before we have been |
2975 | +# able to process the response, HTTPError after that. Process the |
2976 | +# response means we are able to leave the socket clean, so if we |
2977 | +# are not able to do that, we should close the connection. The |
2978 | +# actual code more or less do that, tests should be written to |
2979 | +# ensure that. |
2980 | + |
2981 | +from ... import __version__ as breezy_version |
2982 | +from ... import ( |
2983 | + config, |
2984 | + debug, |
2985 | + errors, |
2986 | + lazy_import, |
2987 | + osutils, |
2988 | + trace, |
2989 | + transport, |
2990 | + ui, |
2991 | + urlutils, |
2992 | +) |
2993 | +from ...bzr.smart import medium |
2994 | +from ...sixish import ( |
2995 | + PY3, |
2996 | + reraise, |
2997 | + text_type, |
2998 | +) |
2999 | +from ...trace import mutter |
3000 | +from ...transport import ( |
3001 | + ConnectedTransport, |
3002 | + UnusableRedirect, |
3003 | + ) |
3004 | + |
3005 | +from . import default_user_agent, ssl |
3006 | + |
3007 | + |
3008 | +checked_kerberos = False |
3009 | +kerberos = None |
3010 | + |
3011 | + |
3012 | +class addinfourl(urllib_request.addinfourl): |
3013 | + '''Replacement addinfourl class compatible with python-2.7's xmlrpclib |
3014 | + |
3015 | + In python-2.7, xmlrpclib expects that the response object that it receives |
3016 | + has a getheader method. http_client.HTTPResponse provides this but |
3017 | + urllib_request.addinfourl does not. Add the necessary functions here, ported to |
3018 | + use the internal data structures of addinfourl. |
3019 | + ''' |
3020 | + |
3021 | + def getheader(self, name, default=None): |
3022 | + if self.headers is None: |
3023 | + raise http_client.ResponseNotReady() |
3024 | + return self.headers.getheader(name, default) |
3025 | + |
3026 | + def getheaders(self): |
3027 | + if self.headers is None: |
3028 | + raise http_client.ResponseNotReady() |
3029 | + return list(self.headers.items()) |
3030 | + |
3031 | + |
3032 | +class _ReportingFileSocket(object): |
3033 | + |
3034 | + def __init__(self, filesock, report_activity=None): |
3035 | + self.filesock = filesock |
3036 | + self._report_activity = report_activity |
3037 | + |
3038 | + def report_activity(self, size, direction): |
3039 | + if self._report_activity: |
3040 | + self._report_activity(size, direction) |
3041 | + |
3042 | + def read(self, size=1): |
3043 | + s = self.filesock.read(size) |
3044 | + self.report_activity(len(s), 'read') |
3045 | + return s |
3046 | + |
3047 | + def readline(self, size=-1): |
3048 | + s = self.filesock.readline(size) |
3049 | + self.report_activity(len(s), 'read') |
3050 | + return s |
3051 | + |
3052 | + def readinto(self, b): |
3053 | + s = self.filesock.readinto(b) |
3054 | + self.report_activity(s, 'read') |
3055 | + return s |
3056 | + |
3057 | + def __getattr__(self, name): |
3058 | + return getattr(self.filesock, name) |
3059 | + |
3060 | + |
3061 | +class _ReportingSocket(object): |
3062 | + |
3063 | + def __init__(self, sock, report_activity=None): |
3064 | + self.sock = sock |
3065 | + self._report_activity = report_activity |
3066 | + |
3067 | + def report_activity(self, size, direction): |
3068 | + if self._report_activity: |
3069 | + self._report_activity(size, direction) |
3070 | + |
3071 | + def sendall(self, s, *args): |
3072 | + self.sock.sendall(s, *args) |
3073 | + self.report_activity(len(s), 'write') |
3074 | + |
3075 | + def recv(self, *args): |
3076 | + s = self.sock.recv(*args) |
3077 | + self.report_activity(len(s), 'read') |
3078 | + return s |
3079 | + |
3080 | + def makefile(self, mode='r', bufsize=-1): |
3081 | + # http_client creates a fileobject that doesn't do buffering, which |
3082 | + # makes fp.readline() very expensive because it only reads one byte |
3083 | + # at a time. So we wrap the socket in an object that forces |
3084 | + # sock.makefile to make a buffered file. |
3085 | + fsock = self.sock.makefile(mode, 65536) |
3086 | + # And wrap that into a reporting kind of fileobject |
3087 | + return _ReportingFileSocket(fsock, self._report_activity) |
3088 | + |
3089 | + def __getattr__(self, name): |
3090 | + return getattr(self.sock, name) |
3091 | + |
3092 | + |
3093 | +# We define our own Response class to keep our http_client pipe clean |
3094 | +class Response(http_client.HTTPResponse): |
3095 | + """Custom HTTPResponse, to avoid the need to decorate. |
3096 | + |
3097 | + http_client prefers to decorate the returned objects, rather |
3098 | + than using a custom object. |
3099 | + """ |
3100 | + |
3101 | + # Some responses have bodies in which we have no interest |
3102 | + _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501] |
3103 | + |
3104 | + # in finish() below, we may have to discard several MB in the worst |
3105 | + # case. To avoid buffering that much, we read and discard by chunks |
3106 | + # instead. The underlying file is either a socket or a StringIO, so reading |
3107 | + # 8k chunks should be fine. |
3108 | + _discarded_buf_size = 8192 |
3109 | + |
3110 | + if PY3: |
3111 | + def __init__(self, sock, debuglevel=0, method=None, url=None): |
3112 | + self.url = url |
3113 | + super(Response, self).__init__( |
3114 | + sock, debuglevel=debuglevel, method=method, url=url) |
3115 | + |
3116 | + def begin(self): |
3117 | + """Begin to read the response from the server. |
3118 | + |
3119 | + http_client assumes that some responses get no content and do |
3120 | + not even attempt to read the body in that case, leaving |
3121 | + the body in the socket, blocking the next request. Let's |
3122 | + try to workaround that. |
3123 | + """ |
3124 | + http_client.HTTPResponse.begin(self) |
3125 | + if self.status in self._body_ignored_responses: |
3126 | + if self.debuglevel >= 2: |
3127 | + print("For status: [%s], will ready body, length: %s" % ( |
3128 | + self.status, self.length)) |
3129 | + if not (self.length is None or self.will_close): |
3130 | + # In some cases, we just can't read the body not |
3131 | + # even try or we may encounter a 104, 'Connection |
3132 | + # reset by peer' error if there is indeed no body |
3133 | + # and the server closed the connection just after |
3134 | + # having issued the response headers (even if the |
3135 | + # headers indicate a Content-Type...) |
3136 | + body = self.read(self.length) |
3137 | + if self.debuglevel >= 9: |
3138 | + # This one can be huge and is generally not interesting |
3139 | + print("Consumed body: [%s]" % body) |
3140 | + self.close() |
3141 | + elif self.status == 200: |
3142 | + # Whatever the request is, it went ok, so we surely don't want to |
3143 | + # close the connection. Some cases are not correctly detected by |
3144 | + # http_client.HTTPConnection.getresponse (called by |
3145 | + # http_client.HTTPResponse.begin). The CONNECT response for the https |
3146 | + # through proxy case is one. Note: the 'will_close' below refers |
3147 | + # to the "true" socket between us and the server, whereas the |
3148 | + # 'close()' above refers to the copy of that socket created by |
3149 | + # http_client for the response itself. So, in the if above we close the |
3150 | + # socket to indicate that we are done with the response whereas |
3151 | + # below we keep the socket with the server opened. |
3152 | + self.will_close = False |
3153 | + |
3154 | + def finish(self): |
3155 | + """Finish reading the body. |
3156 | + |
3157 | + In some cases, the client may have left some bytes to read in the |
3158 | + body. That will block the next request to succeed if we use a |
3159 | + persistent connection. If we don't use a persistent connection, well, |
3160 | + nothing will block the next request since a new connection will be |
3161 | + issued anyway. |
3162 | + |
3163 | + :return: the number of bytes left on the socket (may be None) |
3164 | + """ |
3165 | + pending = None |
3166 | + if not self.isclosed(): |
3167 | + # Make sure nothing was left to be read on the socket |
3168 | + pending = 0 |
3169 | + data = True |
3170 | + while data and self.length: |
3171 | + # read() will update self.length |
3172 | + data = self.read(min(self.length, self._discarded_buf_size)) |
3173 | + pending += len(data) |
3174 | + if pending: |
3175 | + trace.mutter("%s bytes left on the HTTP socket", pending) |
3176 | + self.close() |
3177 | + return pending |
3178 | + |
3179 | + |
3180 | +# Not inheriting from 'object' because http_client.HTTPConnection doesn't. |
3181 | +class AbstractHTTPConnection: |
3182 | + """A custom HTTP(S) Connection, which can reset itself on a bad response""" |
3183 | + |
3184 | + response_class = Response |
3185 | + |
3186 | + # When we detect a server responding with the whole file to range requests, |
3187 | +    # we want to warn. But not below a given threshold.
3188 | + _range_warning_thresold = 1024 * 1024 |
3189 | + |
3190 | + def __init__(self, report_activity=None): |
3191 | + self._response = None |
3192 | + self._report_activity = report_activity |
3193 | + self._ranges_received_whole_file = None |
3194 | + |
3195 | + def _mutter_connect(self): |
3196 | + netloc = '%s:%s' % (self.host, self.port) |
3197 | + if self.proxied_host is not None: |
3198 | + netloc += '(proxy for %s)' % self.proxied_host |
3199 | + trace.mutter('* About to connect() to %s' % netloc) |
3200 | + |
3201 | + def getresponse(self): |
3202 | + """Capture the response to be able to cleanup""" |
3203 | + self._response = http_client.HTTPConnection.getresponse(self) |
3204 | + return self._response |
3205 | + |
3206 | + def cleanup_pipe(self): |
3207 | + """Read the remaining bytes of the last response if any.""" |
3208 | + if self._response is not None: |
3209 | + try: |
3210 | + pending = self._response.finish() |
3211 | + # Warn the user (once) |
3212 | + if (self._ranges_received_whole_file is None |
3213 | + and self._response.status == 200 |
3214 | + and pending |
3215 | + and pending > self._range_warning_thresold): |
3216 | + self._ranges_received_whole_file = True |
3217 | + trace.warning( |
3218 | + 'Got a 200 response when asking for multiple ranges,' |
3219 | + ' does your server at %s:%s support range requests?', |
3220 | + self.host, self.port) |
3221 | + except socket.error as e: |
3222 | + # It's conceivable that the socket is in a bad state here |
3223 | + # (including some test cases) and in this case, it doesn't need |
3224 | + # cleaning anymore, so no need to fail, we just get rid of the |
3225 | + # socket and let callers reconnect |
3226 | + if (len(e.args) == 0 |
3227 | + or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)): |
3228 | + raise |
3229 | + self.close() |
3230 | + self._response = None |
3231 | + # Preserve our preciousss |
3232 | + sock = self.sock |
3233 | + self.sock = None |
3234 | + # Let http_client.HTTPConnection do its housekeeping |
3235 | + self.close() |
3236 | + # Restore our preciousss |
3237 | + self.sock = sock |
3238 | + |
3239 | + def _wrap_socket_for_reporting(self, sock): |
3240 | + """Wrap the socket before anybody use it.""" |
3241 | + self.sock = _ReportingSocket(sock, self._report_activity) |
3242 | + |
3243 | + |
3244 | +class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection): |
3245 | + |
3246 | + # XXX: Needs refactoring at the caller level. |
3247 | + def __init__(self, host, port=None, proxied_host=None, |
3248 | + report_activity=None, ca_certs=None): |
3249 | + AbstractHTTPConnection.__init__(self, report_activity=report_activity) |
3250 | + if PY3: |
3251 | + http_client.HTTPConnection.__init__(self, host, port) |
3252 | + else: |
3253 | + # Use strict=True since we don't support HTTP/0.9 |
3254 | + http_client.HTTPConnection.__init__(self, host, port, strict=True) |
3255 | + self.proxied_host = proxied_host |
3256 | + # ca_certs is ignored, it's only relevant for https |
3257 | + |
3258 | + def connect(self): |
3259 | + if 'http' in debug.debug_flags: |
3260 | + self._mutter_connect() |
3261 | + http_client.HTTPConnection.connect(self) |
3262 | + self._wrap_socket_for_reporting(self.sock) |
3263 | + |
3264 | + |
3265 | +class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection): |
3266 | + |
3267 | + def __init__(self, host, port=None, key_file=None, cert_file=None, |
3268 | + proxied_host=None, |
3269 | + report_activity=None, ca_certs=None): |
3270 | + AbstractHTTPConnection.__init__(self, report_activity=report_activity) |
3271 | + if PY3: |
3272 | + http_client.HTTPSConnection.__init__( |
3273 | + self, host, port, key_file, cert_file) |
3274 | + else: |
3275 | + # Use strict=True since we don't support HTTP/0.9 |
3276 | + http_client.HTTPSConnection.__init__(self, host, port, |
3277 | + key_file, cert_file, strict=True) |
3278 | + self.proxied_host = proxied_host |
3279 | + self.ca_certs = ca_certs |
3280 | + |
3281 | + def connect(self): |
3282 | + if 'http' in debug.debug_flags: |
3283 | + self._mutter_connect() |
3284 | + http_client.HTTPConnection.connect(self) |
3285 | + self._wrap_socket_for_reporting(self.sock) |
3286 | + if self.proxied_host is None: |
3287 | + self.connect_to_origin() |
3288 | + |
3289 | + def connect_to_origin(self): |
3290 | + # FIXME JRV 2011-12-18: Use location config here? |
3291 | + config_stack = config.GlobalStack() |
3292 | + cert_reqs = config_stack.get('ssl.cert_reqs') |
3293 | + if self.proxied_host is not None: |
3294 | + host = self.proxied_host.split(":", 1)[0] |
3295 | + else: |
3296 | + host = self.host |
3297 | + if cert_reqs == ssl.CERT_NONE: |
3298 | + ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host) |
3299 | + ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert') |
3300 | + ca_certs = None |
3301 | + else: |
3302 | + if self.ca_certs is None: |
3303 | + ca_certs = config_stack.get('ssl.ca_certs') |
3304 | + else: |
3305 | + ca_certs = self.ca_certs |
3306 | + if ca_certs is None: |
3307 | + trace.warning( |
3308 | + "No valid trusted SSL CA certificates file set. See " |
3309 | + "'brz help ssl.ca_certs' for more information on setting " |
3310 | + "trusted CAs.") |
3311 | + try: |
3312 | + ssl_context = ssl.create_default_context( |
3313 | + purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs) |
3314 | + ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE |
3315 | + if self.cert_file: |
3316 | + ssl_context.load_cert_chain( |
3317 | + keyfile=self.key_file, certfile=self.cert_file) |
3318 | + ssl_context.verify_mode = cert_reqs |
3319 | + ssl_sock = ssl_context.wrap_socket( |
3320 | + self.sock, server_hostname=self.host) |
3321 | + except ssl.SSLError: |
3322 | + trace.note( |
3323 | + "\n" |
3324 | + "See `brz help ssl.ca_certs` for how to specify trusted CA" |
3325 | + "certificates.\n" |
3326 | + "Pass -Ossl.cert_reqs=none to disable certificate " |
3327 | + "verification entirely.\n") |
3328 | + raise |
3329 | + # Wrap the ssl socket before anybody use it |
3330 | + self._wrap_socket_for_reporting(ssl_sock) |
3331 | + |
3332 | + |
3333 | +class Request(urllib_request.Request): |
3334 | + """A custom Request object. |
3335 | + |
3336 | + urllib_request determines the request method heuristically (based on |
3337 | + the presence or absence of data). We set the method |
3338 | + statically. |
3339 | + |
3340 | + The Request object tracks: |
3341 | + - the connection the request will be made on. |
3342 | + - the authentication parameters needed to preventively set |
3343 | +      the authentication header once a first authentication has
3344 | + been made. |
3345 | + """ |
3346 | + |
3347 | + def __init__(self, method, url, data=None, headers={}, |
3348 | + origin_req_host=None, unverifiable=False, |
3349 | + connection=None, parent=None): |
3350 | + urllib_request.Request.__init__( |
3351 | + self, url, data, headers, |
3352 | + origin_req_host, unverifiable) |
3353 | + self.method = method |
3354 | + self.connection = connection |
3355 | + # To handle redirections |
3356 | + self.parent = parent |
3357 | + self.redirected_to = None |
3358 | + # Unless told otherwise, redirections are not followed |
3359 | + self.follow_redirections = False |
3360 | + # auth and proxy_auth are dicts containing, at least |
3361 | + # (scheme, host, port, realm, user, password, protocol, path). |
3362 | + # The dict entries are mostly handled by the AuthHandler. |
3363 | + # Some authentication schemes may add more entries. |
3364 | + self.auth = {} |
3365 | + self.proxy_auth = {} |
3366 | + self.proxied_host = None |
3367 | + |
3368 | + def get_method(self): |
3369 | + return self.method |
3370 | + |
3371 | + def set_proxy(self, proxy, type): |
3372 | + """Set the proxy and remember the proxied host.""" |
3373 | + if PY3: |
3374 | + host, port = splitport(self.host) |
3375 | + else: |
3376 | + host, port = splitport(self.get_host()) |
3377 | + if port is None: |
3378 | + # We need to set the default port ourselves way before it gets set |
3379 | + # in the HTTP[S]Connection object at build time. |
3380 | + if self.type == 'https': |
3381 | + conn_class = HTTPSConnection |
3382 | + else: |
3383 | + conn_class = HTTPConnection |
3384 | + port = conn_class.default_port |
3385 | + self.proxied_host = '%s:%s' % (host, port) |
3386 | + urllib_request.Request.set_proxy(self, proxy, type) |
3387 | + # When urllib_request makes a https request with our wrapper code and a proxy, |
3388 | + # it sets Host to the https proxy, not the host we want to talk to. |
3389 | + # I'm fairly sure this is our fault, but what is the cause is an open |
3390 | + # question. -- Robert Collins May 8 2010. |
3391 | + self.add_unredirected_header('Host', self.proxied_host) |
3392 | + |
3393 | + |
3394 | +class _ConnectRequest(Request): |
3395 | + |
3396 | + def __init__(self, request): |
3397 | + """Constructor |
3398 | + |
3399 | + :param request: the first request sent to the proxied host, already |
3400 | + processed by the opener (i.e. proxied_host is already set). |
3401 | + """ |
3402 | + # We give a fake url and redefine selector or urllib_request will be |
3403 | + # confused |
3404 | + Request.__init__(self, 'CONNECT', request.get_full_url(), |
3405 | + connection=request.connection) |
3406 | + if request.proxied_host is None: |
3407 | + raise AssertionError() |
3408 | + self.proxied_host = request.proxied_host |
3409 | + |
3410 | + @property |
3411 | + def selector(self): |
3412 | + return self.proxied_host |
3413 | + |
3414 | + def get_selector(self): |
3415 | + return self.selector |
3416 | + |
3417 | + def set_proxy(self, proxy, type): |
3418 | + """Set the proxy without remembering the proxied host. |
3419 | + |
3420 | + We already know the proxied host by definition, the CONNECT request |
3421 | + occurs only when the connection goes through a proxy. The usual |
3422 | + processing (masquerade the request so that the connection is done to |
3423 | + the proxy while the request is targeted at another host) does not apply |
3424 | + here. In fact, the connection is already established with proxy and we |
3425 | + just want to enable the SSL tunneling. |
3426 | + """ |
3427 | + urllib_request.Request.set_proxy(self, proxy, type) |
3428 | + |
3429 | + |
3430 | +class ConnectionHandler(urllib_request.BaseHandler): |
3431 | + """Provides connection-sharing by pre-processing requests. |
3432 | + |
3433 | + urllib_request provides no way to access the HTTPConnection object |
3434 | + internally used. But we need it in order to achieve |
3435 | + connection sharing. So, we add it to the request just before |
3436 | + it is processed, and then we override the do_open method for |
3437 | + http[s] requests in AbstractHTTPHandler. |
3438 | + """ |
3439 | + |
3440 | + handler_order = 1000 # after all pre-processings |
3441 | + |
3442 | + def __init__(self, report_activity=None, ca_certs=None): |
3443 | + self._report_activity = report_activity |
3444 | + self.ca_certs = ca_certs |
3445 | + |
3446 | + def create_connection(self, request, http_connection_class): |
3447 | + host = request.host |
3448 | + if not host: |
3449 | + # Just a bit of paranoia here, this should have been |
3450 | + # handled in the higher levels |
3451 | + raise urlutils.InvalidURL(request.get_full_url(), 'no host given.') |
3452 | + |
3453 | + # We create a connection (but it will not connect until the first |
3454 | + # request is made) |
3455 | + try: |
3456 | + connection = http_connection_class( |
3457 | + host, proxied_host=request.proxied_host, |
3458 | + report_activity=self._report_activity, |
3459 | + ca_certs=self.ca_certs) |
3460 | + except http_client.InvalidURL as exception: |
3461 | + # There is only one occurrence of InvalidURL in http_client |
3462 | + raise urlutils.InvalidURL(request.get_full_url(), |
3463 | + extra='nonnumeric port') |
3464 | + |
3465 | + return connection |
3466 | + |
3467 | + def capture_connection(self, request, http_connection_class): |
3468 | + """Capture or inject the request connection. |
3469 | + |
3470 | + Two cases: |
3471 | +        - the request has no connection: create a new one,
3472 | +
3473 | +        - the request has a connection: this one has been used
3474 | + already, let's capture it, so that we can give it to |
3475 | + another transport to be reused. We don't do that |
3476 | + ourselves: the Transport object get the connection from |
3477 | + a first request and then propagate it, from request to |
3478 | + request or to cloned transports. |
3479 | + """ |
3480 | + connection = request.connection |
3481 | + if connection is None: |
3482 | + # Create a new one |
3483 | + connection = self.create_connection(request, http_connection_class) |
3484 | + request.connection = connection |
3485 | + |
3486 | + # All connections will pass here, propagate debug level |
3487 | + connection.set_debuglevel(DEBUG) |
3488 | + return request |
3489 | + |
3490 | + def http_request(self, request): |
3491 | + return self.capture_connection(request, HTTPConnection) |
3492 | + |
3493 | + def https_request(self, request): |
3494 | + return self.capture_connection(request, HTTPSConnection) |
3495 | + |
3496 | + |
3497 | +class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler): |
3498 | + """A custom handler for HTTP(S) requests. |
3499 | + |
3500 | +    We override urllib_request.AbstractHTTPHandler to get a better
3501 | + control of the connection, the ability to implement new |
3502 | + request types and return a response able to cope with |
3503 | + persistent connections. |
3504 | + """ |
3505 | + |
3506 | + # We change our order to be before urllib_request HTTP[S]Handlers |
3507 | + # and be chosen instead of them (the first http_open called |
3508 | + # wins). |
3509 | + handler_order = 400 |
3510 | + |
3511 | + _default_headers = {'Pragma': 'no-cache', |
3512 | + 'Cache-control': 'max-age=0', |
3513 | + 'Connection': 'Keep-Alive', |
3514 | + 'User-agent': default_user_agent(), |
3515 | + 'Accept': '*/*', |
3516 | + } |
3517 | + |
3518 | + def __init__(self): |
3519 | + urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG) |
3520 | + |
3521 | + def http_request(self, request): |
3522 | + """Common headers setting""" |
3523 | + |
3524 | + for name, value in self._default_headers.items(): |
3525 | + if name not in request.headers: |
3526 | + request.headers[name] = value |
3527 | + # FIXME: We may have to add the Content-Length header if |
3528 | + # we have data to send. |
3529 | + return request |
3530 | + |
3531 | + def retry_or_raise(self, http_class, request, first_try): |
3532 | + """Retry the request (once) or raise the exception. |
3533 | + |
3534 | + urllib_request raises exception of application level kind, we |
3535 | + just have to translate them. |
3536 | + |
3537 | + http_client can raise exceptions of transport level (badly |
3538 | +    formatted dialog, loss of connection or socket level
3539 | + problems). In that case we should issue the request again |
3540 | + (http_client will close and reopen a new connection if |
3541 | + needed). |
3542 | + """ |
3543 | + # When an exception occurs, we give back the original |
3544 | + # Traceback or the bugs are hard to diagnose. |
3545 | + exc_type, exc_val, exc_tb = sys.exc_info() |
3546 | + if exc_type == socket.gaierror: |
3547 | + # No need to retry, that will not help |
3548 | + if PY3: |
3549 | + origin_req_host = request.origin_req_host |
3550 | + else: |
3551 | + origin_req_host = request.get_origin_req_host() |
3552 | + raise errors.ConnectionError("Couldn't resolve host '%s'" |
3553 | + % origin_req_host, |
3554 | + orig_error=exc_val) |
3555 | + elif isinstance(exc_val, http_client.ImproperConnectionState): |
3556 | + # The http_client pipeline is in incorrect state, it's a bug in our |
3557 | + # implementation. |
3558 | + reraise(exc_type, exc_val, exc_tb) |
3559 | + else: |
3560 | + if first_try: |
3561 | + if self._debuglevel >= 2: |
3562 | + print('Received exception: [%r]' % exc_val) |
3563 | + print(' On connection: [%r]' % request.connection) |
3564 | + method = request.get_method() |
3565 | + url = request.get_full_url() |
3566 | + print(' Will retry, %s %r' % (method, url)) |
3567 | + request.connection.close() |
3568 | + response = self.do_open(http_class, request, False) |
3569 | + else: |
3570 | + if self._debuglevel >= 2: |
3571 | + print('Received second exception: [%r]' % exc_val) |
3572 | + print(' On connection: [%r]' % request.connection) |
3573 | + if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol): |
3574 | + # http_client.BadStatusLine and |
3575 | + # http_client.UnknownProtocol indicates that a |
3576 | + # bogus server was encountered or a bad |
3577 | + # connection (i.e. transient errors) is |
3578 | +                    # experienced, we have already retried once
3579 | + # for that request so we raise the exception. |
3580 | + my_exception = errors.InvalidHttpResponse( |
3581 | + request.get_full_url(), |
3582 | + 'Bad status line received', |
3583 | + orig_error=exc_val) |
3584 | + elif (isinstance(exc_val, socket.error) and len(exc_val.args) |
3585 | + and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)): |
3586 | + # 10053 == WSAECONNABORTED |
3587 | + # 10054 == WSAECONNRESET |
3588 | + raise errors.ConnectionReset( |
3589 | + "Connection lost while sending request.") |
3590 | + else: |
3591 | + # All other exception are considered connection related. |
3592 | + |
3593 | + # socket errors generally occurs for reasons |
3594 | + # far outside our scope, so closing the |
3595 | + # connection and retrying is the best we can |
3596 | + # do. |
3597 | + if PY3: |
3598 | + selector = request.selector |
3599 | + else: |
3600 | + selector = request.get_selector() |
3601 | + my_exception = errors.ConnectionError( |
3602 | + msg='while sending %s %s:' % (request.get_method(), |
3603 | + selector), |
3604 | + orig_error=exc_val) |
3605 | + |
3606 | + if self._debuglevel >= 2: |
3607 | + print('On connection: [%r]' % request.connection) |
3608 | + method = request.get_method() |
3609 | + url = request.get_full_url() |
3610 | + print(' Failed again, %s %r' % (method, url)) |
3611 | + print(' Will raise: [%r]' % my_exception) |
3612 | + reraise(type(my_exception), my_exception, exc_tb) |
3613 | + return response |
3614 | + |
3615 | + def do_open(self, http_class, request, first_try=True): |
3616 | + """See urllib_request.AbstractHTTPHandler.do_open for the general idea. |
3617 | + |
3618 | + The request will be retried once if it fails. |
3619 | + """ |
3620 | + connection = request.connection |
3621 | + if connection is None: |
3622 | + raise AssertionError( |
3623 | + 'Cannot process a request without a connection') |
3624 | + |
3625 | + # Get all the headers |
3626 | + headers = {} |
3627 | + headers.update(request.header_items()) |
3628 | + headers.update(request.unredirected_hdrs) |
3629 | + # Some servers or proxies will choke on headers not properly |
3630 | + # cased. http_client/urllib/urllib_request all use capitalize to get canonical |
3631 | + # header names, but only python2.5 urllib_request use title() to fix them just |
3632 | + # before sending the request. And not all versions of python 2.5 do |
3633 | + # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it |
3634 | +        # ourselves below.
3635 | + headers = {name.title(): val for name, val in headers.items()} |
3636 | + |
3637 | + try: |
3638 | + method = request.get_method() |
3639 | + if PY3: |
3640 | + url = request.selector |
3641 | + else: |
3642 | + url = request.get_selector() |
3643 | + if sys.version_info[:2] >= (3, 6): |
3644 | + connection._send_request(method, url, |
3645 | + # FIXME: implements 100-continue |
3646 | + # None, # We don't send the body yet |
3647 | + request.data, |
3648 | + headers, encode_chunked=False) |
3649 | + else: |
3650 | + connection._send_request(method, url, |
3651 | + # FIXME: implements 100-continue |
3652 | + # None, # We don't send the body yet |
3653 | + request.data, |
3654 | + headers) |
3655 | + if 'http' in debug.debug_flags: |
3656 | + trace.mutter('> %s %s' % (method, url)) |
3657 | + hdrs = [] |
3658 | + for k, v in headers.items(): |
3659 | + # People are often told to paste -Dhttp output to help |
3660 | + # debug. Don't compromise credentials. |
3661 | + if k in ('Authorization', 'Proxy-Authorization'): |
3662 | + v = '<masked>' |
3663 | + hdrs.append('%s: %s' % (k, v)) |
3664 | + trace.mutter('> ' + '\n> '.join(hdrs) + '\n') |
3665 | + if self._debuglevel >= 1: |
3666 | + print('Request sent: [%r] from (%s)' |
3667 | + % (request, request.connection.sock.getsockname())) |
3668 | + response = connection.getresponse() |
3669 | + convert_to_addinfourl = True |
3670 | + except (ssl.SSLError, ssl.CertificateError): |
3671 | + # Something is wrong with either the certificate or the hostname, |
3672 | + # re-trying won't help |
3673 | + raise |
3674 | + except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol, |
3675 | + socket.error, http_client.HTTPException): |
3676 | + response = self.retry_or_raise(http_class, request, first_try) |
3677 | + convert_to_addinfourl = False |
3678 | + |
3679 | + if PY3: |
3680 | + response.msg = response.reason |
3681 | + return response |
3682 | + |
3683 | +# FIXME: HTTPConnection does not fully support 100-continue (the |
3684 | +# server responses are just ignored) |
3685 | + |
3686 | +# if code == 100: |
3687 | +# mutter('Will send the body') |
3688 | +# # We can send the body now |
3689 | +# body = request.data |
3690 | +# if body is None: |
3691 | +# raise URLError("No data given") |
3692 | +# connection.send(body) |
3693 | +# response = connection.getresponse() |
3694 | + |
3695 | + if self._debuglevel >= 2: |
3696 | + print('Receives response: %r' % response) |
3697 | + print(' For: %r(%r)' % (request.get_method(), |
3698 | + request.get_full_url())) |
3699 | + |
3700 | + if convert_to_addinfourl: |
3701 | + # Shamelessly copied from urllib_request |
3702 | + req = request |
3703 | + r = response |
3704 | + r.recv = r.read |
3705 | + fp = socket._fileobject(r, bufsize=65536) |
3706 | + resp = addinfourl(fp, r.msg, req.get_full_url()) |
3707 | + resp.code = r.status |
3708 | + resp.msg = r.reason |
3709 | + resp.version = r.version |
3710 | + if self._debuglevel >= 2: |
3711 | + print('Create addinfourl: %r' % resp) |
3712 | + print(' For: %r(%r)' % (request.get_method(), |
3713 | + request.get_full_url())) |
3714 | + if 'http' in debug.debug_flags: |
3715 | + version = 'HTTP/%d.%d' |
3716 | + try: |
3717 | + version = version % (resp.version / 10, |
3718 | + resp.version % 10) |
3719 | + except: |
3720 | + version = 'HTTP/%r' % resp.version |
3721 | + trace.mutter('< %s %s %s' % (version, resp.code, |
3722 | + resp.msg)) |
3723 | + # Use the raw header lines instead of treating resp.info() as a |
3724 | + # dict since we may miss duplicated headers otherwise. |
3725 | + hdrs = [h.rstrip('\r\n') for h in resp.info().headers] |
3726 | + trace.mutter('< ' + '\n< '.join(hdrs) + '\n') |
3727 | + else: |
3728 | + resp = response |
3729 | + return resp |
3730 | + |
3731 | + |
3732 | +class HTTPHandler(AbstractHTTPHandler): |
3733 | + """A custom handler that just thunks into HTTPConnection""" |
3734 | + |
3735 | + def http_open(self, request): |
3736 | + return self.do_open(HTTPConnection, request) |
3737 | + |
3738 | + |
3739 | +class HTTPSHandler(AbstractHTTPHandler): |
3740 | + """A custom handler that just thunks into HTTPSConnection""" |
3741 | + |
3742 | + https_request = AbstractHTTPHandler.http_request |
3743 | + |
3744 | + def https_open(self, request): |
3745 | + connection = request.connection |
3746 | + if connection.sock is None and \ |
3747 | + connection.proxied_host is not None and \ |
3748 | + request.get_method() != 'CONNECT': # Don't loop |
3749 | + # FIXME: We need a gazillion connection tests here, but we still |
3750 | + # miss a https server :-( : |
3751 | + # - with and without proxy |
3752 | + # - with and without certificate |
3753 | + # - with self-signed certificate |
3754 | + # - with and without authentication |
3755 | + # - with good and bad credentials (especially the proxy auth around |
3756 | + # CONNECT) |
3757 | + # - with basic and digest schemes |
3758 | + # - reconnection on errors |
3759 | + # - connection persistence behaviour (including reconnection) |
3760 | + |
3761 | + # We are about to connect for the first time via a proxy, we must |
3762 | + # issue a CONNECT request first to establish the encrypted link |
3763 | + connect = _ConnectRequest(request) |
3764 | + response = self.parent.open(connect) |
3765 | + if response.code != 200: |
3766 | + raise errors.ConnectionError("Can't connect to %s via proxy %s" % ( |
3767 | + connect.proxied_host, self.host)) |
3768 | + # Housekeeping |
3769 | + connection.cleanup_pipe() |
3770 | + # Establish the connection encryption |
3771 | + connection.connect_to_origin() |
3772 | + # Propagate the connection to the original request |
3773 | + request.connection = connection |
3774 | + return self.do_open(HTTPSConnection, request) |
3775 | + |
3776 | + |
3777 | +class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler): |
3778 | + """Handles redirect requests. |
3779 | + |
3780 | + We have to implement our own scheme because we use a specific |
3781 | + Request object and because we want to implement a specific |
3782 | + policy. |
3783 | + """ |
3784 | + _debuglevel = DEBUG |
3785 | + # RFC2616 says that only read requests should be redirected |
3786 | + # without interacting with the user. But Breezy uses some |
3787 | +    # shortcuts to optimize against roundtrips which can lead to
3788 | + # write requests being issued before read requests of |
3789 | + # containing dirs can be redirected. So we redirect write |
3790 | + # requests in the same way which seems to respect the spirit |
3791 | + # of the RFC if not its letter. |
3792 | + |
3793 | + def redirect_request(self, req, fp, code, msg, headers, newurl): |
3794 | + """See urllib_request.HTTPRedirectHandler.redirect_request""" |
3795 | + # We would have preferred to update the request instead |
3796 | + # of creating a new one, but the urllib_request.Request object |
3797 | + # has a too complicated creation process to provide a |
3798 | + # simple enough equivalent update process. Instead, when |
3799 | + # redirecting, we only update the following request in |
3800 | + # the redirect chain with a reference to the parent |
3801 | + # request . |
3802 | + |
3803 | + # Some codes make no sense in our context and are treated |
3804 | + # as errors: |
3805 | + |
3806 | + # 300: Multiple choices for different representations of |
3807 | +        # the URI. Using that mechanism with Breezy will violate the
3808 | + # protocol neutrality of Transport. |
3809 | + |
3810 | +        # 304: Not modified (SHOULD only occur with conditional
3811 | + # GETs which are not used by our implementation) |
3812 | + |
3813 | + # 305: Use proxy. I can't imagine this one occurring in |
3814 | + # our context-- vila/20060909 |
3815 | + |
3816 | + # 306: Unused (if the RFC says so...) |
3817 | + |
3818 | + # If the code is 302 and the request is HEAD, some may |
3819 | + # think that it is a sufficient hint that the file exists
3820 | + # and that we MAY avoid following the redirections. But |
3821 | + # if we want to be sure, we MUST follow them. |
3822 | + |
3823 | + if PY3: |
3824 | + origin_req_host = req.origin_req_host |
3825 | + else: |
3826 | + origin_req_host = req.get_origin_req_host() |
3827 | + |
3828 | + if code in (301, 302, 303, 307, 308): |
3829 | + return Request(req.get_method(), newurl, |
3830 | + headers=req.headers, |
3831 | + origin_req_host=origin_req_host, |
3832 | + unverifiable=True, |
3833 | + # TODO: It will be nice to be able to |
3834 | + # detect virtual hosts sharing the same |
3835 | + # IP address, that will allow us to |
3836 | + # share the same connection... |
3837 | + connection=None, |
3838 | + parent=req, |
3839 | + ) |
3840 | + else: |
3841 | + raise urllib_request.HTTPError( |
3842 | + req.get_full_url(), code, msg, headers, fp) |
3843 | + |
3844 | + def http_error_302(self, req, fp, code, msg, headers): |
3845 | + """Requests the redirected to URI. |
3846 | + |
3847 | + Copied from urllib_request to be able to clean the pipe of the associated |
3848 | + connection, *before* issuing the redirected request but *after* having |
3849 | + eventually raised an error. |
3850 | + """ |
3851 | + # Some servers (incorrectly) return multiple Location headers |
3852 | + # (so probably same goes for URI). Use first header. |
3853 | + |
3854 | + # TODO: Once we get rid of addinfourl objects, the |
3855 | + # following will need to be updated to use correct case |
3856 | + # for headers. |
3857 | + if 'location' in headers: |
3858 | + newurl = headers.get('location') |
3859 | + elif 'uri' in headers: |
3860 | + newurl = headers.get('uri') |
3861 | + else: |
3862 | + return |
3863 | + |
3864 | + newurl = urljoin(req.get_full_url(), newurl) |
3865 | + |
3866 | + if self._debuglevel >= 1: |
3867 | + print('Redirected to: %s (followed: %r)' % (newurl, |
3868 | + req.follow_redirections)) |
3869 | + if req.follow_redirections is False: |
3870 | + req.redirected_to = newurl |
3871 | + return fp |
3872 | + |
3873 | + # This call succeeds or raise an error. urllib_request returns |
3874 | + # if redirect_request returns None, but our |
3875 | + # redirect_request never returns None. |
3876 | + redirected_req = self.redirect_request(req, fp, code, msg, headers, |
3877 | + newurl) |
3878 | + |
3879 | + # loop detection |
3880 | + # .redirect_dict has a key url if url was previously visited. |
3881 | + if hasattr(req, 'redirect_dict'): |
3882 | + visited = redirected_req.redirect_dict = req.redirect_dict |
3883 | + if (visited.get(newurl, 0) >= self.max_repeats or |
3884 | + len(visited) >= self.max_redirections): |
3885 | + raise urllib_request.HTTPError(req.get_full_url(), code, |
3886 | + self.inf_msg + msg, headers, fp) |
3887 | + else: |
3888 | + visited = redirected_req.redirect_dict = req.redirect_dict = {} |
3889 | + visited[newurl] = visited.get(newurl, 0) + 1 |
3890 | + |
3891 | + # We can close the fp now that we are sure that we won't |
3892 | + # use it with HTTPError. |
3893 | + fp.close() |
3894 | + # We have all we need already in the response |
3895 | + req.connection.cleanup_pipe() |
3896 | + |
3897 | + return self.parent.open(redirected_req) |
3898 | + |
3899 | + http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302 |
3900 | + |
3901 | + |
3902 | +class ProxyHandler(urllib_request.ProxyHandler): |
3903 | + """Handles proxy setting. |
3904 | + |
3905 | + Copied and modified from urllib_request to be able to modify the request during |
3906 | + the request pre-processing instead of modifying it at _open time. As we |
3907 | + capture (or create) the connection object during request processing, _open |
3908 | + time was too late. |
3909 | + |
3910 | + The main task is to modify the request so that the connection is done to |
3911 | + the proxy while the request still refers to the destination host. |
3912 | + |
3913 | + Note: the proxy handling *may* modify the protocol used; the request may be |
3914 | + against an https server proxied through an http proxy. So, https_request |
3915 | + will be called, but later it's really http_open that will be called. This |
3916 | + explains why we don't have to call self.parent.open as the urllib_request did. |
3917 | + """ |
3918 | + |
3919 | + # Proxies must be in front |
3920 | + handler_order = 100 |
3921 | + _debuglevel = DEBUG |
3922 | + |
3923 | + def __init__(self, proxies=None): |
3924 | + urllib_request.ProxyHandler.__init__(self, proxies) |
3925 | + # First, let's get rid of urllib_request implementation |
3926 | + for type, proxy in self.proxies.items(): |
3927 | + if self._debuglevel >= 3: |
3928 | + print('Will unbind %s_open for %r' % (type, proxy)) |
3929 | + delattr(self, '%s_open' % type) |
3930 | + |
3931 | + def bind_scheme_request(proxy, scheme): |
3932 | + if proxy is None: |
3933 | + return |
3934 | + scheme_request = scheme + '_request' |
3935 | + if self._debuglevel >= 3: |
3936 | + print('Will bind %s for %r' % (scheme_request, proxy)) |
3937 | + setattr(self, scheme_request, |
3938 | + lambda request: self.set_proxy(request, scheme)) |
3939 | + # We are interested only by the http[s] proxies |
3940 | + http_proxy = self.get_proxy_env_var('http') |
3941 | + bind_scheme_request(http_proxy, 'http') |
3942 | + https_proxy = self.get_proxy_env_var('https') |
3943 | + bind_scheme_request(https_proxy, 'https') |
3944 | + |
3945 | + def get_proxy_env_var(self, name, default_to='all'): |
3946 | + """Get a proxy env var. |
3947 | + |
3948 | + Note that we indirectly rely on |
3949 | + urllib.getproxies_environment taking into account the |
3950 | + uppercased values for proxy variables. |
3951 | + """ |
3952 | + try: |
3953 | + return self.proxies[name.lower()] |
3954 | + except KeyError: |
3955 | + if default_to is not None: |
3956 | + # Try to get the alternate environment variable |
3957 | + try: |
3958 | + return self.proxies[default_to] |
3959 | + except KeyError: |
3960 | + pass |
3961 | + return None |
3962 | + |
3963 | + def proxy_bypass(self, host): |
3964 | + """Check if host should be proxied or not. |
3965 | + |
3966 | + :returns: True to skip the proxy, False otherwise. |
3967 | + """ |
3968 | + no_proxy = self.get_proxy_env_var('no', default_to=None) |
3969 | + bypass = self.evaluate_proxy_bypass(host, no_proxy) |
3970 | + if bypass is None: |
3971 | + # Nevertheless, there are platform-specific ways to |
3972 | + # ignore proxies... |
3973 | + return urllib_request.proxy_bypass(host) |
3974 | + else: |
3975 | + return bypass |
3976 | + |
3977 | + def evaluate_proxy_bypass(self, host, no_proxy): |
3978 | + """Check the host against a comma-separated no_proxy list as a string. |
3979 | + |
3980 | + :param host: ``host:port`` being requested |
3981 | + |
3982 | + :param no_proxy: comma-separated list of hosts to access directly. |
3983 | + |
3984 | + :returns: True to skip the proxy, False not to, or None to |
3985 | + leave it to urllib. |
3986 | + """ |
3987 | + if no_proxy is None: |
3988 | + # All hosts are proxied |
3989 | + return False |
3990 | + hhost, hport = splitport(host) |
3991 | + # Does host match any of the domains mentioned in |
3992 | + # no_proxy ? The rules about what is authorized in no_proxy |
3993 | + # are fuzzy (to say the least). We try to allow most |
3994 | + # commonly seen values. |
3995 | + for domain in no_proxy.split(','): |
3996 | + domain = domain.strip() |
3997 | + if domain == '': |
3998 | + continue |
3999 | + dhost, dport = splitport(domain) |
4000 | + if hport == dport or dport is None: |
4001 | + # Protect glob chars |
4002 | + dhost = dhost.replace(".", r"\.") |
4003 | + dhost = dhost.replace("*", r".*") |
4004 | + dhost = dhost.replace("?", r".") |
4005 | + if re.match(dhost, hhost, re.IGNORECASE): |
4006 | + return True |
4007 | + # Nothing explicitly excludes the host from proxying
4008 | + return None |
4009 | + |
4010 | + def set_proxy(self, request, type): |
4011 | + if PY3: |
4012 | + host = request.host |
4013 | + else: |
4014 | + host = request.get_host() |
4015 | + if self.proxy_bypass(host): |
4016 | + return request |
4017 | + |
4018 | + proxy = self.get_proxy_env_var(type) |
4019 | + if self._debuglevel >= 3: |
4020 | + print('set_proxy %s_request for %r' % (type, proxy)) |
4021 | + # FIXME: python 2.5 urlparse provides a better _parse_proxy which can |
4022 | + # grok user:password@host:port as well as |
4023 | + # http://user:password@host:port |
4024 | + |
4025 | + parsed_url = transport.ConnectedTransport._split_url(proxy) |
4026 | + if not parsed_url.host: |
4027 | + raise urlutils.InvalidURL(proxy, 'No host component') |
4028 | + |
4029 | + if request.proxy_auth == {}: |
4030 | + # No proxy auth parameters are available, we are handling the first
4031 | + # proxied request, initialize. scheme (the authentication scheme)
4032 | + # and realm will be set by the AuthHandler |
4033 | + request.proxy_auth = { |
4034 | + 'host': parsed_url.host, |
4035 | + 'port': parsed_url.port, |
4036 | + 'user': parsed_url.user, |
4037 | + 'password': parsed_url.password, |
4038 | + 'protocol': parsed_url.scheme, |
4039 | + # We ignore path since we connect to a proxy |
4040 | + 'path': None} |
4041 | + if parsed_url.port is None: |
4042 | + phost = parsed_url.host |
4043 | + else: |
4044 | + phost = parsed_url.host + ':%d' % parsed_url.port |
4045 | + request.set_proxy(phost, type) |
4046 | + if self._debuglevel >= 3: |
4047 | + print('set_proxy: proxy set to %s://%s' % (type, phost)) |
4048 | + return request |
4049 | + |
4050 | + |
4051 | +class AbstractAuthHandler(urllib_request.BaseHandler): |
4052 | + """A custom abstract authentication handler for all http authentications. |
4053 | + |
4054 | + Provides the meat to handle authentication errors and |
4055 | + preventively set authentication headers after the first |
4056 | + successful authentication. |
4057 | + |
4058 | + This can be used for http and proxy, as well as for basic, negotiate and |
4059 | + digest authentications. |
4060 | + |
4061 | + This provides a unified interface for all authentication handlers
4062 | + (urllib_request provides far too many with different policies). |
4063 | + |
4064 | + The interaction between this handler and the urllib_request |
4065 | + framework is not obvious, it works as follow: |
4066 | + |
4067 | + opener.open(request) is called: |
4068 | + |
4069 | + - that may trigger http_request which will add an authentication header |
4070 | + (self.build_header) if enough info is available. |
4071 | + |
4072 | + - the request is sent to the server, |
4073 | + |
4074 | + - if an authentication error is received self.auth_required is called, |
4075 | + we acquire the authentication info in the error headers and call |
4076 | + self.auth_match to check that we are able to try the |
4077 | + authentication and complete the authentication parameters, |
4078 | + |
4079 | + - we call parent.open(request), that may trigger http_request |
4080 | + and will add a header (self.build_header), but here we have |
4081 | + all the required info (keep in mind that the request and |
4082 | + authentication used in the recursive calls are really (and must be) |
4083 | + the *same* objects). |
4084 | + |
4085 | + - if the call returns a response, the authentication has been
4086 | + successful and the request authentication parameters have been updated. |
4087 | + """ |
4088 | + |
4089 | + scheme = None |
4090 | + """The scheme as it appears in the server header (lower cased)""" |
4091 | + |
4092 | + _max_retry = 3 |
4093 | + """We don't want to retry authenticating endlessly""" |
4094 | + |
4095 | + requires_username = True |
4096 | + """Whether the auth mechanism requires a username.""" |
4097 | + |
4098 | + # The following attributes should be defined by daughter |
4099 | + # classes: |
4100 | + # - auth_required_header: the header received from the server |
4101 | + # - auth_header: the header sent in the request |
4102 | + |
4103 | + def __init__(self): |
4104 | + # We want to know when we enter into an try/fail cycle of |
4105 | + # authentications so we initialize to None to indicate that we aren't |
4106 | + # in such a cycle by default. |
4107 | + self._retry_count = None |
4108 | + |
4109 | + def _parse_auth_header(self, server_header): |
4110 | + """Parse the authentication header. |
4111 | + |
4112 | + :param server_header: The value of the header sent by the server |
4113 | + describing the authentication request.
4114 | + |
4115 | + :return: A tuple (scheme, remainder) scheme being the first word in the |
4116 | + given header (lower cased), remainder may be None. |
4117 | + """ |
4118 | + try: |
4119 | + scheme, remainder = server_header.split(None, 1) |
4120 | + except ValueError: |
4121 | + scheme = server_header |
4122 | + remainder = None |
4123 | + return (scheme.lower(), remainder) |
4124 | + |
4125 | + def update_auth(self, auth, key, value): |
4126 | + """Update a value in auth marking the auth as modified if needed""" |
4127 | + old_value = auth.get(key, None) |
4128 | + if old_value != value: |
4129 | + auth[key] = value |
4130 | + auth['modified'] = True |
4131 | + |
4132 | + def auth_required(self, request, headers): |
4133 | + """Retry the request if the auth scheme is ours. |
4134 | + |
4135 | + :param request: The request needing authentication. |
4136 | + :param headers: The headers for the authentication error response. |
4137 | + :return: None or the response for the authenticated request. |
4138 | + """ |
4139 | + # Don't try to authenticate endlessly |
4140 | + if self._retry_count is None: |
4141 | + # The retries being recursive calls, None identifies the first retry
4142 | + self._retry_count = 1 |
4143 | + else: |
4144 | + self._retry_count += 1 |
4145 | + if self._retry_count > self._max_retry: |
4146 | + # Let's be ready for next round |
4147 | + self._retry_count = None |
4148 | + return None |
4149 | + if PY3: |
4150 | + server_headers = headers.get_all(self.auth_required_header) |
4151 | + else: |
4152 | + server_headers = headers.getheaders(self.auth_required_header) |
4153 | + if not server_headers: |
4154 | + # The http error MUST have the associated |
4155 | + # header. This must never happen in production code. |
4156 | + trace.mutter('%s not found', self.auth_required_header) |
4157 | + return None |
4158 | + |
4159 | + auth = self.get_auth(request) |
4160 | + auth['modified'] = False |
4161 | + # Put some common info in auth if the caller didn't |
4162 | + if auth.get('path', None) is None: |
4163 | + parsed_url = urlutils.URL.from_string(request.get_full_url()) |
4164 | + self.update_auth(auth, 'protocol', parsed_url.scheme) |
4165 | + self.update_auth(auth, 'host', parsed_url.host) |
4166 | + self.update_auth(auth, 'port', parsed_url.port) |
4167 | + self.update_auth(auth, 'path', parsed_url.path) |
4168 | + # FIXME: the auth handler should be selected at a single place instead |
4169 | + # of letting all handlers try to match all headers, but the current |
4170 | + # design doesn't allow a simple implementation. |
4171 | + for server_header in server_headers: |
4172 | + # Several schemes can be proposed by the server, try to match each |
4173 | + # one in turn |
4174 | + matching_handler = self.auth_match(server_header, auth) |
4175 | + if matching_handler: |
4176 | + # auth_match may have modified auth (by adding the |
4177 | + # password or changing the realm, for example) |
4178 | + if (request.get_header(self.auth_header, None) is not None |
4179 | + and not auth['modified']): |
4180 | + # We already tried that, give up |
4181 | + return None |
4182 | + |
4183 | + # Only the most secure scheme proposed by the server should be |
4184 | + # used, since the handlers use 'handler_order' to describe that |
4185 | + # property, the first handler tried takes precedence, the |
4186 | + # others should not attempt to authenticate if the best one |
4187 | + # failed. |
4188 | + best_scheme = auth.get('best_scheme', None) |
4189 | + if best_scheme is None: |
4190 | + # At that point, if the current handler doesn't succeed
4191 | + # the credentials are wrong (or incomplete), but we know |
4192 | + # that the associated scheme should be used. |
4193 | + best_scheme = auth['best_scheme'] = self.scheme |
4194 | + if best_scheme != self.scheme: |
4195 | + continue |
4196 | + |
4197 | + if self.requires_username and auth.get('user', None) is None: |
4198 | + # Without a known user, we can't authenticate |
4199 | + return None |
4200 | + |
4201 | + # Housekeeping |
4202 | + request.connection.cleanup_pipe() |
4203 | + # Retry the request with an authentication header added |
4204 | + response = self.parent.open(request) |
4205 | + if response: |
4206 | + self.auth_successful(request, response) |
4207 | + return response |
4208 | + # We are not qualified to handle the authentication. |
4209 | + # Note: the authentication error handling will try all |
4210 | + # available handlers. If one of them authenticates |
4211 | + # successfully, a response will be returned. If none of |
4212 | + # them succeeds, None will be returned and the error |
4213 | + # handler will raise the 401 'Unauthorized' or the 407 |
4214 | + # 'Proxy Authentication Required' error. |
4215 | + return None |
4216 | + |
4217 | + def add_auth_header(self, request, header): |
4218 | + """Add the authentication header to the request""" |
4219 | + request.add_unredirected_header(self.auth_header, header) |
4220 | + |
4221 | + def auth_match(self, header, auth): |
4222 | + """Check that we are able to handle that authentication scheme. |
4223 | + |
4224 | + The request authentication parameters may need to be |
4225 | + updated with info from the server. Some of these |
4226 | + parameters, when combined, are considered to be the |
4227 | + authentication key, if one of them change the |
4228 | + authentication result may change. 'user' and 'password' |
4229 | + are examples, but some auth schemes may have others
4230 | + (digest's nonce is an example, digest's nonce_count is a |
4231 | + *counter-example*). Such parameters must be updated by |
4232 | + using the update_auth() method. |
4233 | + |
4234 | + :param header: The authentication header sent by the server. |
4235 | + :param auth: The auth parameters already known. They may be |
4236 | + updated. |
4237 | + :returns: True if we can try to handle the authentication. |
4238 | + """ |
4239 | + raise NotImplementedError(self.auth_match) |
4240 | + |
4241 | + def build_auth_header(self, auth, request): |
4242 | + """Build the value of the header used to authenticate. |
4243 | + |
4244 | + :param auth: The auth parameters needed to build the header. |
4245 | + :param request: The request needing authentication. |
4246 | + |
4247 | + :return: None or header. |
4248 | + """ |
4249 | + raise NotImplementedError(self.build_auth_header) |
4250 | + |
4251 | + def auth_successful(self, request, response): |
4252 | + """The authentication was successful for the request.
4253 | + |
4254 | + Additional infos may be available in the response. |
4255 | + |
4256 | + :param request: The successfully authenticated request.
4257 | + :param response: The server response (may contain auth info). |
4258 | + """ |
4259 | + # It may happen that we need to reconnect later, let's be ready |
4260 | + self._retry_count = None |
4261 | + |
4262 | + def get_user_password(self, auth): |
4263 | + """Ask user for a password if none is already available. |
4264 | + |
4265 | + :param auth: authentication info gathered so far (from the initial url |
4266 | + and then during dialog with the server). |
4267 | + """ |
4268 | + auth_conf = config.AuthenticationConfig() |
4269 | + user = auth.get('user', None) |
4270 | + password = auth.get('password', None) |
4271 | + realm = auth['realm'] |
4272 | + port = auth.get('port', None) |
4273 | + |
4274 | + if user is None: |
4275 | + user = auth_conf.get_user(auth['protocol'], auth['host'], |
4276 | + port=port, path=auth['path'], |
4277 | + realm=realm, ask=True, |
4278 | + prompt=self.build_username_prompt(auth)) |
4279 | + if user is not None and password is None: |
4280 | + password = auth_conf.get_password( |
4281 | + auth['protocol'], auth['host'], user, |
4282 | + port=port, |
4283 | + path=auth['path'], realm=realm, |
4284 | + prompt=self.build_password_prompt(auth)) |
4285 | + |
4286 | + return user, password |
4287 | + |
4288 | + def _build_password_prompt(self, auth): |
4289 | + """Build a prompt taking the protocol used into account. |
4290 | + |
4291 | + The AuthHandler is used by http and https, we want that information in |
4292 | + the prompt, so we build the prompt from the authentication dict which |
4293 | + contains all the needed parts. |
4294 | + |
4295 | + Also, http and proxy AuthHandlers present different prompts to the |
4296 | + user. The daughter classes should implement a public
4297 | + build_password_prompt using this method. |
4298 | + """ |
4299 | + prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s' |
4300 | + realm = auth['realm'] |
4301 | + if realm is not None: |
4302 | + prompt += u", Realm: '%s'" % realm |
4303 | + prompt += u' password' |
4304 | + return prompt |
4305 | + |
4306 | + def _build_username_prompt(self, auth): |
4307 | + """Build a prompt taking the protocol used into account. |
4308 | + |
4309 | + The AuthHandler is used by http and https, we want that information in |
4310 | + the prompt, so we build the prompt from the authentication dict which |
4311 | + contains all the needed parts. |
4312 | + |
4313 | + Also, http and proxy AuthHandlers present different prompts to the |
4314 | + user. The daughter classes should implement a public
4315 | + build_username_prompt using this method. |
4316 | + """ |
4317 | + prompt = u'%s' % auth['protocol'].upper() + u' %(host)s' |
4318 | + realm = auth['realm'] |
4319 | + if realm is not None: |
4320 | + prompt += u", Realm: '%s'" % realm |
4321 | + prompt += u' username' |
4322 | + return prompt |
4323 | + |
4324 | + def http_request(self, request): |
4325 | + """Insert an authentication header if information is available""" |
4326 | + auth = self.get_auth(request) |
4327 | + if self.auth_params_reusable(auth): |
4328 | + self.add_auth_header( |
4329 | + request, self.build_auth_header(auth, request)) |
4330 | + return request |
4331 | + |
4332 | + https_request = http_request # FIXME: Need test |
4333 | + |
4334 | + |
4335 | +class NegotiateAuthHandler(AbstractAuthHandler): |
4336 | + """A authentication handler that handles WWW-Authenticate: Negotiate. |
4337 | + |
4338 | + At the moment this handler supports just Kerberos. In the future, |
4339 | + NTLM support may also be added. |
4340 | + """ |
4341 | + |
4342 | + scheme = 'negotiate' |
4343 | + handler_order = 480 |
4344 | + requires_username = False |
4345 | + |
4346 | + def auth_match(self, header, auth): |
4347 | + scheme, raw_auth = self._parse_auth_header(header) |
4348 | + if scheme != self.scheme: |
4349 | + return False |
4350 | + self.update_auth(auth, 'scheme', scheme) |
4351 | + resp = self._auth_match_kerberos(auth) |
4352 | + if resp is None: |
4353 | + return False |
4354 | + # Optionally should try to authenticate using NTLM here |
4355 | + self.update_auth(auth, 'negotiate_response', resp) |
4356 | + return True |
4357 | + |
4358 | + def _auth_match_kerberos(self, auth): |
4359 | + """Try to create a GSSAPI response for authenticating against a host.""" |
4360 | + global kerberos, checked_kerberos |
4361 | + if kerberos is None and not checked_kerberos: |
4362 | + try: |
4363 | + import kerberos |
4364 | + except ImportError: |
4365 | + kerberos = None |
4366 | + checked_kerberos = True |
4367 | + if kerberos is None: |
4368 | + return None |
4369 | + ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth) |
4370 | + if ret < 1: |
4371 | + trace.warning('Unable to create GSSAPI context for %s: %d', |
4372 | + auth['host'], ret) |
4373 | + return None |
4374 | + ret = kerberos.authGSSClientStep(vc, "") |
4375 | + if ret < 0: |
4376 | + trace.mutter('authGSSClientStep failed: %d', ret) |
4377 | + return None |
4378 | + return kerberos.authGSSClientResponse(vc) |
4379 | + |
4380 | + def build_auth_header(self, auth, request): |
4381 | + return "Negotiate %s" % auth['negotiate_response'] |
4382 | + |
4383 | + def auth_params_reusable(self, auth): |
4384 | + # If the auth scheme is known, it means a previous |
4385 | + # authentication was successful, all information is |
4386 | + # available, no further checks are needed. |
4387 | + return (auth.get('scheme', None) == 'negotiate' and |
4388 | + auth.get('negotiate_response', None) is not None) |
4389 | + |
4390 | + |
4391 | +class BasicAuthHandler(AbstractAuthHandler): |
4392 | + """A custom basic authentication handler.""" |
4393 | + |
4394 | + scheme = 'basic' |
4395 | + handler_order = 500 |
4396 | + auth_regexp = re.compile('realm="([^"]*)"', re.I) |
4397 | + |
4398 | + def build_auth_header(self, auth, request): |
4399 | + raw = '%s:%s' % (auth['user'], auth['password']) |
4400 | + auth_header = 'Basic ' + \ |
4401 | + base64.b64encode(raw.encode('utf-8')).decode('ascii') |
4402 | + return auth_header |
4403 | + |
4404 | + def extract_realm(self, header_value): |
4405 | + match = self.auth_regexp.search(header_value) |
4406 | + realm = None |
4407 | + if match: |
4408 | + realm = match.group(1) |
4409 | + return match, realm |
4410 | + |
4411 | + def auth_match(self, header, auth): |
4412 | + scheme, raw_auth = self._parse_auth_header(header) |
4413 | + if scheme != self.scheme: |
4414 | + return False |
4415 | + |
4416 | + match, realm = self.extract_realm(raw_auth) |
4417 | + if match: |
4418 | + # Put useful info into auth |
4419 | + self.update_auth(auth, 'scheme', scheme) |
4420 | + self.update_auth(auth, 'realm', realm) |
4421 | + if (auth.get('user', None) is None |
4422 | + or auth.get('password', None) is None): |
4423 | + user, password = self.get_user_password(auth) |
4424 | + self.update_auth(auth, 'user', user) |
4425 | + self.update_auth(auth, 'password', password) |
4426 | + return match is not None |
4427 | + |
4428 | + def auth_params_reusable(self, auth): |
4429 | + # If the auth scheme is known, it means a previous |
4430 | + # authentication was successful, all information is |
4431 | + # available, no further checks are needed. |
4432 | + return auth.get('scheme', None) == 'basic' |
4433 | + |
4434 | + |
4435 | +def get_digest_algorithm_impls(algorithm): |
4436 | + H = None |
4437 | + KD = None |
4438 | + if algorithm == 'MD5': |
4439 | + def H(x): return osutils.md5(x).hexdigest() |
4440 | + elif algorithm == 'SHA': |
4441 | + H = osutils.sha_string |
4442 | + if H is not None: |
4443 | + def KD(secret, data): return H( |
4444 | + ("%s:%s" % (secret, data)).encode('utf-8')) |
4445 | + return H, KD |
4446 | + |
4447 | + |
4448 | +def get_new_cnonce(nonce, nonce_count): |
4449 | + raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(), |
4450 | + osutils.rand_chars(8)) |
4451 | + return osutils.sha_string(raw.encode('utf-8'))[:16] |
4452 | + |
4453 | + |
4454 | +class DigestAuthHandler(AbstractAuthHandler): |
4455 | + """A custom digest authentication handler.""" |
4456 | + |
4457 | + scheme = 'digest' |
4458 | + # Before basic as digest is a bit more secure and should be preferred |
4459 | + handler_order = 490 |
4460 | + |
4461 | + def auth_params_reusable(self, auth): |
4462 | + # If the auth scheme is known, it means a previous |
4463 | + # authentication was successful, all information is |
4464 | + # available, no further checks are needed. |
4465 | + return auth.get('scheme', None) == 'digest' |
4466 | + |
4467 | + def auth_match(self, header, auth): |
4468 | + scheme, raw_auth = self._parse_auth_header(header) |
4469 | + if scheme != self.scheme: |
4470 | + return False |
4471 | + |
4472 | + # Put the requested authentication info into a dict |
4473 | + req_auth = urllib_request.parse_keqv_list( |
4474 | + urllib_request.parse_http_list(raw_auth)) |
4475 | + |
4476 | + # Check that we can handle that authentication |
4477 | + qop = req_auth.get('qop', None) |
4478 | + if qop != 'auth': # No auth-int so far |
4479 | + return False |
4480 | + |
4481 | + H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5')) |
4482 | + if H is None: |
4483 | + return False |
4484 | + |
4485 | + realm = req_auth.get('realm', None) |
4486 | + # Put useful info into auth |
4487 | + self.update_auth(auth, 'scheme', scheme) |
4488 | + self.update_auth(auth, 'realm', realm) |
4489 | + if auth.get('user', None) is None or auth.get('password', None) is None: |
4490 | + user, password = self.get_user_password(auth) |
4491 | + self.update_auth(auth, 'user', user) |
4492 | + self.update_auth(auth, 'password', password) |
4493 | + |
4494 | + try: |
4495 | + if req_auth.get('algorithm', None) is not None: |
4496 | + self.update_auth(auth, 'algorithm', req_auth.get('algorithm')) |
4497 | + nonce = req_auth['nonce'] |
4498 | + if auth.get('nonce', None) != nonce: |
4499 | + # A new nonce, never used |
4500 | + self.update_auth(auth, 'nonce_count', 0) |
4501 | + self.update_auth(auth, 'nonce', nonce) |
4502 | + self.update_auth(auth, 'qop', qop) |
4503 | + auth['opaque'] = req_auth.get('opaque', None) |
4504 | + except KeyError: |
4505 | + # Some required field is not there |
4506 | + return False |
4507 | + |
4508 | + return True |
4509 | + |
4510 | + def build_auth_header(self, auth, request): |
4511 | + if PY3: |
4512 | + selector = request.selector |
4513 | + else: |
4514 | + selector = request.get_selector() |
4515 | + url_scheme, url_selector = splittype(selector) |
4516 | + sel_host, uri = splithost(url_selector) |
4517 | + |
4518 | + A1 = ('%s:%s:%s' % |
4519 | + (auth['user'], auth['realm'], auth['password'])).encode('utf-8') |
4520 | + A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8') |
4521 | + |
4522 | + nonce = auth['nonce'] |
4523 | + qop = auth['qop'] |
4524 | + |
4525 | + nonce_count = auth['nonce_count'] + 1 |
4526 | + ncvalue = '%08x' % nonce_count |
4527 | + cnonce = get_new_cnonce(nonce, nonce_count) |
4528 | + |
4529 | + H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5')) |
4530 | + nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2)) |
4531 | + request_digest = KD(H(A1), nonce_data) |
4532 | + |
4533 | + header = 'Digest ' |
4534 | + header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'], |
4535 | + auth['realm'], |
4536 | + nonce) |
4537 | + header += ', uri="%s"' % uri |
4538 | + header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue) |
4539 | + header += ', qop="%s"' % qop |
4540 | + header += ', response="%s"' % request_digest |
4541 | + # Append the optional fields |
4542 | + opaque = auth.get('opaque', None) |
4543 | + if opaque: |
4544 | + header += ', opaque="%s"' % opaque |
4545 | + if auth.get('algorithm', None): |
4546 | + header += ', algorithm="%s"' % auth.get('algorithm') |
4547 | + |
4548 | + # We have used the nonce once more, update the count |
4549 | + auth['nonce_count'] = nonce_count |
4550 | + |
4551 | + return header |
4552 | + |
4553 | + |
4554 | +class HTTPAuthHandler(AbstractAuthHandler): |
4555 | + """Custom http authentication handler. |
4556 | + |
4557 | + Send the authentication preventively to avoid the roundtrip |
4558 | + associated with the 401 error and keep the relevant info in
4559 | + the auth request attribute. |
4560 | + """ |
4561 | + |
4562 | + auth_required_header = 'www-authenticate' |
4563 | + auth_header = 'Authorization' |
4564 | + |
4565 | + def get_auth(self, request): |
4566 | + """Get the auth params from the request""" |
4567 | + return request.auth |
4568 | + |
4569 | + def set_auth(self, request, auth): |
4570 | + """Set the auth params for the request""" |
4571 | + request.auth = auth |
4572 | + |
4573 | + def build_password_prompt(self, auth): |
4574 | + return self._build_password_prompt(auth) |
4575 | + |
4576 | + def build_username_prompt(self, auth): |
4577 | + return self._build_username_prompt(auth) |
4578 | + |
4579 | + def http_error_401(self, req, fp, code, msg, headers): |
4580 | + return self.auth_required(req, headers) |
4581 | + |
4582 | + |
4583 | +class ProxyAuthHandler(AbstractAuthHandler): |
4584 | + """Custom proxy authentication handler. |
4585 | + |
4586 | +    Send the authentication preemptively to avoid the roundtrip
4587 | +    associated with the 407 error and keep the relevant info in
4588 | +    the proxy_auth request attribute.
4589 | + """ |
4590 | + |
4591 | + auth_required_header = 'proxy-authenticate' |
4592 | + # FIXME: the correct capitalization is Proxy-Authorization, |
4593 | +    # but python-2.4 urllib_request.Request insists on using capitalize()
4594 | + # instead of title(). |
4595 | + auth_header = 'Proxy-authorization' |
4596 | + |
4597 | + def get_auth(self, request): |
4598 | + """Get the auth params from the request""" |
4599 | + return request.proxy_auth |
4600 | + |
4601 | + def set_auth(self, request, auth): |
4602 | + """Set the auth params for the request""" |
4603 | + request.proxy_auth = auth |
4604 | + |
4605 | + def build_password_prompt(self, auth): |
4606 | + prompt = self._build_password_prompt(auth) |
4607 | + prompt = u'Proxy ' + prompt |
4608 | + return prompt |
4609 | + |
4610 | + def build_username_prompt(self, auth): |
4611 | + prompt = self._build_username_prompt(auth) |
4612 | + prompt = u'Proxy ' + prompt |
4613 | + return prompt |
4614 | + |
4615 | + def http_error_407(self, req, fp, code, msg, headers): |
4616 | + return self.auth_required(req, headers) |
4617 | + |
4618 | + |
4619 | +class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler): |
4620 | + """Custom http basic authentication handler""" |
4621 | + |
4622 | + |
4623 | +class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler): |
4624 | + """Custom proxy basic authentication handler""" |
4625 | + |
4626 | + |
4627 | +class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler): |
4628 | + """Custom http basic authentication handler""" |
4629 | + |
4630 | + |
4631 | +class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler): |
4632 | + """Custom proxy basic authentication handler""" |
4633 | + |
4634 | + |
4635 | +class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler): |
4636 | + """Custom http negotiate authentication handler""" |
4637 | + |
4638 | + |
4639 | +class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler): |
4640 | + """Custom proxy negotiate authentication handler""" |
4641 | + |
4642 | + |
4643 | +class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor): |
4644 | + """Process HTTP error responses. |
4645 | + |
4646 | + We don't really process the errors, quite the contrary |
4647 | + instead, we leave our Transport handle them. |
4648 | + """ |
4649 | + |
4650 | + accepted_errors = [200, # Ok |
4651 | + 201, |
4652 | + 202, |
4653 | + 204, |
4654 | + 206, # Partial content |
4655 | + 400, |
4656 | + 403, |
4657 | + 404, # Not found |
4658 | + 405, # Method not allowed |
4659 | + 406, # Not Acceptable |
4660 | + 409, # Conflict |
4661 | + 416, # Range not satisfiable |
4662 | +                       422, # Unprocessable entity
4663 | + 501, # Not implemented |
4664 | + ] |
4665 | + """The error codes the caller will handle. |
4666 | + |
4667 | + This can be specialized in the request on a case-by case basis, but the |
4668 | + common cases are covered here. |
4669 | + """ |
4670 | + |
4671 | + def http_response(self, request, response): |
4672 | + code, msg, hdrs = response.code, response.msg, response.info() |
4673 | + |
4674 | + if code not in self.accepted_errors: |
4675 | + response = self.parent.error('http', request, response, |
4676 | + code, msg, hdrs) |
4677 | + return response |
4678 | + |
4679 | + https_response = http_response |
4680 | + |
4681 | + |
4682 | +class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler): |
4683 | + """Translate common errors into Breezy Exceptions""" |
4684 | + |
4685 | + def http_error_default(self, req, fp, code, msg, hdrs): |
4686 | + if code == 403: |
4687 | + raise errors.TransportError( |
4688 | + 'Server refuses to fulfill the request (403 Forbidden)' |
4689 | + ' for %s' % req.get_full_url()) |
4690 | + else: |
4691 | + raise errors.UnexpectedHttpStatus( |
4692 | + req.get_full_url(), code, |
4693 | + 'Unable to handle http code: %s' % msg) |
4694 | + |
4695 | + |
4696 | +class Opener(object): |
4697 | + """A wrapper around urllib_request.build_opener |
4698 | + |
4699 | + Daughter classes can override to build their own specific opener |
4700 | + """ |
4701 | + # TODO: Provides hooks for daughter classes. |
4702 | + |
4703 | + def __init__(self, |
4704 | + connection=ConnectionHandler, |
4705 | + redirect=HTTPRedirectHandler, |
4706 | + error=HTTPErrorProcessor, |
4707 | + report_activity=None, |
4708 | + ca_certs=None): |
4709 | + self._opener = urllib_request.build_opener( |
4710 | + connection(report_activity=report_activity, ca_certs=ca_certs), |
4711 | + redirect, error, |
4712 | + ProxyHandler(), |
4713 | + HTTPBasicAuthHandler(), |
4714 | + HTTPDigestAuthHandler(), |
4715 | + HTTPNegotiateAuthHandler(), |
4716 | + ProxyBasicAuthHandler(), |
4717 | + ProxyDigestAuthHandler(), |
4718 | + ProxyNegotiateAuthHandler(), |
4719 | + HTTPHandler, |
4720 | + HTTPSHandler, |
4721 | + HTTPDefaultErrorHandler, |
4722 | + ) |
4723 | + |
4724 | + self.open = self._opener.open |
4725 | + if DEBUG >= 9: |
4726 | + # When dealing with handler order, it's easy to mess |
4727 | + # things up, the following will help understand which |
4728 | + # handler is used, when and for what. |
4729 | + import pprint |
4730 | + pprint.pprint(self._opener.__dict__) |
4731 | + |
4732 | + |
4733 | +class HttpTransport(ConnectedTransport): |
4734 | + """HTTP Client implementations. |
4735 | + |
4736 | + The protocol can be given as e.g. http+urllib://host/ to use a particular |
4737 | + implementation. |
4738 | + """ |
4739 | + |
4740 | + # _unqualified_scheme: "http" or "https" |
4741 | + # _scheme: may have "+pycurl", etc |
4742 | + |
4743 | + # In order to debug we have to issue our traces in sync with |
4744 | +    # httplib, which uses print :(
4745 | + _debuglevel = 0 |
4746 | + |
4747 | + def __init__(self, base, _from_transport=None, ca_certs=None): |
4748 | + """Set the base path where files will be stored.""" |
4749 | + proto_match = re.match(r'^(https?)(\+\w+)?://', base) |
4750 | + if not proto_match: |
4751 | + raise AssertionError("not a http url: %r" % base) |
4752 | + self._unqualified_scheme = proto_match.group(1) |
4753 | + super(HttpTransport, self).__init__( |
4754 | + base, _from_transport=_from_transport) |
4755 | + self._medium = None |
4756 | + # range hint is handled dynamically throughout the life |
4757 | + # of the transport object. We start by trying multi-range |
4758 | + # requests and if the server returns bogus results, we |
4759 | + # retry with single range requests and, finally, we |
4760 | + # forget about range if the server really can't |
4761 | + # understand. Once acquired, this piece of info is |
4762 | + # propagated to clones. |
4763 | + if _from_transport is not None: |
4764 | + self._range_hint = _from_transport._range_hint |
4765 | + self._opener = _from_transport._opener |
4766 | + else: |
4767 | + self._range_hint = 'multi' |
4768 | + self._opener = Opener( |
4769 | + report_activity=self._report_activity, ca_certs=ca_certs) |
4770 | + |
4771 | + def request(self, method, url, fields=None, headers=None, **urlopen_kw): |
4772 | + body = urlopen_kw.pop('body', None) |
4773 | + if fields is not None: |
4774 | + data = urlencode(fields).encode() |
4775 | + if body is not None: |
4776 | + raise ValueError( |
4777 | + 'body and fields are mutually exclusive') |
4778 | + else: |
4779 | + data = body |
4780 | + if headers is None: |
4781 | + headers = {} |
4782 | + request = Request(method, url, data, headers) |
4783 | + request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0) |
4784 | + if urlopen_kw: |
4785 | + raise NotImplementedError( |
4786 | + 'unknown arguments: %r' % urlopen_kw.keys()) |
4787 | + connection = self._get_connection() |
4788 | + if connection is not None: |
4789 | + # Give back shared info |
4790 | + request.connection = connection |
4791 | + (auth, proxy_auth) = self._get_credentials() |
4792 | + # Clean the httplib.HTTPConnection pipeline in case the previous |
4793 | + # request couldn't do it |
4794 | + connection.cleanup_pipe() |
4795 | + else: |
4796 | + # First request, initialize credentials. |
4797 | + # scheme and realm will be set by the _urllib2_wrappers.AuthHandler |
4798 | + auth = self._create_auth() |
4799 | + # Proxy initialization will be done by the first proxied request |
4800 | + proxy_auth = dict() |
4801 | + # Ensure authentication info is provided |
4802 | + request.auth = auth |
4803 | + request.proxy_auth = proxy_auth |
4804 | + |
4805 | + if self._debuglevel > 0: |
4806 | + print('perform: %s base: %s, url: %s' % (request.method, self.base, |
4807 | + request.get_full_url())) |
4808 | + response = self._opener.open(request) |
4809 | + if self._get_connection() is not request.connection: |
4810 | + # First connection or reconnection |
4811 | + self._set_connection(request.connection, |
4812 | + (request.auth, request.proxy_auth)) |
4813 | + else: |
4814 | + # http may change the credentials while keeping the |
4815 | + # connection opened |
4816 | + self._update_credentials((request.auth, request.proxy_auth)) |
4817 | + |
4818 | + code = response.code |
4819 | + if (request.follow_redirections is False |
4820 | + and code in (301, 302, 303, 307, 308)): |
4821 | + raise errors.RedirectRequested(request.get_full_url(), |
4822 | + request.redirected_to, |
4823 | + is_permanent=(code in (301, 308))) |
4824 | + |
4825 | + if request.redirected_to is not None: |
4826 | + trace.mutter('redirected from: %s to: %s' % (request.get_full_url(), |
4827 | + request.redirected_to)) |
4828 | + |
4829 | + class Urllib3LikeResponse(object): |
4830 | + |
4831 | + def __init__(self, actual): |
4832 | + self._actual = actual |
4833 | + self._data = None |
4834 | + |
4835 | + def getheader(self, name, default=None): |
4836 | + if self._actual.headers is None: |
4837 | + raise http_client.ResponseNotReady() |
4838 | + if PY3: |
4839 | + return self._actual.headers.get(name, default) |
4840 | + else: |
4841 | + return self._actual.headers.getheader(name, default) |
4842 | + |
4843 | + def getheaders(self): |
4844 | + if self._actual.headers is None: |
4845 | + raise http_client.ResponseNotReady() |
4846 | + return list(self._actual.headers.items()) |
4847 | + |
4848 | + @property |
4849 | + def status(self): |
4850 | + return self._actual.code |
4851 | + |
4852 | + @property |
4853 | + def reason(self): |
4854 | + return self._actual.reason |
4855 | + |
4856 | + @property |
4857 | + def data(self): |
4858 | + if self._data is None: |
4859 | + self._data = self._actual.read() |
4860 | + return self._data |
4861 | + |
4862 | + @property |
4863 | + def text(self): |
4864 | + if self.status == 204: |
4865 | + return None |
4866 | + charset = cgi.parse_header( |
4867 | + self._actual.headers['Content-Type'])[1].get('charset') |
4868 | + if charset: |
4869 | + return self.data.decode(charset) |
4870 | + else: |
4871 | + return self.data.decode() |
4872 | + |
4873 | + def read(self, amt=None): |
4874 | + return self._actual.read(amt) |
4875 | + |
4876 | + def readlines(self): |
4877 | + return self._actual.readlines() |
4878 | + |
4879 | + def readline(self, size=-1): |
4880 | + return self._actual.readline(size) |
4881 | + |
4882 | + return Urllib3LikeResponse(response) |
4883 | + |
4884 | + def disconnect(self): |
4885 | + connection = self._get_connection() |
4886 | + if connection is not None: |
4887 | + connection.close() |
4888 | + |
4889 | + def has(self, relpath): |
4890 | + """Does the target location exist? |
4891 | + """ |
4892 | + response = self._head(relpath) |
4893 | + |
4894 | + code = response.status |
4895 | + if code == 200: # "ok", |
4896 | + return True |
4897 | + else: |
4898 | + return False |
4899 | + |
4900 | + def get(self, relpath): |
4901 | + """Get the file at the given relative path. |
4902 | + |
4903 | + :param relpath: The relative path to the file |
4904 | + """ |
4905 | + code, response_file = self._get(relpath, None) |
4906 | + return response_file |
4907 | + |
4908 | + def _get(self, relpath, offsets, tail_amount=0): |
4909 | + """Get a file, or part of a file. |
4910 | + |
4911 | + :param relpath: Path relative to transport base URL |
4912 | + :param offsets: None to get the whole file; |
4913 | + or a list of _CoalescedOffset to fetch parts of a file. |
4914 | + :param tail_amount: The amount to get from the end of the file. |
4915 | + |
4916 | + :returns: (http_code, result_file) |
4917 | + """ |
4918 | + abspath = self._remote_path(relpath) |
4919 | + headers = {} |
4920 | + if offsets or tail_amount: |
4921 | + range_header = self._attempted_range_header(offsets, tail_amount) |
4922 | + if range_header is not None: |
4923 | + bytes = 'bytes=' + range_header |
4924 | + headers = {'Range': bytes} |
4925 | + else: |
4926 | + range_header = None |
4927 | + |
4928 | + response = self.request('GET', abspath, headers=headers) |
4929 | + |
4930 | + if response.status == 404: # not found |
4931 | + raise errors.NoSuchFile(abspath) |
4932 | + elif response.status == 416: |
4933 | + # We don't know which, but one of the ranges we specified was |
4934 | + # wrong. |
4935 | + raise errors.InvalidHttpRange(abspath, range_header, |
4936 | + 'Server return code %d' % response.status) |
4937 | + elif response.status == 400: |
4938 | + if range_header: |
4939 | + # We don't know which, but one of the ranges we specified was |
4940 | + # wrong. |
4941 | + raise errors.InvalidHttpRange( |
4942 | + abspath, range_header, |
4943 | + 'Server return code %d' % response.status) |
4944 | + else: |
4945 | + raise errors.BadHttpRequest(abspath, response.reason) |
4946 | + elif response.status not in (200, 206): |
4947 | + raise errors.UnexpectedHttpStatus(abspath, response.status) |
4948 | + |
4949 | + data = handle_response( |
4950 | + abspath, response.status, response.getheader, response) |
4951 | + return response.status, data |
4952 | + |
4953 | + def _remote_path(self, relpath): |
4954 | + """See ConnectedTransport._remote_path. |
4955 | + |
4956 | + user and passwords are not embedded in the path provided to the server. |
4957 | + """ |
4958 | + url = self._parsed_url.clone(relpath) |
4959 | + url.user = url.quoted_user = None |
4960 | + url.password = url.quoted_password = None |
4961 | + url.scheme = self._unqualified_scheme |
4962 | + return str(url) |
4963 | + |
4964 | + def _create_auth(self): |
4965 | + """Returns a dict containing the credentials provided at build time.""" |
4966 | + auth = dict(host=self._parsed_url.host, port=self._parsed_url.port, |
4967 | + user=self._parsed_url.user, password=self._parsed_url.password, |
4968 | + protocol=self._unqualified_scheme, |
4969 | + path=self._parsed_url.path) |
4970 | + return auth |
4971 | + |
4972 | + def get_smart_medium(self): |
4973 | + """See Transport.get_smart_medium.""" |
4974 | + if self._medium is None: |
4975 | + # Since medium holds some state (smart server probing at least), we |
4976 | + # need to keep it around. Note that this is needed because medium |
4977 | + # has the same 'base' attribute as the transport so it can't be |
4978 | + # shared between transports having different bases. |
4979 | + self._medium = SmartClientHTTPMedium(self) |
4980 | + return self._medium |
4981 | + |
4982 | + def _degrade_range_hint(self, relpath, ranges): |
4983 | + if self._range_hint == 'multi': |
4984 | + self._range_hint = 'single' |
4985 | + mutter('Retry "%s" with single range request' % relpath) |
4986 | + elif self._range_hint == 'single': |
4987 | + self._range_hint = None |
4988 | + mutter('Retry "%s" without ranges' % relpath) |
4989 | + else: |
4990 | + # We tried all the tricks, but nothing worked, caller must reraise. |
4991 | + return False |
4992 | + return True |
4993 | + |
4994 | + # _coalesce_offsets is a helper for readv, it try to combine ranges without |
4995 | + # degrading readv performances. _bytes_to_read_before_seek is the value |
4996 | + # used for the limit parameter and has been tuned for other transports. For |
4997 | + # HTTP, the name is inappropriate but the parameter is still useful and |
4998 | + # helps reduce the number of chunks in the response. The overhead for a |
4999 | + # chunk (headers, length, footer around the data itself is variable but |
5000 | + # around 50 bytes. We use 128 to reduce the range specifiers that appear in |
The diff has been truncated for viewing.
Looks good!