Merge lp:~jelmer/brz/integrate-urllib2-wrappers into lp:brz

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/integrate-urllib2-wrappers
Merge into: lp:brz
Diff against target: 4026 lines (+1880/-1935)
8 files modified
breezy/config.py (+2/-2)
breezy/git/remote.py (+1/-1)
breezy/plugins/launchpad/lp_registration.py (+3/-3)
breezy/tests/test_http.py (+15/-16)
breezy/tests/test_http_response.py (+3/-3)
breezy/tests/test_https_urllib.py (+2/-3)
breezy/transport/http/__init__.py (+1854/-7)
breezy/transport/http/_urllib2_wrappers.py (+0/-1900)
To merge this branch: bzr merge lp:~jelmer/brz/integrate-urllib2-wrappers
Reviewer Review Type Date Requested Status
Jelmer Vernooij Approve
Vincent Ladeuil Abstain
Review via email: mp+367308@code.launchpad.net

Commit message

Integrate urllib2_wrappers into http/__init__.py.

Description of the change

Integrate urllib2_wrappers into http/__init__.py.

To post a comment you must log in.
Revision history for this message
Vincent Ladeuil (vila) wrote :

Disclaimer: _urllib2_wrappers (and a good chunk of the http test infra) was my first big contribution to the project. I may be biased :)

This code was written > 10 years ago based on urllib2, whose design predates it by even more years.

urllib2 is a thin layer above the http protocol, allowing everything to be built on top.

https cert handling, proxies, authentication, connection sharing, everything has to be added leading to some surprisingly well hidden bugs (see https://code.launchpad.net/~vila/bzr/1606203-long-auth/+merge/305328 for instance, the bug was literally in the first version written).

Nowadays the requests library provides all that and properly exposes Request objects hiding all the http[s] gory details (in a curated implementation).

I'm abstaining here as I do think _urllib2_wrappers should die (in its own file for that matter to keep its history blamable) but rather be /replaced/ by a requests-based implementation.

I.e. I'd keep the existing implementation untouched while developing the other and remove it as a final step before submitting ;)

review: Abstain
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Thanks for the comments.

I think there are two things to do here:

* get rid of the abstraction - we just want to support a single HTTP implementation
* migrate from urllib2 to urllib3/requests

We can do either first I think, but you're suggesting doing the latter first while I've done the first?

Revision history for this message
Jelmer Vernooij (jelmer) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'breezy/config.py'
2--- breezy/config.py 2019-02-02 15:13:30 +0000
3+++ breezy/config.py 2019-05-11 18:51:20 +0000
4@@ -2937,10 +2937,10 @@
5 from_unicode=bool_from_store, invalid='warning',
6 help='''Whether to validate signatures in brz log.'''))
7 option_registry.register_lazy('ssl.ca_certs',
8- 'breezy.transport.http._urllib2_wrappers', 'opt_ssl_ca_certs')
9+ 'breezy.transport.http', 'opt_ssl_ca_certs')
10
11 option_registry.register_lazy('ssl.cert_reqs',
12- 'breezy.transport.http._urllib2_wrappers', 'opt_ssl_cert_reqs')
13+ 'breezy.transport.http', 'opt_ssl_cert_reqs')
14
15
16 class Section(object):
17
18=== modified file 'breezy/git/remote.py'
19--- breezy/git/remote.py 2019-02-15 14:45:15 +0000
20+++ breezy/git/remote.py 2019-05-11 18:51:20 +0000
21@@ -692,7 +692,7 @@
22 `redirect_location` properties, and `read` is a consumable read
23 method for the response data.
24 """
25- from breezy.transport.http._urllib2_wrappers import Request
26+ from breezy.transport.http import Request
27 headers['User-agent'] = user_agent_for_github()
28 headers["Pragma"] = "no-cache"
29 if allow_compression:
30
31=== modified file 'breezy/plugins/launchpad/lp_registration.py'
32--- breezy/plugins/launchpad/lp_registration.py 2019-01-18 01:19:29 +0000
33+++ breezy/plugins/launchpad/lp_registration.py 2019-05-11 18:51:20 +0000
34@@ -46,7 +46,7 @@
35 urlutils,
36 __version__ as _breezy_version,
37 )
38-from ...transport.http import _urllib2_wrappers
39+from ...transport import http
40
41 from .lp_api import (
42 DEFAULT_INSTANCE,
43@@ -87,13 +87,13 @@
44 def __init__(self, scheme):
45 Transport.__init__(self)
46 self._scheme = scheme
47- self._opener = _urllib2_wrappers.Opener()
48+ self._opener = http.Opener()
49 self.verbose = 0
50
51 def request(self, host, handler, request_body, verbose=0):
52 self.verbose = verbose
53 url = self._scheme + "://" + host + handler
54- request = _urllib2_wrappers.Request("POST", url, request_body)
55+ request = http.Request("POST", url, request_body)
56 request.add_header("User-Agent", self.user_agent)
57 request.add_header("Content-Type", "text/xml")
58
59
60=== modified file 'breezy/tests/test_http.py'
61--- breezy/tests/test_http.py 2018-11-12 01:41:38 +0000
62+++ breezy/tests/test_http.py 2019-05-11 18:51:20 +0000
63@@ -67,7 +67,6 @@
64 )
65 from ..transport.http import (
66 HttpTransport,
67- _urllib2_wrappers,
68 )
69
70
71@@ -226,7 +225,7 @@
72
73 def parse_header(self, header, auth_handler_class=None):
74 if auth_handler_class is None:
75- auth_handler_class = _urllib2_wrappers.AbstractAuthHandler
76+ auth_handler_class = http.AbstractAuthHandler
77 self.auth_handler = auth_handler_class()
78 return self.auth_handler._parse_auth_header(header)
79
80@@ -247,7 +246,7 @@
81 self.assertEqual('realm="Thou should not pass"', remainder)
82
83 def test_build_basic_header_with_long_creds(self):
84- handler = _urllib2_wrappers.BasicAuthHandler()
85+ handler = http.BasicAuthHandler()
86 user = 'user' * 10 # length 40
87 password = 'password' * 5 # length 40
88 header = handler.build_auth_header(
89@@ -259,7 +258,7 @@
90 def test_basic_extract_realm(self):
91 scheme, remainder = self.parse_header(
92 'Basic realm="Thou should not pass"',
93- _urllib2_wrappers.BasicAuthHandler)
94+ http.BasicAuthHandler)
95 match, realm = self.auth_handler.extract_realm(remainder)
96 self.assertTrue(match is not None)
97 self.assertEqual(u'Thou should not pass', realm)
98@@ -1130,13 +1129,13 @@
99 """
100
101 def _proxied_request(self):
102- handler = _urllib2_wrappers.ProxyHandler()
103- request = _urllib2_wrappers.Request('GET', 'http://baz/buzzle')
104+ handler = http.ProxyHandler()
105+ request = http.Request('GET', 'http://baz/buzzle')
106 handler.set_proxy(request, 'http')
107 return request
108
109 def assertEvaluateProxyBypass(self, expected, host, no_proxy):
110- handler = _urllib2_wrappers.ProxyHandler()
111+ handler = http.ProxyHandler()
112 self.assertEqual(expected,
113 handler.evaluate_proxy_bypass(host, no_proxy))
114
115@@ -1328,29 +1327,29 @@
116 self.get_new_transport().get('a').read())
117
118
119-class RedirectedRequest(_urllib2_wrappers.Request):
120+class RedirectedRequest(http.Request):
121 """Request following redirections. """
122
123- init_orig = _urllib2_wrappers.Request.__init__
124+ init_orig = http.Request.__init__
125
126 def __init__(self, method, url, *args, **kwargs):
127 """Constructor.
128
129 """
130 # Since the tests using this class will replace
131- # _urllib2_wrappers.Request, we can't just call the base class __init__
132+ # http.Request, we can't just call the base class __init__
133 # or we'll loop.
134 RedirectedRequest.init_orig(self, method, url, *args, **kwargs)
135 self.follow_redirections = True
136
137
138 def install_redirected_request(test):
139- test.overrideAttr(_urllib2_wrappers, 'Request', RedirectedRequest)
140+ test.overrideAttr(http, 'Request', RedirectedRequest)
141
142
143 def cleanup_http_redirection_connections(test):
144 # Some sockets are opened but never seen by _urllib, so we trap them at
145- # the _urllib2_wrappers level to be able to clean them up.
146+ # the http level to be able to clean them up.
147 def socket_disconnect(sock):
148 try:
149 sock.shutdown(socket.SHUT_RDWR)
150@@ -1362,13 +1361,13 @@
151 test.http_connect_orig(connection)
152 test.addCleanup(socket_disconnect, connection.sock)
153 test.http_connect_orig = test.overrideAttr(
154- _urllib2_wrappers.HTTPConnection, 'connect', connect)
155+ http.HTTPConnection, 'connect', connect)
156
157 def connect(connection):
158 test.https_connect_orig(connection)
159 test.addCleanup(socket_disconnect, connection.sock)
160 test.https_connect_orig = test.overrideAttr(
161- _urllib2_wrappers.HTTPSConnection, 'connect', connect)
162+ http.HTTPSConnection, 'connect', connect)
163
164
165 class TestHTTPSilentRedirections(http_utils.TestCaseWithRedirectedWebserver):
166@@ -1376,7 +1375,7 @@
167
168 http implementations do not redirect silently anymore (they
169 do not redirect at all in fact). The mechanism is still in
170- place at the _urllib2_wrappers.Request level and these tests
171+ place at the http.Request level and these tests
172 exercise it.
173 """
174
175@@ -1498,7 +1497,7 @@
176 password = 'foo'
177 _setup_authentication_config(scheme='http', host='localhost',
178 user=user, password=password)
179- handler = _urllib2_wrappers.HTTPAuthHandler()
180+ handler = http.HTTPAuthHandler()
181 got_pass = handler.get_user_password(dict(
182 user='joe',
183 protocol='http',
184
185=== modified file 'breezy/tests/test_http_response.py'
186--- breezy/tests/test_http_response.py 2019-03-24 20:59:14 +0000
187+++ breezy/tests/test_http_response.py 2019-05-11 18:51:20 +0000
188@@ -56,7 +56,7 @@
189 )
190 from ..transport.http import (
191 response,
192- _urllib2_wrappers,
193+ HTTPConnection,
194 )
195 from .file_utils import (
196 FakeReadFile,
197@@ -73,10 +73,10 @@
198 return self.readfile
199
200
201-class FakeHTTPConnection(_urllib2_wrappers.HTTPConnection):
202+class FakeHTTPConnection(HTTPConnection):
203
204 def __init__(self, sock):
205- _urllib2_wrappers.HTTPConnection.__init__(self, 'localhost')
206+ HTTPConnection.__init__(self, 'localhost')
207 # Set the socket to bypass the connection
208 self.sock = sock
209
210
211=== modified file 'breezy/tests/test_https_urllib.py'
212--- breezy/tests/test_https_urllib.py 2018-11-23 01:35:56 +0000
213+++ breezy/tests/test_https_urllib.py 2019-05-11 18:51:20 +0000
214@@ -26,8 +26,7 @@
215 trace,
216 )
217 from .. import tests
218-from ..transport.http import _urllib2_wrappers
219-from ..transport.http._urllib2_wrappers import ssl
220+from ..transport.http import ssl, opt_ssl_ca_certs
221
222
223 class CaCertsConfigTests(tests.TestCaseInTempDir):
224@@ -49,7 +48,7 @@
225 def test_specified_doesnt_exist(self):
226 stack = self.get_stack('')
227 # Disable the default value mechanism to force the behavior we want
228- self.overrideAttr(_urllib2_wrappers.opt_ssl_ca_certs, 'default',
229+ self.overrideAttr(opt_ssl_ca_certs, 'default',
230 os.path.join(self.test_dir, u"nonexisting.pem"))
231 self.warnings = []
232
233
234=== modified file 'breezy/transport/http/__init__.py'
235--- breezy/transport/http/__init__.py 2018-11-12 01:41:38 +0000
236+++ breezy/transport/http/__init__.py 2019-05-11 18:51:20 +0000
237@@ -21,30 +21,1877 @@
238
239 from __future__ import absolute_import
240
241+DEBUG = 0
242+
243+import base64
244+import errno
245 import os
246 import re
247+import socket
248+import ssl
249 import sys
250+import time
251+import urllib
252 import weakref
253
254+try:
255+ import http.client as http_client
256+except ImportError:
257+ import httplib as http_client
258+try:
259+ import urllib.request as urllib_request
260+except ImportError: # python < 3
261+ import urllib2 as urllib_request
262+try:
263+ from urllib.parse import urljoin, splitport, splittype, splithost
264+except ImportError:
265+ from urlparse import urljoin
266+ from urllib import splitport, splittype, splithost
267+
268+# TODO: handle_response should be integrated into the http/__init__.py
269+from .response import handle_response
270+
271+# FIXME: Oversimplifying, two kind of exceptions should be
272+# raised, once a request is issued: URLError before we have been
273+# able to process the response, HTTPError after that. Process the
274+# response means we are able to leave the socket clean, so if we
275+# are not able to do that, we should close the connection. The
276+# actual code more or less do that, tests should be written to
277+# ensure that.
278+
279+from ... import __version__ as breezy_version
280 from ... import (
281+ config,
282 debug,
283 errors,
284+ lazy_import,
285+ osutils,
286+ trace,
287 transport,
288 ui,
289 urlutils,
290- )
291+)
292 from ...bzr.smart import medium
293+from ...sixish import (
294+ PY3,
295+ reraise,
296+ text_type,
297+)
298 from ...trace import mutter
299 from ...transport import (
300 ConnectedTransport,
301 )
302
303-# TODO: handle_response should be integrated into the http/__init__.py
304-from .response import handle_response
305-from ._urllib2_wrappers import (
306- Opener,
307- Request,
308- )
309+
310+try:
311+ _ = (ssl.match_hostname, ssl.CertificateError)
312+except AttributeError:
313+ # Provide fallbacks for python < 2.7.9
314+ def match_hostname(cert, host):
315+ trace.warning(
316+ '%s cannot be verified, https certificates verification is only'
317+ ' available for python versions >= 2.7.9' % (host,))
318+ ssl.match_hostname = match_hostname
319+ ssl.CertificateError = ValueError
320+
321+
322+# Note for packagers: if there is no package providing certs for your platform,
323+# the curl project produces http://curl.haxx.se/ca/cacert.pem weekly.
324+_ssl_ca_certs_known_locations = [
325+ u'/etc/ssl/certs/ca-certificates.crt', # Ubuntu/debian/gentoo
326+ u'/etc/pki/tls/certs/ca-bundle.crt', # Fedora/CentOS/RH
327+ u'/etc/ssl/ca-bundle.pem', # OpenSuse
328+ u'/etc/ssl/cert.pem', # OpenSuse
329+ u"/usr/local/share/certs/ca-root-nss.crt", # FreeBSD
330+ # XXX: Needs checking, can't trust the interweb ;) -- vila 2012-01-25
331+ u'/etc/openssl/certs/ca-certificates.crt', # Solaris
332+]
333+
334+
335+def default_ca_certs():
336+ if sys.platform == 'win32':
337+ return os.path.join(os.path.dirname(sys.executable), u"cacert.pem")
338+ elif sys.platform == 'darwin':
339+ # FIXME: Needs some default value for osx, waiting for osx installers
340+ # guys feedback -- vila 2012-01-25
341+ pass
342+ else:
343+ # Try known locations for friendly OSes providing the root certificates
344+ # without making them hard to use for any https client.
345+ for path in _ssl_ca_certs_known_locations:
346+ if os.path.exists(path):
347+ # First found wins
348+ return path
349+ # A default path that makes sense and will be mentioned in the error
350+ # presented to the user, even if not correct for all platforms
351+ return _ssl_ca_certs_known_locations[0]
352+
353+
354+def ca_certs_from_store(path):
355+ if not os.path.exists(path):
356+ raise ValueError("ca certs path %s does not exist" % path)
357+ return path
358+
359+
360+def cert_reqs_from_store(unicode_str):
361+ import ssl
362+ try:
363+ return {"required": ssl.CERT_REQUIRED,
364+ "none": ssl.CERT_NONE}[unicode_str]
365+ except KeyError:
366+ raise ValueError("invalid value %s" % unicode_str)
367+
368+
369+def default_ca_reqs():
370+ if sys.platform in ('win32', 'darwin'):
371+ # FIXME: Once we get a native access to root certificates there, this
372+ # won't needed anymore. See http://pad.lv/920455 -- vila 2012-02-15
373+ return u'none'
374+ else:
375+ return u'required'
376+
377+
378+opt_ssl_ca_certs = config.Option('ssl.ca_certs',
379+ from_unicode=ca_certs_from_store,
380+ default=default_ca_certs,
381+ invalid='warning',
382+ help="""\
383+Path to certification authority certificates to trust.
384+
385+This should be a valid path to a bundle containing all root Certificate
386+Authorities used to verify an https server certificate.
387+
388+Use ssl.cert_reqs=none to disable certificate verification.
389+""")
390+
391+opt_ssl_cert_reqs = config.Option('ssl.cert_reqs',
392+ default=default_ca_reqs,
393+ from_unicode=cert_reqs_from_store,
394+ invalid='error',
395+ help="""\
396+Whether to require a certificate from the remote side. (default:required)
397+
398+Possible values:
399+ * none: Certificates ignored
400+ * required: Certificates required and validated
401+""")
402+
403+checked_kerberos = False
404+kerberos = None
405+
406+
407+class addinfourl(urllib_request.addinfourl):
408+ '''Replacement addinfourl class compatible with python-2.7's xmlrpclib
409+
410+ In python-2.7, xmlrpclib expects that the response object that it receives
411+ has a getheader method. http_client.HTTPResponse provides this but
412+ urllib_request.addinfourl does not. Add the necessary functions here, ported to
413+ use the internal data structures of addinfourl.
414+ '''
415+
416+ def getheader(self, name, default=None):
417+ if self.headers is None:
418+ raise http_client.ResponseNotReady()
419+ return self.headers.getheader(name, default)
420+
421+ def getheaders(self):
422+ if self.headers is None:
423+ raise http_client.ResponseNotReady()
424+ return list(self.headers.items())
425+
426+
427+class _ReportingFileSocket(object):
428+
429+ def __init__(self, filesock, report_activity=None):
430+ self.filesock = filesock
431+ self._report_activity = report_activity
432+
433+ def report_activity(self, size, direction):
434+ if self._report_activity:
435+ self._report_activity(size, direction)
436+
437+ def read(self, size=1):
438+ s = self.filesock.read(size)
439+ self.report_activity(len(s), 'read')
440+ return s
441+
442+ def readline(self, size=-1):
443+ s = self.filesock.readline(size)
444+ self.report_activity(len(s), 'read')
445+ return s
446+
447+ def readinto(self, b):
448+ s = self.filesock.readinto(b)
449+ self.report_activity(s, 'read')
450+ return s
451+
452+ def __getattr__(self, name):
453+ return getattr(self.filesock, name)
454+
455+
456+class _ReportingSocket(object):
457+
458+ def __init__(self, sock, report_activity=None):
459+ self.sock = sock
460+ self._report_activity = report_activity
461+
462+ def report_activity(self, size, direction):
463+ if self._report_activity:
464+ self._report_activity(size, direction)
465+
466+ def sendall(self, s, *args):
467+ self.sock.sendall(s, *args)
468+ self.report_activity(len(s), 'write')
469+
470+ def recv(self, *args):
471+ s = self.sock.recv(*args)
472+ self.report_activity(len(s), 'read')
473+ return s
474+
475+ def makefile(self, mode='r', bufsize=-1):
476+ # http_client creates a fileobject that doesn't do buffering, which
477+ # makes fp.readline() very expensive because it only reads one byte
478+ # at a time. So we wrap the socket in an object that forces
479+ # sock.makefile to make a buffered file.
480+ fsock = self.sock.makefile(mode, 65536)
481+ # And wrap that into a reporting kind of fileobject
482+ return _ReportingFileSocket(fsock, self._report_activity)
483+
484+ def __getattr__(self, name):
485+ return getattr(self.sock, name)
486+
487+
488+# We define our own Response class to keep our http_client pipe clean
489+class Response(http_client.HTTPResponse):
490+ """Custom HTTPResponse, to avoid the need to decorate.
491+
492+ http_client prefers to decorate the returned objects, rather
493+ than using a custom object.
494+ """
495+
496+ # Some responses have bodies in which we have no interest
497+ _body_ignored_responses = [301, 302, 303, 307, 400, 401, 403, 404, 501]
498+
499+ # in finish() below, we may have to discard several MB in the worst
500+ # case. To avoid buffering that much, we read and discard by chunks
501+ # instead. The underlying file is either a socket or a StringIO, so reading
502+ # 8k chunks should be fine.
503+ _discarded_buf_size = 8192
504+
505+ if PY3:
506+ def __init__(self, sock, debuglevel=0, method=None, url=None):
507+ self.url = url
508+ super(Response, self).__init__(
509+ sock, debuglevel=debuglevel, method=method, url=url)
510+
511+ def begin(self):
512+ """Begin to read the response from the server.
513+
514+ http_client assumes that some responses get no content and do
515+ not even attempt to read the body in that case, leaving
516+ the body in the socket, blocking the next request. Let's
517+ try to workaround that.
518+ """
519+ http_client.HTTPResponse.begin(self)
520+ if self.status in self._body_ignored_responses:
521+ if self.debuglevel >= 2:
522+ print("For status: [%s], will ready body, length: %s" % (
523+ self.status, self.length))
524+ if not (self.length is None or self.will_close):
525+ # In some cases, we just can't read the body not
526+ # even try or we may encounter a 104, 'Connection
527+ # reset by peer' error if there is indeed no body
528+ # and the server closed the connection just after
529+ # having issued the response headers (even if the
530+ # headers indicate a Content-Type...)
531+ body = self.read(self.length)
532+ if self.debuglevel >= 9:
533+ # This one can be huge and is generally not interesting
534+ print("Consumed body: [%s]" % body)
535+ self.close()
536+ elif self.status == 200:
537+ # Whatever the request is, it went ok, so we surely don't want to
538+ # close the connection. Some cases are not correctly detected by
539+ # http_client.HTTPConnection.getresponse (called by
540+ # http_client.HTTPResponse.begin). The CONNECT response for the https
541+ # through proxy case is one. Note: the 'will_close' below refers
542+ # to the "true" socket between us and the server, whereas the
543+ # 'close()' above refers to the copy of that socket created by
544+ # http_client for the response itself. So, in the if above we close the
545+ # socket to indicate that we are done with the response whereas
546+ # below we keep the socket with the server opened.
547+ self.will_close = False
548+
549+ def finish(self):
550+ """Finish reading the body.
551+
552+ In some cases, the client may have left some bytes to read in the
553+ body. That will block the next request to succeed if we use a
554+ persistent connection. If we don't use a persistent connection, well,
555+ nothing will block the next request since a new connection will be
556+ issued anyway.
557+
558+ :return: the number of bytes left on the socket (may be None)
559+ """
560+ pending = None
561+ if not self.isclosed():
562+ # Make sure nothing was left to be read on the socket
563+ pending = 0
564+ data = True
565+ while data and self.length:
566+ # read() will update self.length
567+ data = self.read(min(self.length, self._discarded_buf_size))
568+ pending += len(data)
569+ if pending:
570+ trace.mutter("%s bytes left on the HTTP socket", pending)
571+ self.close()
572+ return pending
573+
574+
575+# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
576+class AbstractHTTPConnection:
577+ """A custom HTTP(S) Connection, which can reset itself on a bad response"""
578+
579+ response_class = Response
580+
581+ # When we detect a server responding with the whole file to range requests,
582+ # we want to warn. But not below a given thresold.
583+ _range_warning_thresold = 1024 * 1024
584+
585+ def __init__(self, report_activity=None):
586+ self._response = None
587+ self._report_activity = report_activity
588+ self._ranges_received_whole_file = None
589+
590+ def _mutter_connect(self):
591+ netloc = '%s:%s' % (self.host, self.port)
592+ if self.proxied_host is not None:
593+ netloc += '(proxy for %s)' % self.proxied_host
594+ trace.mutter('* About to connect() to %s' % netloc)
595+
596+ def getresponse(self):
597+ """Capture the response to be able to cleanup"""
598+ self._response = http_client.HTTPConnection.getresponse(self)
599+ return self._response
600+
601+ def cleanup_pipe(self):
602+ """Read the remaining bytes of the last response if any."""
603+ if self._response is not None:
604+ try:
605+ pending = self._response.finish()
606+ # Warn the user (once)
607+ if (self._ranges_received_whole_file is None
608+ and self._response.status == 200
609+ and pending
610+ and pending > self._range_warning_thresold):
611+ self._ranges_received_whole_file = True
612+ trace.warning(
613+ 'Got a 200 response when asking for multiple ranges,'
614+ ' does your server at %s:%s support range requests?',
615+ self.host, self.port)
616+ except socket.error as e:
617+ # It's conceivable that the socket is in a bad state here
618+ # (including some test cases) and in this case, it doesn't need
619+ # cleaning anymore, so no need to fail, we just get rid of the
620+ # socket and let callers reconnect
621+ if (len(e.args) == 0
622+ or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
623+ raise
624+ self.close()
625+ self._response = None
626+ # Preserve our preciousss
627+ sock = self.sock
628+ self.sock = None
629+ # Let http_client.HTTPConnection do its housekeeping
630+ self.close()
631+ # Restore our preciousss
632+ self.sock = sock
633+
634+ def _wrap_socket_for_reporting(self, sock):
635+ """Wrap the socket before anybody use it."""
636+ self.sock = _ReportingSocket(sock, self._report_activity)
637+
638+
639+class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
640+
641+ # XXX: Needs refactoring at the caller level.
642+ def __init__(self, host, port=None, proxied_host=None,
643+ report_activity=None, ca_certs=None):
644+ AbstractHTTPConnection.__init__(self, report_activity=report_activity)
645+ if PY3:
646+ http_client.HTTPConnection.__init__(self, host, port)
647+ else:
648+ # Use strict=True since we don't support HTTP/0.9
649+ http_client.HTTPConnection.__init__(self, host, port, strict=True)
650+ self.proxied_host = proxied_host
651+ # ca_certs is ignored, it's only relevant for https
652+
653+ def connect(self):
654+ if 'http' in debug.debug_flags:
655+ self._mutter_connect()
656+ http_client.HTTPConnection.connect(self)
657+ self._wrap_socket_for_reporting(self.sock)
658+
659+
660+class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
661+
662+ def __init__(self, host, port=None, key_file=None, cert_file=None,
663+ proxied_host=None,
664+ report_activity=None, ca_certs=None):
665+ AbstractHTTPConnection.__init__(self, report_activity=report_activity)
666+ if PY3:
667+ http_client.HTTPSConnection.__init__(
668+ self, host, port, key_file, cert_file)
669+ else:
670+ # Use strict=True since we don't support HTTP/0.9
671+ http_client.HTTPSConnection.__init__(self, host, port,
672+ key_file, cert_file, strict=True)
673+ self.proxied_host = proxied_host
674+ self.ca_certs = ca_certs
675+
676+ def connect(self):
677+ if 'http' in debug.debug_flags:
678+ self._mutter_connect()
679+ http_client.HTTPConnection.connect(self)
680+ self._wrap_socket_for_reporting(self.sock)
681+ if self.proxied_host is None:
682+ self.connect_to_origin()
683+
684+ def connect_to_origin(self):
685+ # FIXME JRV 2011-12-18: Use location config here?
686+ config_stack = config.GlobalStack()
687+ cert_reqs = config_stack.get('ssl.cert_reqs')
688+ if self.proxied_host is not None:
689+ host = self.proxied_host.split(":", 1)[0]
690+ else:
691+ host = self.host
692+ if cert_reqs == ssl.CERT_NONE:
693+ ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
694+ ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
695+ ca_certs = None
696+ else:
697+ if self.ca_certs is None:
698+ ca_certs = config_stack.get('ssl.ca_certs')
699+ else:
700+ ca_certs = self.ca_certs
701+ if ca_certs is None:
702+ trace.warning(
703+ "No valid trusted SSL CA certificates file set. See "
704+ "'brz help ssl.ca_certs' for more information on setting "
705+ "trusted CAs.")
706+ try:
707+ ssl_context = ssl.create_default_context(
708+ purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
709+ ssl.check_hostname = True
710+ if self.cert_file:
711+ ssl_context.load_cert_chain(
712+ keyfile=self.key_file, certfile=self.cert_file)
713+ ssl_context.verify_mode = cert_reqs
714+ ssl_sock = ssl_context.wrap_socket(
715+ self.sock, server_hostname=self.host)
716+ except ssl.SSLError:
717+ trace.note(
718+ "\n"
719+ "See `brz help ssl.ca_certs` for how to specify trusted CA"
720+ "certificates.\n"
721+ "Pass -Ossl.cert_reqs=none to disable certificate "
722+ "verification entirely.\n")
723+ raise
724+ # Wrap the ssl socket before anybody use it
725+ self._wrap_socket_for_reporting(ssl_sock)
726+
727+
728+class Request(urllib_request.Request):
729+ """A custom Request object.
730+
731+ urllib_request determines the request method heuristically (based on
732+ the presence or absence of data). We set the method
733+ statically.
734+
735+ The Request object tracks:
736+ - the connection the request will be made on.
737+ - the authentication parameters needed to preventively set
738+ the authentication header once a first authentication have
739+ been made.
740+ """
741+
742+ def __init__(self, method, url, data=None, headers={},
743+ origin_req_host=None, unverifiable=False,
744+ connection=None, parent=None,
745+ accepted_errors=None):
746+ urllib_request.Request.__init__(
747+ self, url, data, headers,
748+ origin_req_host, unverifiable)
749+ self.method = method
750+ self.connection = connection
751+ self.accepted_errors = accepted_errors
752+ # To handle redirections
753+ self.parent = parent
754+ self.redirected_to = None
755+ # Unless told otherwise, redirections are not followed
756+ self.follow_redirections = False
757+ # auth and proxy_auth are dicts containing, at least
758+ # (scheme, host, port, realm, user, password, protocol, path).
759+ # The dict entries are mostly handled by the AuthHandler.
760+ # Some authentication schemes may add more entries.
761+ self.auth = {}
762+ self.proxy_auth = {}
763+ self.proxied_host = None
764+
765+ def get_method(self):
766+ return self.method
767+
768+ def set_proxy(self, proxy, type):
769+ """Set the proxy and remember the proxied host."""
770+ if PY3:
771+ host, port = splitport(self.host)
772+ else:
773+ host, port = splitport(self.get_host())
774+ if port is None:
775+ # We need to set the default port ourselves way before it gets set
776+ # in the HTTP[S]Connection object at build time.
777+ if self.type == 'https':
778+ conn_class = HTTPSConnection
779+ else:
780+ conn_class = HTTPConnection
781+ port = conn_class.default_port
782+ self.proxied_host = '%s:%s' % (host, port)
783+ urllib_request.Request.set_proxy(self, proxy, type)
784+ # When urllib_request makes a https request with our wrapper code and a proxy,
785+ # it sets Host to the https proxy, not the host we want to talk to.
786+ # I'm fairly sure this is our fault, but what is the cause is an open
787+ # question. -- Robert Collins May 8 2010.
788+ self.add_unredirected_header('Host', self.proxied_host)
789+
790+
791+class _ConnectRequest(Request):
792+
793+ def __init__(self, request):
794+ """Constructor
795+
796+ :param request: the first request sent to the proxied host, already
797+ processed by the opener (i.e. proxied_host is already set).
798+ """
799+ # We give a fake url and redefine selector or urllib_request will be
800+ # confused
801+ Request.__init__(self, 'CONNECT', request.get_full_url(),
802+ connection=request.connection)
803+ if request.proxied_host is None:
804+ raise AssertionError()
805+ self.proxied_host = request.proxied_host
806+
807+ @property
808+ def selector(self):
809+ return self.proxied_host
810+
811+ def get_selector(self):
812+ return self.selector
813+
814+ def set_proxy(self, proxy, type):
815+ """Set the proxy without remembering the proxied host.
816+
817+ We already know the proxied host by definition, the CONNECT request
818+ occurs only when the connection goes through a proxy. The usual
819+ processing (masquerade the request so that the connection is done to
820+ the proxy while the request is targeted at another host) does not apply
821+ here. In fact, the connection is already established with proxy and we
822+ just want to enable the SSL tunneling.
823+ """
824+ urllib_request.Request.set_proxy(self, proxy, type)
825+
826+
class ConnectionHandler(urllib_request.BaseHandler):
    """Implement connection sharing by pre-processing requests.

    urllib_request gives no access to the HTTPConnection object it uses
    internally, yet connection sharing requires one.  We therefore attach
    the connection to each request just before it is processed, and
    override the do_open method for http[s] requests in
    AbstractHTTPHandler so the attached connection is the one used.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        """Return a new (not yet connected) connection for ``request``."""
        if not request.host:
            # Paranoia only: higher levels should have rejected this already.
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # The connection object is created now but will only really connect
        # when the first request is made.
        try:
            return http_connection_class(
                request.host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases: the request carries no connection yet, so a new one is
        created; or it carries one that was already used, which we capture
        so it can be handed to another transport for reuse.  The Transport
        object itself takes care of propagating the connection from request
        to request or to cloned transports.
        """
        if request.connection is None:
            request.connection = self.create_connection(
                request, http_connection_class)

        # Every request passes through here: propagate the debug level
        request.connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)
892+
893+
class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers added to every request unless the caller already set them.
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': 'Breezy/%s' % breezy_version,
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Add the default headers to the request, without overriding
        headers explicitly set by the caller."""

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connexion or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        Note: this must be called from inside an ``except`` block, as it
        relies on sys.exc_info() to retrieve the active exception.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            if PY3:
                origin_req_host = request.origin_req_host
            else:
                origin_req_host = request.get_origin_req_host()
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            reraise(exc_type, exc_val, exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Will retry, %s %r' % (method, url))
                # Close and retry exactly once with first_try=False.
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    if PY3:
                        selector = request.selector
                    else:
                        selector = request.get_selector()
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print('  Failed again, %s %r' % (method, url))
                    print('  Will raise: [%r]' % my_exception)
                # Re-raise with the original traceback to keep the real
                # failure point visible.
                reraise(type(my_exception), my_exception, exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            if PY3:
                url = request.selector
            else:
                url = request.get_selector()
            # encode_chunked only exists from python 3.6 on.
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        if PY3:
            # On Python 3 the http_client response is returned directly;
            # everything below is the Python 2 path building an addinfourl.
            response.msg = response.reason
            return response

# FIXME: HTTPConnection does not fully support 100-continue (the
# server responses are just ignored)

#        if code == 100:
#            mutter('Will send the body')
#            # We can send the body now
#            body = request.data
#            if body is None:
#                raise URLError("No data given")
#            connection.send(body)
#            response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print('  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print('  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                except:
                    # resp.version may not be numeric; fall back to its repr
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response
        return resp
1127+
1128+
class HTTPHandler(AbstractHTTPHandler):
    """Serve plain http requests by thunking into HTTPConnection."""

    def http_open(self, request):
        """Open the request over the connection captured on it."""
        return self.do_open(HTTPConnection, request)
1134+
1135+
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open an https request, tunneling through a proxy first if needed.

        When the connection is not yet established and goes through a
        proxy, a CONNECT request is issued first so that the subsequent
        traffic can be SSL-encrypted end to end.

        :param request: the request to open; its connection must have been
            captured by ConnectionHandler already.
        """
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # Bug fix: handlers have no 'host' attribute, so the previous
                # 'self.host' raised AttributeError here and masked the real
                # connection failure.  The connection object knows the proxy
                # host it was created against, so report that instead.
                raise errors.ConnectionError(
                    "Can't connect to %s via proxy %s" % (
                        connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
1172+
1173+
class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        # the URI. Using that mechanism with Breezy will violate the
        # protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occurs with conditional
        # GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        # our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        if PY3:
            origin_req_host = req.origin_req_host
        else:
            origin_req_host = req.get_origin_req_host()

        if code in (301, 302, 303, 307):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request to be able to clean the pipe of the associated
        connection, *before* issuing the redirected request but *after* having
        eventually raised an error.

        Returns None (letting the HTTP error propagate) when no Location/URI
        header is present in the response.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            return
        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # The caller asked not to follow: record the target and hand the
            # original response back.
            req.redirected_to = newurl
            return fp

        newurl = urljoin(req.get_full_url(), newurl)

        # This call succeeds or raise an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
1296+
1297+
class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request during
    the request pre-processing instead of modifying it at _open time. As we
    capture (or create) the connection object during request processing, _open
    time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may be
    against an https server proxied through an http proxy. So, https_request
    will be called, but later it's really http_open that will be called. This
    explains why we don't have to call self.parent.open as the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation: the base
        # class bound one <scheme>_open method per configured proxy, unbind
        # them all so our *_request methods below take over.
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            # Bind a <scheme>_request instance attribute that routes the
            # request through set_proxy for that scheme.
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: proxy variable name without the ``_proxy`` suffix.
        :param default_to: fallback variable name, or None for no fallback.
        :return: the proxy value, or None if not set.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                # NOTE(review): re.match anchors only the start of hhost,
                # so 'foo.com' also matches 'foo.company' — confirm whether
                # this looseness is intended.
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoid the host
        return None

    def set_proxy(self, request, type):
        """Modify the request so it is sent to the proxy for ``type``.

        :param request: the request to modify (returned unmodified if the
            host is in the no_proxy list).
        :param type: the scheme ('http' or 'https') whose proxy applies.
        :return: the (possibly modified) request.
        """
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port
        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameter are available, we are handling the first
            # proxied request, initialize. scheme (the authentication scheme)
            # and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request
1445+
1446+
1447+class AbstractAuthHandler(urllib_request.BaseHandler):
1448+ """A custom abstract authentication handler for all http authentications.
1449+
1450+ Provides the meat to handle authentication errors and
1451+ preventively set authentication headers after the first
1452+ successful authentication.
1453+
1454+ This can be used for http and proxy, as well as for basic, negotiate and
1455+ digest authentications.
1456+
1457+ This provides an unified interface for all authentication handlers
1458+ (urllib_request provides far too many with different policies).
1459+
1460+ The interaction between this handler and the urllib_request
1461+ framework is not obvious, it works as follow:
1462+
1463+ opener.open(request) is called:
1464+
1465+ - that may trigger http_request which will add an authentication header
1466+ (self.build_header) if enough info is available.
1467+
1468+ - the request is sent to the server,
1469+
1470+ - if an authentication error is received self.auth_required is called,
1471+ we acquire the authentication info in the error headers and call
1472+ self.auth_match to check that we are able to try the
1473+ authentication and complete the authentication parameters,
1474+
1475+ - we call parent.open(request), that may trigger http_request
1476+ and will add a header (self.build_header), but here we have
1477+ all the required info (keep in mind that the request and
1478+ authentication used in the recursive calls are really (and must be)
1479+ the *same* objects).
1480+
1481+ - if the call returns a response, the authentication have been
1482+ successful and the request authentication parameters have been updated.
1483+ """
1484+
1485+ scheme = None
1486+ """The scheme as it appears in the server header (lower cased)"""
1487+
1488+ _max_retry = 3
1489+ """We don't want to retry authenticating endlessly"""
1490+
1491+ requires_username = True
1492+ """Whether the auth mechanism requires a username."""
1493+
1494+ # The following attributes should be defined by daughter
1495+ # classes:
1496+ # - auth_required_header: the header received from the server
1497+ # - auth_header: the header sent in the request
1498+
    def __init__(self):
        """Initialize the handler outside of any retry cycle."""
        # We want to know when we enter into a try/fail cycle of
        # authentications, so we initialize to None to indicate that we
        # aren't in such a cycle by default.
        self._retry_count = None
1504+
1505+ def _parse_auth_header(self, server_header):
1506+ """Parse the authentication header.
1507+
1508+ :param server_header: The value of the header sent by the server
1509+ describing the authenticaion request.
1510+
1511+ :return: A tuple (scheme, remainder) scheme being the first word in the
1512+ given header (lower cased), remainder may be None.
1513+ """
1514+ try:
1515+ scheme, remainder = server_header.split(None, 1)
1516+ except ValueError:
1517+ scheme = server_header
1518+ remainder = None
1519+ return (scheme.lower(), remainder)
1520+
1521+ def update_auth(self, auth, key, value):
1522+ """Update a value in auth marking the auth as modified if needed"""
1523+ old_value = auth.get(key, None)
1524+ if old_value != value:
1525+ auth[key] = value
1526+ auth['modified'] = True
1527+
    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retries being recursive calls, None identifies the first one
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        if PY3:
            server_headers = headers.get_all(self.auth_required_header)
        else:
            server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            raise KeyError('%s not found' % self.auth_required_header)

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, even if the current handler doesn't
                    # succeed (i.e. the credentials are wrong or incomplete),
                    # we know that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None
1611+
    def add_auth_header(self, request, header):
        """Add the authentication header to the request.

        :param request: The request needing authentication.
        :param header: The header value (scheme and credentials).
        """
        # Unredirected: urllib does not copy such headers to redirected
        # requests, so credentials are not sent to a redirection target.
        request.add_unredirected_header(self.auth_header, header)
1615+
    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them change the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        # Abstract: implemented by the scheme-specific daughter classes.
        raise NotImplementedError(self.auth_match)
1635+
    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        # Abstract: implemented by the scheme-specific daughter classes.
        raise NotImplementedError(self.build_auth_header)
1645+
    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional infos may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None
1656+
1657+ def get_user_password(self, auth):
1658+ """Ask user for a password if none is already available.
1659+
1660+ :param auth: authentication info gathered so far (from the initial url
1661+ and then during dialog with the server).
1662+ """
1663+ auth_conf = config.AuthenticationConfig()
1664+ user = auth.get('user', None)
1665+ password = auth.get('password', None)
1666+ realm = auth['realm']
1667+ port = auth.get('port', None)
1668+
1669+ if user is None:
1670+ user = auth_conf.get_user(auth['protocol'], auth['host'],
1671+ port=port, path=auth['path'],
1672+ realm=realm, ask=True,
1673+ prompt=self.build_username_prompt(auth))
1674+ if user is not None and password is None:
1675+ password = auth_conf.get_password(
1676+ auth['protocol'], auth['host'], user,
1677+ port=port,
1678+ path=auth['path'], realm=realm,
1679+ prompt=self.build_password_prompt(auth))
1680+
1681+ return user, password
1682+
1683+ def _build_password_prompt(self, auth):
1684+ """Build a prompt taking the protocol used into account.
1685+
1686+ The AuthHandler is used by http and https, we want that information in
1687+ the prompt, so we build the prompt from the authentication dict which
1688+ contains all the needed parts.
1689+
1690+ Also, http and proxy AuthHandlers present different prompts to the
1691+        user. The daughter classes should implement a public
1692+ build_password_prompt using this method.
1693+ """
1694+ prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
1695+ realm = auth['realm']
1696+ if realm is not None:
1697+ prompt += u", Realm: '%s'" % realm
1698+ prompt += u' password'
1699+ return prompt
1700+
1701+ def _build_username_prompt(self, auth):
1702+ """Build a prompt taking the protocol used into account.
1703+
1704+ The AuthHandler is used by http and https, we want that information in
1705+ the prompt, so we build the prompt from the authentication dict which
1706+ contains all the needed parts.
1707+
1708+ Also, http and proxy AuthHandlers present different prompts to the
1709+        user. The daughter classes should implement a public
1710+ build_username_prompt using this method.
1711+ """
1712+ prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
1713+ realm = auth['realm']
1714+ if realm is not None:
1715+ prompt += u", Realm: '%s'" % realm
1716+ prompt += u' username'
1717+ return prompt
1718+
1719+ def http_request(self, request):
1720+ """Insert an authentication header if information is available"""
1721+ auth = self.get_auth(request)
1722+ if self.auth_params_reusable(auth):
1723+ self.add_auth_header(
1724+ request, self.build_auth_header(auth, request))
1725+ return request
1726+
1727+ https_request = http_request # FIXME: Need test
1728+
1729+
1730+class NegotiateAuthHandler(AbstractAuthHandler):
1731+    """An authentication handler that handles WWW-Authenticate: Negotiate.
1732+
1733+ At the moment this handler supports just Kerberos. In the future,
1734+ NTLM support may also be added.
1735+ """
1736+
1737+ scheme = 'negotiate'
1738+ handler_order = 480
1739+ requires_username = False
1740+
1741+ def auth_match(self, header, auth):
1742+ scheme, raw_auth = self._parse_auth_header(header)
1743+ if scheme != self.scheme:
1744+ return False
1745+ self.update_auth(auth, 'scheme', scheme)
1746+ resp = self._auth_match_kerberos(auth)
1747+ if resp is None:
1748+ return False
1749+ # Optionally should try to authenticate using NTLM here
1750+ self.update_auth(auth, 'negotiate_response', resp)
1751+ return True
1752+
1753+ def _auth_match_kerberos(self, auth):
1754+ """Try to create a GSSAPI response for authenticating against a host."""
1755+ global kerberos, checked_kerberos
1756+ if kerberos is None and not checked_kerberos:
1757+ try:
1758+ import kerberos
1759+ except ImportError:
1760+ kerberos = None
1761+ checked_kerberos = True
1762+ if kerberos is None:
1763+ return None
1764+ ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
1765+ if ret < 1:
1766+ trace.warning('Unable to create GSSAPI context for %s: %d',
1767+ auth['host'], ret)
1768+ return None
1769+ ret = kerberos.authGSSClientStep(vc, "")
1770+ if ret < 0:
1771+ trace.mutter('authGSSClientStep failed: %d', ret)
1772+ return None
1773+ return kerberos.authGSSClientResponse(vc)
1774+
1775+ def build_auth_header(self, auth, request):
1776+ return "Negotiate %s" % auth['negotiate_response']
1777+
1778+ def auth_params_reusable(self, auth):
1779+ # If the auth scheme is known, it means a previous
1780+ # authentication was successful, all information is
1781+ # available, no further checks are needed.
1782+ return (auth.get('scheme', None) == 'negotiate' and
1783+ auth.get('negotiate_response', None) is not None)
1784+
1785+
1786+class BasicAuthHandler(AbstractAuthHandler):
1787+ """A custom basic authentication handler."""
1788+
1789+ scheme = 'basic'
1790+ handler_order = 500
1791+ auth_regexp = re.compile('realm="([^"]*)"', re.I)
1792+
1793+ def build_auth_header(self, auth, request):
1794+ raw = '%s:%s' % (auth['user'], auth['password'])
1795+ auth_header = 'Basic ' + \
1796+ base64.b64encode(raw.encode('utf-8')).decode('ascii')
1797+ return auth_header
1798+
1799+ def extract_realm(self, header_value):
1800+ match = self.auth_regexp.search(header_value)
1801+ realm = None
1802+ if match:
1803+ realm = match.group(1)
1804+ return match, realm
1805+
1806+ def auth_match(self, header, auth):
1807+ scheme, raw_auth = self._parse_auth_header(header)
1808+ if scheme != self.scheme:
1809+ return False
1810+
1811+ match, realm = self.extract_realm(raw_auth)
1812+ if match:
1813+ # Put useful info into auth
1814+ self.update_auth(auth, 'scheme', scheme)
1815+ self.update_auth(auth, 'realm', realm)
1816+ if (auth.get('user', None) is None
1817+ or auth.get('password', None) is None):
1818+ user, password = self.get_user_password(auth)
1819+ self.update_auth(auth, 'user', user)
1820+ self.update_auth(auth, 'password', password)
1821+ return match is not None
1822+
1823+ def auth_params_reusable(self, auth):
1824+ # If the auth scheme is known, it means a previous
1825+ # authentication was successful, all information is
1826+ # available, no further checks are needed.
1827+ return auth.get('scheme', None) == 'basic'
1828+
1829+
1830+def get_digest_algorithm_impls(algorithm):
1831+ H = None
1832+ KD = None
1833+ if algorithm == 'MD5':
1834+ def H(x): return osutils.md5(x).hexdigest()
1835+ elif algorithm == 'SHA':
1836+ H = osutils.sha_string
1837+ if H is not None:
1838+ def KD(secret, data): return H(
1839+ ("%s:%s" % (secret, data)).encode('utf-8'))
1840+ return H, KD
1841+
1842+
1843+def get_new_cnonce(nonce, nonce_count):
1844+ raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
1845+ osutils.rand_chars(8))
1846+ return osutils.sha_string(raw.encode('utf-8'))[:16]
1847+
1848+
1849+class DigestAuthHandler(AbstractAuthHandler):
1850+ """A custom digest authentication handler."""
1851+
1852+ scheme = 'digest'
1853+ # Before basic as digest is a bit more secure and should be preferred
1854+ handler_order = 490
1855+
1856+ def auth_params_reusable(self, auth):
1857+ # If the auth scheme is known, it means a previous
1858+ # authentication was successful, all information is
1859+ # available, no further checks are needed.
1860+ return auth.get('scheme', None) == 'digest'
1861+
1862+ def auth_match(self, header, auth):
1863+ scheme, raw_auth = self._parse_auth_header(header)
1864+ if scheme != self.scheme:
1865+ return False
1866+
1867+ # Put the requested authentication info into a dict
1868+ req_auth = urllib_request.parse_keqv_list(
1869+ urllib_request.parse_http_list(raw_auth))
1870+
1871+ # Check that we can handle that authentication
1872+ qop = req_auth.get('qop', None)
1873+ if qop != 'auth': # No auth-int so far
1874+ return False
1875+
1876+ H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
1877+ if H is None:
1878+ return False
1879+
1880+ realm = req_auth.get('realm', None)
1881+ # Put useful info into auth
1882+ self.update_auth(auth, 'scheme', scheme)
1883+ self.update_auth(auth, 'realm', realm)
1884+ if auth.get('user', None) is None or auth.get('password', None) is None:
1885+ user, password = self.get_user_password(auth)
1886+ self.update_auth(auth, 'user', user)
1887+ self.update_auth(auth, 'password', password)
1888+
1889+ try:
1890+ if req_auth.get('algorithm', None) is not None:
1891+ self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
1892+ nonce = req_auth['nonce']
1893+ if auth.get('nonce', None) != nonce:
1894+ # A new nonce, never used
1895+ self.update_auth(auth, 'nonce_count', 0)
1896+ self.update_auth(auth, 'nonce', nonce)
1897+ self.update_auth(auth, 'qop', qop)
1898+ auth['opaque'] = req_auth.get('opaque', None)
1899+ except KeyError:
1900+ # Some required field is not there
1901+ return False
1902+
1903+ return True
1904+
1905+ def build_auth_header(self, auth, request):
1906+ if PY3:
1907+ selector = request.selector
1908+ else:
1909+ selector = request.get_selector()
1910+ url_scheme, url_selector = splittype(selector)
1911+ sel_host, uri = splithost(url_selector)
1912+
1913+ A1 = ('%s:%s:%s' %
1914+ (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
1915+ A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')
1916+
1917+ nonce = auth['nonce']
1918+ qop = auth['qop']
1919+
1920+ nonce_count = auth['nonce_count'] + 1
1921+ ncvalue = '%08x' % nonce_count
1922+ cnonce = get_new_cnonce(nonce, nonce_count)
1923+
1924+ H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
1925+ nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
1926+ request_digest = KD(H(A1), nonce_data)
1927+
1928+ header = 'Digest '
1929+ header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
1930+ auth['realm'],
1931+ nonce)
1932+ header += ', uri="%s"' % uri
1933+ header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
1934+ header += ', qop="%s"' % qop
1935+ header += ', response="%s"' % request_digest
1936+ # Append the optional fields
1937+ opaque = auth.get('opaque', None)
1938+ if opaque:
1939+ header += ', opaque="%s"' % opaque
1940+ if auth.get('algorithm', None):
1941+ header += ', algorithm="%s"' % auth.get('algorithm')
1942+
1943+ # We have used the nonce once more, update the count
1944+ auth['nonce_count'] = nonce_count
1945+
1946+ return header
1947+
1948+
1949+class HTTPAuthHandler(AbstractAuthHandler):
1950+ """Custom http authentication handler.
1951+
1952+ Send the authentication preventively to avoid the roundtrip
1953+    associated with the 401 error and keep the relevant info in
1954+ the auth request attribute.
1955+ """
1956+
1957+ auth_required_header = 'www-authenticate'
1958+ auth_header = 'Authorization'
1959+
1960+ def get_auth(self, request):
1961+ """Get the auth params from the request"""
1962+ return request.auth
1963+
1964+ def set_auth(self, request, auth):
1965+ """Set the auth params for the request"""
1966+ request.auth = auth
1967+
1968+ def build_password_prompt(self, auth):
1969+ return self._build_password_prompt(auth)
1970+
1971+ def build_username_prompt(self, auth):
1972+ return self._build_username_prompt(auth)
1973+
1974+ def http_error_401(self, req, fp, code, msg, headers):
1975+ return self.auth_required(req, headers)
1976+
1977+
1978+class ProxyAuthHandler(AbstractAuthHandler):
1979+ """Custom proxy authentication handler.
1980+
1981+ Send the authentication preventively to avoid the roundtrip
1982+    associated with the 407 error and keep the relevant info in
1983+    the proxy_auth request attribute.
1984+ """
1985+
1986+ auth_required_header = 'proxy-authenticate'
1987+ # FIXME: the correct capitalization is Proxy-Authorization,
1988+ # but python-2.4 urllib_request.Request insist on using capitalize()
1989+ # instead of title().
1990+ auth_header = 'Proxy-authorization'
1991+
1992+ def get_auth(self, request):
1993+ """Get the auth params from the request"""
1994+ return request.proxy_auth
1995+
1996+ def set_auth(self, request, auth):
1997+ """Set the auth params for the request"""
1998+ request.proxy_auth = auth
1999+
2000+ def build_password_prompt(self, auth):
2001+ prompt = self._build_password_prompt(auth)
2002+ prompt = u'Proxy ' + prompt
2003+ return prompt
2004+
2005+ def build_username_prompt(self, auth):
2006+ prompt = self._build_username_prompt(auth)
2007+ prompt = u'Proxy ' + prompt
2008+ return prompt
2009+
2010+ def http_error_407(self, req, fp, code, msg, headers):
2011+ return self.auth_required(req, headers)
2012+
2013+
2014+class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
2015+ """Custom http basic authentication handler"""
2016+
2017+
2018+class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
2019+ """Custom proxy basic authentication handler"""
2020+
2021+
2022+class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
2023+    """Custom http digest authentication handler"""
2024+
2025+
2026+class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
2027+    """Custom proxy digest authentication handler"""
2028+
2029+
2030+class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
2031+ """Custom http negotiate authentication handler"""
2032+
2033+
2034+class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
2035+ """Custom proxy negotiate authentication handler"""
2036+
2037+
2038+class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
2039+ """Process HTTP error responses.
2040+
2041+ We don't really process the errors, quite the contrary
2042+ instead, we leave our Transport handle them.
2043+ """
2044+
2045+ accepted_errors = [200, # Ok
2046+ 206, # Partial content
2047+ 404, # Not found
2048+ ]
2049+ """The error codes the caller will handle.
2050+
2051+ This can be specialized in the request on a case-by case basis, but the
2052+ common cases are covered here.
2053+ """
2054+
2055+ def http_response(self, request, response):
2056+ code, msg, hdrs = response.code, response.msg, response.info()
2057+
2058+ accepted_errors = request.accepted_errors
2059+ if accepted_errors is None:
2060+ accepted_errors = self.accepted_errors
2061+
2062+ if code not in accepted_errors:
2063+ response = self.parent.error('http', request, response,
2064+ code, msg, hdrs)
2065+ return response
2066+
2067+ https_response = http_response
2068+
2069+
2070+class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
2071+ """Translate common errors into Breezy Exceptions"""
2072+
2073+ def http_error_default(self, req, fp, code, msg, hdrs):
2074+ if code == 403:
2075+ raise errors.TransportError(
2076+ 'Server refuses to fulfill the request (403 Forbidden)'
2077+ ' for %s' % req.get_full_url())
2078+ else:
2079+ raise errors.InvalidHttpResponse(req.get_full_url(),
2080+ 'Unable to handle http code %d: %s'
2081+ % (code, msg))
2082+
2083+
2084+class Opener(object):
2085+ """A wrapper around urllib_request.build_opener
2086+
2087+ Daughter classes can override to build their own specific opener
2088+ """
2089+ # TODO: Provides hooks for daughter classes.
2090+
2091+ def __init__(self,
2092+ connection=ConnectionHandler,
2093+ redirect=HTTPRedirectHandler,
2094+ error=HTTPErrorProcessor,
2095+ report_activity=None,
2096+ ca_certs=None):
2097+ self._opener = urllib_request.build_opener(
2098+ connection(report_activity=report_activity, ca_certs=ca_certs),
2099+ redirect, error,
2100+ ProxyHandler(),
2101+ HTTPBasicAuthHandler(),
2102+ HTTPDigestAuthHandler(),
2103+ HTTPNegotiateAuthHandler(),
2104+ ProxyBasicAuthHandler(),
2105+ ProxyDigestAuthHandler(),
2106+ ProxyNegotiateAuthHandler(),
2107+ HTTPHandler,
2108+ HTTPSHandler,
2109+ HTTPDefaultErrorHandler,
2110+ )
2111+
2112+ self.open = self._opener.open
2113+ if DEBUG >= 9:
2114+ # When dealing with handler order, it's easy to mess
2115+ # things up, the following will help understand which
2116+ # handler is used, when and for what.
2117+ import pprint
2118+ pprint.pprint(self._opener.__dict__)
2119
2120
2121 class HttpTransport(ConnectedTransport):
2122
2123=== removed file 'breezy/transport/http/_urllib2_wrappers.py'
2124--- breezy/transport/http/_urllib2_wrappers.py 2018-11-22 03:26:33 +0000
2125+++ breezy/transport/http/_urllib2_wrappers.py 1970-01-01 00:00:00 +0000
2126@@ -1,1900 +0,0 @@
2127-# Copyright (C) 2006-2013, 2016, 2017 Canonical Ltd
2128-#
2129-# This program is free software; you can redistribute it and/or modify
2130-# it under the terms of the GNU General Public License as published by
2131-# the Free Software Foundation; either version 2 of the License, or
2132-# (at your option) any later version.
2133-#
2134-# This program is distributed in the hope that it will be useful,
2135-# but WITHOUT ANY WARRANTY; without even the implied warranty of
2136-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2137-# GNU General Public License for more details.
2138-#
2139-# You should have received a copy of the GNU General Public License
2140-# along with this program; if not, write to the Free Software
2141-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2142-
2143-"""Implementation of urllib2 tailored to Breezy's needs
2144-
2145-This file complements the urllib2 class hierarchy with custom classes.
2146-
2147-For instance, we create a new HTTPConnection and HTTPSConnection that inherit
2148-from the original urllib2.HTTP(s)Connection objects, but also have a new base
2149-which implements a custom getresponse and cleanup_pipe handlers.
2150-
2151-And then we implement custom HTTPHandler and HTTPSHandler classes, that use
2152-the custom HTTPConnection classes.
2153-
2154-We have a custom Response class, which lets us maintain a keep-alive
2155-connection even for requests that urllib2 doesn't expect to contain body data.
2156-
2157-And a custom Request class that lets us track redirections, and
2158-handle authentication schemes.
2159-
2160-For coherency with python libraries, we use capitalized header names throughout
2161-the code, even if the header names will be titled just before sending the
2162-request (see AbstractHTTPHandler.do_open).
2163-"""
2164-
2165-from __future__ import absolute_import
2166-
2167-DEBUG = 0
2168-
2169-# FIXME: Oversimplifying, two kind of exceptions should be
2170-# raised, once a request is issued: URLError before we have been
2171-# able to process the response, HTTPError after that. Process the
2172-# response means we are able to leave the socket clean, so if we
2173-# are not able to do that, we should close the connection. The
2174-# actual code more or less do that, tests should be written to
2175-# ensure that.
2176-
2177-import base64
2178-import errno
2179-try:
2180- import http.client as http_client
2181-except ImportError:
2182- import httplib as http_client
2183-import os
2184-import socket
2185-import urllib
2186-try:
2187- import urllib.request as urllib_request
2188-except ImportError: # python < 3
2189- import urllib2 as urllib_request
2190-try:
2191- from urllib.parse import urljoin, splitport, splittype, splithost
2192-except ImportError:
2193- from urlparse import urljoin
2194- from urllib import splitport, splittype, splithost
2195-import re
2196-import ssl
2197-import sys
2198-import time
2199-
2200-from ... import __version__ as breezy_version
2201-from ... import (
2202- config,
2203- debug,
2204- errors,
2205- lazy_import,
2206- osutils,
2207- trace,
2208- transport,
2209- ui,
2210- urlutils,
2211-)
2212-from ...sixish import (
2213- PY3,
2214- reraise,
2215- text_type,
2216-)
2217-
2218-try:
2219- _ = (ssl.match_hostname, ssl.CertificateError)
2220-except AttributeError:
2221- # Provide fallbacks for python < 2.7.9
2222- def match_hostname(cert, host):
2223- trace.warning(
2224- '%s cannot be verified, https certificates verification is only'
2225- ' available for python versions >= 2.7.9' % (host,))
2226- ssl.match_hostname = match_hostname
2227- ssl.CertificateError = ValueError
2228-
2229-
2230-# Note for packagers: if there is no package providing certs for your platform,
2231-# the curl project produces http://curl.haxx.se/ca/cacert.pem weekly.
2232-_ssl_ca_certs_known_locations = [
2233- u'/etc/ssl/certs/ca-certificates.crt', # Ubuntu/debian/gentoo
2234- u'/etc/pki/tls/certs/ca-bundle.crt', # Fedora/CentOS/RH
2235- u'/etc/ssl/ca-bundle.pem', # OpenSuse
2236- u'/etc/ssl/cert.pem', # OpenSuse
2237- u"/usr/local/share/certs/ca-root-nss.crt", # FreeBSD
2238- # XXX: Needs checking, can't trust the interweb ;) -- vila 2012-01-25
2239- u'/etc/openssl/certs/ca-certificates.crt', # Solaris
2240-]
2241-
2242-
2243-def default_ca_certs():
2244- if sys.platform == 'win32':
2245- return os.path.join(os.path.dirname(sys.executable), u"cacert.pem")
2246- elif sys.platform == 'darwin':
2247- # FIXME: Needs some default value for osx, waiting for osx installers
2248- # guys feedback -- vila 2012-01-25
2249- pass
2250- else:
2251- # Try known locations for friendly OSes providing the root certificates
2252- # without making them hard to use for any https client.
2253- for path in _ssl_ca_certs_known_locations:
2254- if os.path.exists(path):
2255- # First found wins
2256- return path
2257- # A default path that makes sense and will be mentioned in the error
2258- # presented to the user, even if not correct for all platforms
2259- return _ssl_ca_certs_known_locations[0]
2260-
2261-
2262-def ca_certs_from_store(path):
2263- if not os.path.exists(path):
2264- raise ValueError("ca certs path %s does not exist" % path)
2265- return path
2266-
2267-
2268-def cert_reqs_from_store(unicode_str):
2269- import ssl
2270- try:
2271- return {"required": ssl.CERT_REQUIRED,
2272- "none": ssl.CERT_NONE}[unicode_str]
2273- except KeyError:
2274- raise ValueError("invalid value %s" % unicode_str)
2275-
2276-
2277-def default_ca_reqs():
2278- if sys.platform in ('win32', 'darwin'):
2279- # FIXME: Once we get a native access to root certificates there, this
2280- # won't needed anymore. See http://pad.lv/920455 -- vila 2012-02-15
2281- return u'none'
2282- else:
2283- return u'required'
2284-
2285-
2286-opt_ssl_ca_certs = config.Option('ssl.ca_certs',
2287- from_unicode=ca_certs_from_store,
2288- default=default_ca_certs,
2289- invalid='warning',
2290- help="""\
2291-Path to certification authority certificates to trust.
2292-
2293-This should be a valid path to a bundle containing all root Certificate
2294-Authorities used to verify an https server certificate.
2295-
2296-Use ssl.cert_reqs=none to disable certificate verification.
2297-""")
2298-
2299-opt_ssl_cert_reqs = config.Option('ssl.cert_reqs',
2300- default=default_ca_reqs,
2301- from_unicode=cert_reqs_from_store,
2302- invalid='error',
2303- help="""\
2304-Whether to require a certificate from the remote side. (default:required)
2305-
2306-Possible values:
2307- * none: Certificates ignored
2308- * required: Certificates required and validated
2309-""")
2310-
2311-checked_kerberos = False
2312-kerberos = None
2313-
2314-
2315-class addinfourl(urllib_request.addinfourl):
2316- '''Replacement addinfourl class compatible with python-2.7's xmlrpclib
2317-
2318- In python-2.7, xmlrpclib expects that the response object that it receives
2319- has a getheader method. http_client.HTTPResponse provides this but
2320- urllib_request.addinfourl does not. Add the necessary functions here, ported to
2321- use the internal data structures of addinfourl.
2322- '''
2323-
2324- def getheader(self, name, default=None):
2325- if self.headers is None:
2326- raise http_client.ResponseNotReady()
2327- return self.headers.getheader(name, default)
2328-
2329- def getheaders(self):
2330- if self.headers is None:
2331- raise http_client.ResponseNotReady()
2332- return list(self.headers.items())
2333-
2334-
2335-class _ReportingFileSocket(object):
2336-
2337- def __init__(self, filesock, report_activity=None):
2338- self.filesock = filesock
2339- self._report_activity = report_activity
2340-
2341- def report_activity(self, size, direction):
2342- if self._report_activity:
2343- self._report_activity(size, direction)
2344-
2345- def read(self, size=1):
2346- s = self.filesock.read(size)
2347- self.report_activity(len(s), 'read')
2348- return s
2349-
2350- def readline(self, size=-1):
2351- s = self.filesock.readline(size)
2352- self.report_activity(len(s), 'read')
2353- return s
2354-
2355- def readinto(self, b):
2356- s = self.filesock.readinto(b)
2357- self.report_activity(s, 'read')
2358- return s
2359-
2360- def __getattr__(self, name):
2361- return getattr(self.filesock, name)
2362-
2363-
2364-class _ReportingSocket(object):
2365-
2366- def __init__(self, sock, report_activity=None):
2367- self.sock = sock
2368- self._report_activity = report_activity
2369-
2370- def report_activity(self, size, direction):
2371- if self._report_activity:
2372- self._report_activity(size, direction)
2373-
2374- def sendall(self, s, *args):
2375- self.sock.sendall(s, *args)
2376- self.report_activity(len(s), 'write')
2377-
2378- def recv(self, *args):
2379- s = self.sock.recv(*args)
2380- self.report_activity(len(s), 'read')
2381- return s
2382-
2383- def makefile(self, mode='r', bufsize=-1):
2384- # http_client creates a fileobject that doesn't do buffering, which
2385- # makes fp.readline() very expensive because it only reads one byte
2386- # at a time. So we wrap the socket in an object that forces
2387- # sock.makefile to make a buffered file.
2388- fsock = self.sock.makefile(mode, 65536)
2389- # And wrap that into a reporting kind of fileobject
2390- return _ReportingFileSocket(fsock, self._report_activity)
2391-
2392- def __getattr__(self, name):
2393- return getattr(self.sock, name)
2394-
2395-
2396-# We define our own Response class to keep our http_client pipe clean
2397-class Response(http_client.HTTPResponse):
2398- """Custom HTTPResponse, to avoid the need to decorate.
2399-
2400- http_client prefers to decorate the returned objects, rather
2401- than using a custom object.
2402- """
2403-
2404- # Some responses have bodies in which we have no interest
2405- _body_ignored_responses = [301, 302, 303, 307, 400, 401, 403, 404, 501]
2406-
2407- # in finish() below, we may have to discard several MB in the worst
2408- # case. To avoid buffering that much, we read and discard by chunks
2409- # instead. The underlying file is either a socket or a StringIO, so reading
2410- # 8k chunks should be fine.
2411- _discarded_buf_size = 8192
2412-
2413- if PY3:
2414- def __init__(self, sock, debuglevel=0, method=None, url=None):
2415- self.url = url
2416- super(Response, self).__init__(
2417- sock, debuglevel=debuglevel, method=method, url=url)
2418-
2419- def begin(self):
2420- """Begin to read the response from the server.
2421-
2422- http_client assumes that some responses get no content and do
2423- not even attempt to read the body in that case, leaving
2424- the body in the socket, blocking the next request. Let's
2425- try to workaround that.
2426- """
2427- http_client.HTTPResponse.begin(self)
2428- if self.status in self._body_ignored_responses:
2429- if self.debuglevel >= 2:
2430- print("For status: [%s], will ready body, length: %s" % (
2431- self.status, self.length))
2432- if not (self.length is None or self.will_close):
2433- # In some cases, we just can't read the body not
2434- # even try or we may encounter a 104, 'Connection
2435- # reset by peer' error if there is indeed no body
2436- # and the server closed the connection just after
2437- # having issued the response headers (even if the
2438- # headers indicate a Content-Type...)
2439- body = self.read(self.length)
2440- if self.debuglevel >= 9:
2441- # This one can be huge and is generally not interesting
2442- print("Consumed body: [%s]" % body)
2443- self.close()
2444- elif self.status == 200:
2445- # Whatever the request is, it went ok, so we surely don't want to
2446- # close the connection. Some cases are not correctly detected by
2447- # http_client.HTTPConnection.getresponse (called by
2448- # http_client.HTTPResponse.begin). The CONNECT response for the https
2449- # through proxy case is one. Note: the 'will_close' below refers
2450- # to the "true" socket between us and the server, whereas the
2451- # 'close()' above refers to the copy of that socket created by
2452- # http_client for the response itself. So, in the if above we close the
2453- # socket to indicate that we are done with the response whereas
2454- # below we keep the socket with the server opened.
2455- self.will_close = False
2456-
2457- def finish(self):
2458- """Finish reading the body.
2459-
2460- In some cases, the client may have left some bytes to read in the
2461- body. That will block the next request to succeed if we use a
2462- persistent connection. If we don't use a persistent connection, well,
2463- nothing will block the next request since a new connection will be
2464- issued anyway.
2465-
2466- :return: the number of bytes left on the socket (may be None)
2467- """
2468- pending = None
2469- if not self.isclosed():
2470- # Make sure nothing was left to be read on the socket
2471- pending = 0
2472- data = True
2473- while data and self.length:
2474- # read() will update self.length
2475- data = self.read(min(self.length, self._discarded_buf_size))
2476- pending += len(data)
2477- if pending:
2478- trace.mutter("%s bytes left on the HTTP socket", pending)
2479- self.close()
2480- return pending
2481-
2482-
2483-# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
2484-class AbstractHTTPConnection:
2485- """A custom HTTP(S) Connection, which can reset itself on a bad response"""
2486-
2487- response_class = Response
2488-
2489- # When we detect a server responding with the whole file to range requests,
2490- # we want to warn. But not below a given thresold.
2491- _range_warning_thresold = 1024 * 1024
2492-
2493- def __init__(self, report_activity=None):
2494- self._response = None
2495- self._report_activity = report_activity
2496- self._ranges_received_whole_file = None
2497-
2498- def _mutter_connect(self):
2499- netloc = '%s:%s' % (self.host, self.port)
2500- if self.proxied_host is not None:
2501- netloc += '(proxy for %s)' % self.proxied_host
2502- trace.mutter('* About to connect() to %s' % netloc)
2503-
2504- def getresponse(self):
2505- """Capture the response to be able to cleanup"""
2506- self._response = http_client.HTTPConnection.getresponse(self)
2507- return self._response
2508-
2509- def cleanup_pipe(self):
2510- """Read the remaining bytes of the last response if any."""
2511- if self._response is not None:
2512- try:
2513- pending = self._response.finish()
2514- # Warn the user (once)
2515- if (self._ranges_received_whole_file is None
2516- and self._response.status == 200
2517- and pending
2518- and pending > self._range_warning_thresold):
2519- self._ranges_received_whole_file = True
2520- trace.warning(
2521- 'Got a 200 response when asking for multiple ranges,'
2522- ' does your server at %s:%s support range requests?',
2523- self.host, self.port)
2524- except socket.error as e:
2525- # It's conceivable that the socket is in a bad state here
2526- # (including some test cases) and in this case, it doesn't need
2527- # cleaning anymore, so no need to fail, we just get rid of the
2528- # socket and let callers reconnect
2529- if (len(e.args) == 0
2530- or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
2531- raise
2532- self.close()
2533- self._response = None
2534- # Preserve our preciousss
2535- sock = self.sock
2536- self.sock = None
2537- # Let http_client.HTTPConnection do its housekeeping
2538- self.close()
2539- # Restore our preciousss
2540- self.sock = sock
2541-
2542- def _wrap_socket_for_reporting(self, sock):
2543- """Wrap the socket before anybody use it."""
2544- self.sock = _ReportingSocket(sock, self._report_activity)
2545-
2546-
class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """A plain-http connection with activity reporting and proxy awareness."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if not PY3:
            # strict=True rejects HTTP/0.9 responses, which we don't support
            http_client.HTTPConnection.__init__(self, host, port, strict=True)
        else:
            http_client.HTTPConnection.__init__(self, host, port)
        self.proxied_host = proxied_host
        # ca_certs is accepted only for signature compatibility with
        # HTTPSConnection; it has no meaning for plain http.

    def connect(self):
        """Open the TCP connection, muttering first when -Dhttp is set."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
2566-
2567-
class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """An https connection, verifying server certificates by default."""

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        if PY3:
            http_client.HTTPSConnection.__init__(
                self, host, port, key_file, cert_file)
        else:
            # Use strict=True since we don't support HTTP/0.9
            http_client.HTTPSConnection.__init__(self, host, port,
                                                 key_file, cert_file,
                                                 strict=True)
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        """Open the TCP connection; defer TLS when going through a proxy.

        When proxied, the TLS handshake with the origin server can only be
        done once the CONNECT tunnel is established, so connect_to_origin
        is invoked later (by HTTPSHandler).
        """
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the established socket with TLS towards the origin server.

        Certificate and hostname verification follow the ``ssl.cert_reqs``
        and ``ssl.ca_certs`` configuration options.

        :raises ssl.SSLError: if the TLS handshake or certificate
            verification fails (a hint is printed first).
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            # BUG FIX: this flag used to be set on the ssl *module*
            # (``ssl.check_hostname = True``), which had no effect on the
            # context.  Additionally, check_hostname must be disabled
            # *before* verify_mode may be set to CERT_NONE, otherwise
            # assigning verify_mode raises ValueError.
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=self.host)
        except ssl.SSLError:
            # BUG FIX: a missing trailing space used to render
            # "trusted CAcertificates" in this message.
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)
2634-
2635-
class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None,
                 accepted_errors=None):
        """:param method: The HTTP method, set statically (e.g. 'GET').
        :param headers: Optional dict of headers; defaults to a fresh
            empty dict (a shared mutable default would leak headers
            between requests).
        :param connection: The connection to issue the request on, if any.
        :param parent: The request this one was redirected from, if any.
        """
        # BUG FIX: 'headers' used to default to a shared mutable dict.
        urllib_request.Request.__init__(
            self, url, data, headers if headers is not None else {},
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        self.accepted_errors = accepted_errors
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the statically configured HTTP method."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        if PY3:
            host, port = splitport(self.host)
        else:
            host, port = splitport(self.get_host())
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and
        # a proxy, it sets Host to the https proxy, not the host we want to
        # talk to. I'm fairly sure this is our fault, but what is the cause is
        # an open question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
2697-
2698-
class _ConnectRequest(Request):
    """A CONNECT request, used to open an SSL tunnel through a proxy."""

    def __init__(self, request):
        """Constructor.

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # The full url here is a decoy; 'selector' is redefined below so
        # urllib_request is not confused by it.
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        # The CONNECT target is the proxied host itself.
        return self.proxied_host

    def get_selector(self):
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        The proxied host is already known by definition: a CONNECT request
        only ever happens when the connection goes through a proxy.  The
        usual masquerading (connect to the proxy while targeting another
        host) therefore does not apply; the connection already points at
        the proxy and we only want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)
2733-
2734-
class ConnectionHandler(urllib_request.BaseHandler):
    """Provides connection-sharing by pre-processing requests.

    urllib_request provides no way to access the HTTPConnection object
    internally used. But we need it in order to achieve
    connection sharing. So, we add it to the request just before
    it is processed, and then we override the do_open method for
    http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        """:param report_activity: optional callable used to report network
            activity, forwarded to the connections.
        :param ca_certs: optional CA bundle path, forwarded to https
            connections (ignored for plain http).
        """
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        """Create a new, not-yet-connected connection for ``request``.

        :raises urlutils.InvalidURL: if the request has no host or a
            non-numeric port.
        """
        host = request.host
        if not host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # We create a connection (but it will not connect until the first
        # request is made)
        try:
            connection = http_connection_class(
                host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            # (a non-numeric port); translate it into our own exception
            # type.  (The previously unused 'as exception' binding has
            # been dropped.)
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

        return connection

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request have no connection: create a new one,

        - the request have a connection: this one have been used
          already, let's capture it, so that we can give it to
          another transport to be reused. We don't do that
          ourselves: the Transport object get the connection from
          a first request and then propagate it, from request to
          request or to cloned transports.
        """
        connection = request.connection
        if connection is None:
            # Create a new one
            connection = self.create_connection(request, http_connection_class)
            request.connection = connection

        # All connections will pass here, propagate debug level
        connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)
2800-
2801-
class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers preventively added to every request unless the caller
    # already set them.
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': 'Breezy/%s' % breezy_version,
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting"""

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exceptions of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connection or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        :param http_class: the connection class used by do_open.
        :param request: the request being retried.
        :param first_try: False when this is already the retry, in which
            case the translated exception is raised.
        :return: the response of the retried request, when it succeeds.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            if PY3:
                origin_req_host = request.origin_req_host
            else:
                origin_req_host = request.get_origin_req_host()
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            reraise(exc_type, exc_val, exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                method = request.get_method()
                url = request.get_full_url()
                print('  Will retry, %s %r' % (method, url))
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print('  On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    if PY3:
                        selector = request.selector
                    else:
                        selector = request.get_selector()
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

            if self._debuglevel >= 2:
                print('On connection: [%r]' % request.connection)
                method = request.get_method()
                url = request.get_full_url()
                print('  Failed again, %s %r' % (method, url))
                print('  Will raise: [%r]' % my_exception)
            reraise(type(my_exception), my_exception, exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            if PY3:
                url = request.selector
            else:
                url = request.get_selector()
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        # On Python 3 the http.client response object is returned as-is;
        # everything below this early return is a Python-2-only path that
        # converts the response into an addinfourl object.
        if PY3:
            response.msg = response.reason
            return response

# FIXME: HTTPConnection does not fully support 100-continue (the
# server responses are just ignored)

#        if code == 100:
#            mutter('Will send the body')
#            # We can send the body now
#            body = request.data
#            if body is None:
#                raise URLError("No data given")
#            connection.send(body)
#            response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print('  For: %r(%r)' % (request.get_method(),
                                     request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print('  For: %r(%r)' % (request.get_method(),
                                         request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                except:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response
        return resp
3035-
3036-
class HTTPHandler(AbstractHTTPHandler):
    """Thin handler dispatching plain http requests to HTTPConnection."""

    def http_open(self, request):
        connection_class = HTTPConnection
        return self.do_open(connection_class, request)
3042-
3043-
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open an https request, establishing a CONNECT tunnel if proxied.

        For a not-yet-connected connection going through a proxy, a CONNECT
        request must first be issued so the proxy opens an encrypted tunnel
        to the real target; only then is the original request performed.
        """
        connection = request.connection
        if (connection.sock is None
                and connection.proxied_host is not None
                and request.get_method() != 'CONNECT'):  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth
            #   around CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # BUG FIX: this used to reference self.host, but handlers
                # have no 'host' attribute (that raised AttributeError
                # instead of the intended error); use the connection's
                # host, i.e. the proxy we were talking to.
                raise errors.ConnectionError(
                    "Can't connect to %s via proxy %s" % (
                        connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
3080-
3081-
class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can leads to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        # the URI. Using that mechanism with Breezy will violate the
        # protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occurs with conditional
        # GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        # our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        if PY3:
            origin_req_host = req.origin_req_host
        else:
            origin_req_host = req.get_origin_req_host()

        if code in (301, 302, 303, 307):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request to be able to clean the pipe of the associated
        connection, *before* issuing the redirected request but *after* having
        eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            # No redirection target supplied; nothing to do.
            return
        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # The caller asked not to follow; just record where we were
            # sent so it can decide what to do.
            req.redirected_to = newurl
            return fp

        newurl = urljoin(req.get_full_url(), newurl)

        # This call succeeds or raise an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
3204-
3205-
class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request during
    the request pre-processing instead of modifying it at _open time. As we
    capture (or create) the connection object during request processing, _open
    time was too late.

    The main task is to modify the request so that the connection is done to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may be
    against an https server proxied through an http proxy. So, https_request
    will be called, but later it's really http_open that will be called. This
    explains why we don't have to call self.parent.open as the urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of urllib_request implementation
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            # Bind a per-scheme pre-processor (instead of urllib's _open
            # hook) so the request is rewritten early enough for the
            # connection capture to see it.
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only by the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: scheme name ('http', 'https', 'no', ...).
        :param default_to: fallback key tried when ``name`` is absent;
            pass None to disable the fallback.
        :return: the proxy value, or None when none is configured.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                # NOTE(review): re.match only anchors at the start of the
                # string, so a no_proxy entry also matches any host it is
                # a prefix of — possibly broader than intended; confirm.
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoid the host
        return None

    def set_proxy(self, request, type):
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameter are available, we are handling the first
            # proxied request, initialize. scheme (the authentication scheme)
            # and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request
3353-
3354-
class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into a try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in
            the given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            # A scheme with no parameters at all (single word header)
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed.

        The 'modified' flag lets auth_required distinguish freshly acquired
        credentials from a retry with the very same (already failed) ones.
        """
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retry being recursive calls, None identifies the first retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        if PY3:
            server_headers = headers.get_all(self.auth_required_header)
        else:
            server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            raise KeyError('%s not found' % self.auth_required_header)

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe that
                # property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, even if the current handler doesn't
                    # succeed the credentials are wrong (or incomplete), but
                    # we know that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them change the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial url
            and then during dialog with the server).
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information in
        the prompt, so we build the prompt from the authentication dict which
        contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request # FIXME: Need test
3636-
3637-
class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    # Tried before digest (490) and basic (500): lower order wins.
    handler_order = 480
    # Kerberos supplies the identity itself, no username prompt is needed.
    requires_username = False

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match.

        Attempts a GSSAPI handshake and stores the resulting token in auth.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        resp = self._auth_match_kerberos(auth)
        if resp is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', resp)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host.

        :return: The client GSSAPI token, or None when kerberos is not
            available or the handshake failed.
        """
        global kerberos, checked_kerberos
        # Import the kerberos module lazily, and attempt it at most once per
        # process (checked_kerberos remembers a failed import).
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if ret < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], ret)
            return None
        ret = kerberos.authGSSClientStep(vc, "")
        if ret < 0:
            trace.mutter('authGSSClientStep failed: %d', ret)
            return None
        return kerberos.authGSSClientResponse(vc)

    def build_auth_header(self, auth, request):
        """See AbstractAuthHandler.build_auth_header."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return (auth.get('scheme', None) == 'negotiate' and
                auth.get('negotiate_response', None) is not None)
3692-
3693-
class BasicAuthHandler(AbstractAuthHandler):
    """Handler for the http 'basic' authentication scheme."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Return the 'Basic <base64 credentials>' header value."""
        credentials = '%s:%s' % (auth['user'], auth['password'])
        encoded = base64.b64encode(
            credentials.encode('utf-8')).decode('ascii')
        return 'Basic ' + encoded

    def extract_realm(self, header_value):
        """Extract the realm from a basic challenge.

        :return: A (match, realm) tuple; both are None when no realm
            parameter is present.
        """
        match = self.auth_regexp.search(header_value)
        if match is None:
            return None, None
        return match, match.group(1)

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match."""
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match is None:
            return False
        # Record the confirmed scheme and realm for later reuse.
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        missing_user = auth.get('user', None) is None
        missing_password = auth.get('password', None) is None
        if missing_user or missing_password:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)
        return True

    def auth_params_reusable(self, auth):
        # A known scheme means a previous authentication succeeded, so the
        # parameters can be sent preemptively without further checks.
        return auth.get('scheme', None) == 'basic'
3736-
3737-
def get_digest_algorithm_impls(algorithm):
    """Return the hash helpers for a digest challenge algorithm.

    :param algorithm: The algorithm name from the challenge ('MD5' or
        'SHA'; the comparison is case sensitive).
    :return: A tuple (H, KD) of callables as defined by RFC 2617, or
        (None, None) when the algorithm is not supported.
    """
    if algorithm == 'MD5':
        def hash_func(x):
            return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        hash_func = osutils.sha_string
    else:
        # Unsupported algorithm, the caller must refuse the challenge.
        return None, None

    def key_digest(secret, data):
        return hash_func(('%s:%s' % (secret, data)).encode('utf-8'))

    return hash_func, key_digest
3749-
3750-
def get_new_cnonce(nonce, nonce_count):
    """Build a fresh 16-character client nonce for the digest scheme.

    The value mixes the server nonce, the use count, the current time and
    some random characters before hashing, so each request gets a distinct
    cnonce.
    """
    seed = ':'.join(
        [nonce, str(nonce_count), time.ctime(), osutils.rand_chars(8)])
    return osutils.sha_string(seed.encode('utf-8'))[:16]
3755-
3756-
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """See AbstractAuthHandler.auth_match.

        Parses the digest challenge and stores everything needed to compute
        the response digest into auth. Only qop=auth is supported.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth': # No auth-int so far
            return False

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            # Unsupported algorithm, refuse the challenge
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Build the Digest header value as described by RFC 2617.

        Each call increments the nonce count, so every request built from
        the same server nonce gets a distinct digest.
        """
        if PY3:
            selector = request.selector
        else:
            selector = request.get_selector()
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        # A1 and A2 as defined by RFC 2617
        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
3855-
3856-
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Get the auth params from the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Set the auth params for the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        """See AbstractAuthHandler._build_password_prompt."""
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """See AbstractAuthHandler._build_username_prompt."""
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        # Called by the urllib_request framework when the server rejects
        # the request with a 401.
        return self.auth_required(req, headers)
3884-
3885-
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params stored on the request."""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the proxy auth params on the request."""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Same as the http prompt, prefixed to name the proxy."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Same as the http prompt, prefixed to name the proxy."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        # Called by the urllib_request framework when the proxy rejects
        # the request with a 407.
        return self.auth_required(req, headers)
3920-
3921-
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Basic scheme authentication against the http server (401)."""
3924-
3925-
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Basic scheme authentication against the proxy (407)."""
3928-
3929-
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Digest scheme authentication against the http server (401)."""
3932-
3933-
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Digest scheme authentication against the proxy (407)."""
3936-
3937-
class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Negotiate (kerberos) authentication against the http server (401)."""
3940-
3941-
class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Negotiate (kerberos) authentication against the proxy (407)."""
3944-
3945-
class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    accepted_errors = [200,  # Ok
                       206,  # Partial content
                       404,  # Not found
                       ]
    """The error codes the caller will handle.

    This can be specialized in the request on a case-by case basis, but the
    common cases are covered here.
    """

    def http_response(self, request, response):
        """Pass accepted codes through, route the rest to the error chain."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # The request may carry its own list of acceptable codes.
        accepted = request.accepted_errors
        if accepted is None:
            accepted = self.accepted_errors

        if code in accepted:
            return response
        # Let the opener's error machinery (and ultimately the Transport)
        # deal with everything else.
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
3976-
3977-
class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise a breezy error for any code no other handler accepted."""
        url = req.get_full_url()
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % url)
        raise errors.InvalidHttpResponse(
            url, 'Unable to handle http code %d: %s' % (code, msg))
3990-
3991-
class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        """Build an opener wired with the whole handler stack.

        :param connection: The handler class managing the connections
            (instantiated here with report_activity and ca_certs).
        :param redirect: The handler class for http redirections.
        :param error: The handler class processing response codes.
        :param report_activity: Optional activity-reporting callback,
            forwarded to the connection handler.
        :param ca_certs: Optional CA certificates, forwarded to the
            connection handler.
        """
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        # Expose the bound open method directly; callers use opener.open().
        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)

Subscribers

People subscribed via source and target branches