Merge lp:~jelmer/brz/split-http into lp:brz/3.1

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/split-http
Merge into: lp:brz/3.1
Diff against target: 5544 lines (+2668/-2624)
11 files modified
breezy/bzr/tests/test_bzrdir.py (+1/-1)
breezy/bzr/tests/test_smart_transport.py (+2/-2)
breezy/plugins/fossil/__init__.py (+1/-1)
breezy/tests/__init__.py (+1/-1)
breezy/tests/test_http.py (+24/-17)
breezy/tests/test_http_response.py (+3/-3)
breezy/tests/test_selftest.py (+1/-1)
breezy/tests/test_transport.py (+2/-1)
breezy/transport/__init__.py (+4/-4)
breezy/transport/http/__init__.py (+3/-2593)
breezy/transport/http/urllib.py (+2626/-0)
To merge this branch: bzr merge lp:~jelmer/brz/split-http
Reviewer Review Type Date Requested Status
Martin Packman Approve
Review via email: mp+395620@code.launchpad.net

Description of the change

Move urllib-specific bits into a separate module.

To post a comment you must log in.
Revision history for this message
Martin Packman (gz) wrote :

Looks good!

review: Approve
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote :
Revision history for this message
The Breezy Bot (the-breezy-bot) wrote :

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'breezy/bzr/tests/test_bzrdir.py'
2--- breezy/bzr/tests/test_bzrdir.py 2020-07-26 15:29:07 +0000
3+++ breezy/bzr/tests/test_bzrdir.py 2020-12-27 18:07:47 +0000
4@@ -68,7 +68,7 @@
5 memory,
6 pathfilter,
7 )
8-from ...transport.http import HttpTransport
9+from ...transport.http.urllib import HttpTransport
10 from ...transport.nosmart import NoSmartTransportDecorator
11 from ...transport.readonly import ReadonlyTransportDecorator
12 from .. import knitrepo, knitpack_repo
13
14=== modified file 'breezy/bzr/tests/test_smart_transport.py'
15--- breezy/bzr/tests/test_smart_transport.py 2020-06-10 02:56:53 +0000
16+++ breezy/bzr/tests/test_smart_transport.py 2020-12-27 18:07:47 +0000
17@@ -59,12 +59,12 @@
18 int2byte,
19 )
20 from ...transport import (
21- http,
22 local,
23 memory,
24 remote,
25 ssh,
26 )
27+from ...transport.http import urllib
28 from . import (
29 test_smart,
30 )
31@@ -4276,7 +4276,7 @@
32
33 def test_smart_http_medium_request_accept_bytes(self):
34 medium = FakeHTTPMedium()
35- request = http.SmartClientHTTPMediumRequest(medium)
36+ request = urllib.SmartClientHTTPMediumRequest(medium)
37 request.accept_bytes(b'abc')
38 request.accept_bytes(b'def')
39 self.assertEqual(None, medium.written_request)
40
41=== modified file 'breezy/plugins/fossil/__init__.py'
42--- breezy/plugins/fossil/__init__.py 2020-07-29 22:12:53 +0000
43+++ breezy/plugins/fossil/__init__.py 2020-12-27 18:07:47 +0000
44@@ -71,7 +71,7 @@
45
46 @classmethod
47 def probe_transport(klass, transport):
48- from breezy.transport.http import HttpTransport
49+ from breezy.transport.http.urllib import HttpTransport
50 if not isinstance(transport, HttpTransport):
51 raise errors.NotBranchError(path=transport.base)
52 response = transport.request(
53
54=== modified file 'breezy/tests/__init__.py'
55--- breezy/tests/__init__.py 2020-07-28 01:28:20 +0000
56+++ breezy/tests/__init__.py 2020-12-27 18:07:47 +0000
57@@ -4208,7 +4208,7 @@
58 'breezy.tests',
59 'breezy.tests.fixtures',
60 'breezy.timestamp',
61- 'breezy.transport.http',
62+ 'breezy.transport.http.urllib',
63 'breezy.version_info_formats.format_custom',
64 ]
65
66
67=== modified file 'breezy/tests/test_http.py'
68--- breezy/tests/test_http.py 2019-05-19 22:33:19 +0000
69+++ breezy/tests/test_http.py 2020-12-27 18:07:47 +0000
70@@ -62,11 +62,18 @@
71 multiply_scenarios,
72 )
73 from ..transport import (
74- http,
75 remote,
76 )
77-from ..transport.http import (
78+from ..transport.http import urllib
79+from ..transport.http.urllib import (
80+ AbstractAuthHandler,
81+ BasicAuthHandler,
82 HttpTransport,
83+ HTTPAuthHandler,
84+ HTTPConnection,
85+ HTTPSConnection,
86+ ProxyHandler,
87+ Request,
88 )
89
90
91@@ -225,7 +232,7 @@
92
93 def parse_header(self, header, auth_handler_class=None):
94 if auth_handler_class is None:
95- auth_handler_class = http.AbstractAuthHandler
96+ auth_handler_class = AbstractAuthHandler
97 self.auth_handler = auth_handler_class()
98 return self.auth_handler._parse_auth_header(header)
99
100@@ -246,7 +253,7 @@
101 self.assertEqual('realm="Thou should not pass"', remainder)
102
103 def test_build_basic_header_with_long_creds(self):
104- handler = http.BasicAuthHandler()
105+ handler = BasicAuthHandler()
106 user = 'user' * 10 # length 40
107 password = 'password' * 5 # length 40
108 header = handler.build_auth_header(
109@@ -258,7 +265,7 @@
110 def test_basic_extract_realm(self):
111 scheme, remainder = self.parse_header(
112 'Basic realm="Thou should not pass"',
113- http.BasicAuthHandler)
114+ BasicAuthHandler)
115 match, realm = self.auth_handler.extract_realm(remainder)
116 self.assertTrue(match is not None)
117 self.assertEqual(u'Thou should not pass', realm)
118@@ -514,7 +521,7 @@
119 offsets = [(start, end - start + 1) for start, end in ranges]
120 coalesce = transport.Transport._coalesce_offsets
121 coalesced = list(coalesce(offsets, limit=0, fudge_factor=0))
122- range_header = http.HttpTransport._range_header
123+ range_header = HttpTransport._range_header
124 self.assertEqual(value, range_header(coalesced, tail))
125
126 def test_range_header_single(self):
127@@ -1129,13 +1136,13 @@
128 """
129
130 def _proxied_request(self):
131- handler = http.ProxyHandler()
132- request = http.Request('GET', 'http://baz/buzzle')
133+ handler = ProxyHandler()
134+ request = Request('GET', 'http://baz/buzzle')
135 handler.set_proxy(request, 'http')
136 return request
137
138 def assertEvaluateProxyBypass(self, expected, host, no_proxy):
139- handler = http.ProxyHandler()
140+ handler = ProxyHandler()
141 self.assertEqual(expected,
142 handler.evaluate_proxy_bypass(host, no_proxy))
143
144@@ -1327,24 +1334,24 @@
145 self.get_new_transport().get('a').read())
146
147
148-class RedirectedRequest(http.Request):
149+class RedirectedRequest(Request):
150 """Request following redirections. """
151
152- init_orig = http.Request.__init__
153+ init_orig = Request.__init__
154
155 def __init__(self, method, url, *args, **kwargs):
156 """Constructor.
157
158 """
159 # Since the tests using this class will replace
160- # http.Request, we can't just call the base class __init__
161+ # Request, we can't just call the base class __init__
162 # or we'll loop.
163 RedirectedRequest.init_orig(self, method, url, *args, **kwargs)
164 self.follow_redirections = True
165
166
167 def install_redirected_request(test):
168- test.overrideAttr(http, 'Request', RedirectedRequest)
169+ test.overrideAttr(urllib, 'Request', RedirectedRequest)
170
171
172 def cleanup_http_redirection_connections(test):
173@@ -1361,13 +1368,13 @@
174 test.http_connect_orig(connection)
175 test.addCleanup(socket_disconnect, connection.sock)
176 test.http_connect_orig = test.overrideAttr(
177- http.HTTPConnection, 'connect', connect)
178+ HTTPConnection, 'connect', connect)
179
180 def connect(connection):
181 test.https_connect_orig(connection)
182 test.addCleanup(socket_disconnect, connection.sock)
183 test.https_connect_orig = test.overrideAttr(
184- http.HTTPSConnection, 'connect', connect)
185+ HTTPSConnection, 'connect', connect)
186
187
188 class TestHTTPSilentRedirections(http_utils.TestCaseWithRedirectedWebserver):
189@@ -1375,7 +1382,7 @@
190
191 http implementations do not redirect silently anymore (they
192 do not redirect at all in fact). The mechanism is still in
193- place at the http.Request level and these tests
194+ place at the Request level and these tests
195 exercise it.
196 """
197
198@@ -1499,7 +1506,7 @@
199 password = 'foo'
200 _setup_authentication_config(scheme='http', host='localhost',
201 user=user, password=password)
202- handler = http.HTTPAuthHandler()
203+ handler = HTTPAuthHandler()
204 got_pass = handler.get_user_password(dict(
205 user='joe',
206 protocol='http',
207
208=== modified file 'breezy/tests/test_http_response.py'
209--- breezy/tests/test_http_response.py 2020-06-09 17:24:27 +0000
210+++ breezy/tests/test_http_response.py 2020-12-27 18:07:47 +0000
211@@ -57,7 +57,7 @@
212 )
213 from ..transport.http import (
214 response,
215- HTTPConnection,
216+ urllib,
217 )
218 from .file_utils import (
219 FakeReadFile,
220@@ -74,10 +74,10 @@
221 return self.readfile
222
223
224-class FakeHTTPConnection(HTTPConnection):
225+class FakeHTTPConnection(urllib.HTTPConnection):
226
227 def __init__(self, sock):
228- HTTPConnection.__init__(self, 'localhost')
229+ urllib.HTTPConnection.__init__(self, 'localhost')
230 # Set the socket to bypass the connection
231 self.sock = sock
232
233
234=== modified file 'breezy/tests/test_selftest.py'
235--- breezy/tests/test_selftest.py 2020-06-21 02:15:25 +0000
236+++ breezy/tests/test_selftest.py 2020-12-27 18:07:47 +0000
237@@ -692,7 +692,7 @@
238
239 def test_get_readonly_url_http(self):
240 from .http_server import HttpServer
241- from ..transport.http import HttpTransport
242+ from ..transport.http.urllib import HttpTransport
243 self.transport_server = test_server.LocalURLServer
244 self.transport_readonly_server = HttpServer
245 # calling get_readonly_transport() gives us a HTTP server instance.
246
247=== modified file 'breezy/tests/test_transport.py'
248--- breezy/tests/test_transport.py 2019-03-03 19:49:20 +0000
249+++ breezy/tests/test_transport.py 2020-12-27 18:07:47 +0000
250@@ -40,6 +40,7 @@
251 pathfilter,
252 readonly,
253 )
254+from ..transport.http import urllib
255 import breezy.transport.trace
256 from . import (
257 features,
258@@ -1104,6 +1105,6 @@
259
260 def test_truncation(self):
261 fake_html = "<p>something!\n" * 1000
262- result = http.unhtml_roughly(fake_html)
263+ result = urllib.unhtml_roughly(fake_html)
264 self.assertEqual(len(result), 1000)
265 self.assertStartsWith(result, " something!")
266
267=== modified file 'breezy/transport/__init__.py'
268--- breezy/transport/__init__.py 2020-06-21 02:15:25 +0000
269+++ breezy/transport/__init__.py 2020-12-27 18:07:47 +0000
270@@ -1661,12 +1661,12 @@
271 register_transport_proto('http+urllib://',
272 # help="Read-only access of branches exported on the web."
273 register_netloc=True)
274-register_lazy_transport('http+urllib://', 'breezy.transport.http',
275+register_lazy_transport('http+urllib://', 'breezy.transport.http.urllib',
276 'HttpTransport')
277 register_transport_proto('https+urllib://',
278 # help="Read-only access of branches exported on the web using SSL."
279 register_netloc=True)
280-register_lazy_transport('https+urllib://', 'breezy.transport.http',
281+register_lazy_transport('https+urllib://', 'breezy.transport.http.urllib',
282 'HttpTransport')
283 # Default http transports (last declared wins (if it can be imported))
284 register_transport_proto('http://',
285@@ -1674,9 +1674,9 @@
286 register_transport_proto('https://',
287 help="Read-only access of branches exported on the web using SSL.")
288 # The default http implementation is urllib
289-register_lazy_transport('http://', 'breezy.transport.http',
290+register_lazy_transport('http://', 'breezy.transport.http.urllib',
291 'HttpTransport')
292-register_lazy_transport('https://', 'breezy.transport.http',
293+register_lazy_transport('https://', 'breezy.transport.http.urllib',
294 'HttpTransport')
295
296 register_transport_proto(
297
298=== modified file 'breezy/transport/http/__init__.py'
299--- breezy/transport/http/__init__.py 2020-07-30 21:37:51 +0000
300+++ breezy/transport/http/__init__.py 2020-12-27 18:07:47 +0000
301@@ -21,67 +21,15 @@
302
303 from __future__ import absolute_import
304
305-DEBUG = 0
306
307-import base64
308-import cgi
309-import errno
310 import os
311-import re
312-import socket
313 import ssl
314 import sys
315-import time
316-import urllib
317-import weakref
318-
319-try:
320- import http.client as http_client
321-except ImportError:
322- import httplib as http_client
323-try:
324- import urllib.request as urllib_request
325-except ImportError: # python < 3
326- import urllib2 as urllib_request
327-try:
328- from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
329-except ImportError:
330- from urlparse import urljoin
331- from urllib import splitport, splittype, splithost, urlencode
332-
333-# TODO: handle_response should be integrated into the http/__init__.py
334-from .response import handle_response
335-
336-# FIXME: Oversimplifying, two kind of exceptions should be
337-# raised, once a request is issued: URLError before we have been
338-# able to process the response, HTTPError after that. Process the
339-# response means we are able to leave the socket clean, so if we
340-# are not able to do that, we should close the connection. The
341-# actual code more or less do that, tests should be written to
342-# ensure that.
343-
344-from ... import __version__ as breezy_version
345+
346+
347 from ... import (
348+ version_string as breezy_version,
349 config,
350- debug,
351- errors,
352- lazy_import,
353- osutils,
354- trace,
355- transport,
356- ui,
357- urlutils,
358-)
359-from ...bzr.smart import medium
360-from ...sixish import (
361- PY3,
362- reraise,
363- text_type,
364-)
365-from ...trace import mutter
366-from ...transport import (
367- ConnectedTransport,
368- UnusableRedirect,
369 )
370
371
372@@ -181,2541 +129,3 @@
373 * none: Certificates ignored
374 * required: Certificates required and validated
375 """)
376-
377-checked_kerberos = False
378-kerberos = None
379-
380-
381-class addinfourl(urllib_request.addinfourl):
382- '''Replacement addinfourl class compatible with python-2.7's xmlrpclib
383-
384- In python-2.7, xmlrpclib expects that the response object that it receives
385- has a getheader method. http_client.HTTPResponse provides this but
386- urllib_request.addinfourl does not. Add the necessary functions here, ported to
387- use the internal data structures of addinfourl.
388- '''
389-
390- def getheader(self, name, default=None):
391- if self.headers is None:
392- raise http_client.ResponseNotReady()
393- return self.headers.getheader(name, default)
394-
395- def getheaders(self):
396- if self.headers is None:
397- raise http_client.ResponseNotReady()
398- return list(self.headers.items())
399-
400-
401-class _ReportingFileSocket(object):
402-
403- def __init__(self, filesock, report_activity=None):
404- self.filesock = filesock
405- self._report_activity = report_activity
406-
407- def report_activity(self, size, direction):
408- if self._report_activity:
409- self._report_activity(size, direction)
410-
411- def read(self, size=1):
412- s = self.filesock.read(size)
413- self.report_activity(len(s), 'read')
414- return s
415-
416- def readline(self, size=-1):
417- s = self.filesock.readline(size)
418- self.report_activity(len(s), 'read')
419- return s
420-
421- def readinto(self, b):
422- s = self.filesock.readinto(b)
423- self.report_activity(s, 'read')
424- return s
425-
426- def __getattr__(self, name):
427- return getattr(self.filesock, name)
428-
429-
430-class _ReportingSocket(object):
431-
432- def __init__(self, sock, report_activity=None):
433- self.sock = sock
434- self._report_activity = report_activity
435-
436- def report_activity(self, size, direction):
437- if self._report_activity:
438- self._report_activity(size, direction)
439-
440- def sendall(self, s, *args):
441- self.sock.sendall(s, *args)
442- self.report_activity(len(s), 'write')
443-
444- def recv(self, *args):
445- s = self.sock.recv(*args)
446- self.report_activity(len(s), 'read')
447- return s
448-
449- def makefile(self, mode='r', bufsize=-1):
450- # http_client creates a fileobject that doesn't do buffering, which
451- # makes fp.readline() very expensive because it only reads one byte
452- # at a time. So we wrap the socket in an object that forces
453- # sock.makefile to make a buffered file.
454- fsock = self.sock.makefile(mode, 65536)
455- # And wrap that into a reporting kind of fileobject
456- return _ReportingFileSocket(fsock, self._report_activity)
457-
458- def __getattr__(self, name):
459- return getattr(self.sock, name)
460-
461-
462-# We define our own Response class to keep our http_client pipe clean
463-class Response(http_client.HTTPResponse):
464- """Custom HTTPResponse, to avoid the need to decorate.
465-
466- http_client prefers to decorate the returned objects, rather
467- than using a custom object.
468- """
469-
470- # Some responses have bodies in which we have no interest
471- _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]
472-
473- # in finish() below, we may have to discard several MB in the worst
474- # case. To avoid buffering that much, we read and discard by chunks
475- # instead. The underlying file is either a socket or a StringIO, so reading
476- # 8k chunks should be fine.
477- _discarded_buf_size = 8192
478-
479- if PY3:
480- def __init__(self, sock, debuglevel=0, method=None, url=None):
481- self.url = url
482- super(Response, self).__init__(
483- sock, debuglevel=debuglevel, method=method, url=url)
484-
485- def begin(self):
486- """Begin to read the response from the server.
487-
488- http_client assumes that some responses get no content and do
489- not even attempt to read the body in that case, leaving
490- the body in the socket, blocking the next request. Let's
491- try to workaround that.
492- """
493- http_client.HTTPResponse.begin(self)
494- if self.status in self._body_ignored_responses:
495- if self.debuglevel >= 2:
496- print("For status: [%s], will ready body, length: %s" % (
497- self.status, self.length))
498- if not (self.length is None or self.will_close):
499- # In some cases, we just can't read the body not
500- # even try or we may encounter a 104, 'Connection
501- # reset by peer' error if there is indeed no body
502- # and the server closed the connection just after
503- # having issued the response headers (even if the
504- # headers indicate a Content-Type...)
505- body = self.read(self.length)
506- if self.debuglevel >= 9:
507- # This one can be huge and is generally not interesting
508- print("Consumed body: [%s]" % body)
509- self.close()
510- elif self.status == 200:
511- # Whatever the request is, it went ok, so we surely don't want to
512- # close the connection. Some cases are not correctly detected by
513- # http_client.HTTPConnection.getresponse (called by
514- # http_client.HTTPResponse.begin). The CONNECT response for the https
515- # through proxy case is one. Note: the 'will_close' below refers
516- # to the "true" socket between us and the server, whereas the
517- # 'close()' above refers to the copy of that socket created by
518- # http_client for the response itself. So, in the if above we close the
519- # socket to indicate that we are done with the response whereas
520- # below we keep the socket with the server opened.
521- self.will_close = False
522-
523- def finish(self):
524- """Finish reading the body.
525-
526- In some cases, the client may have left some bytes to read in the
527- body. That will block the next request to succeed if we use a
528- persistent connection. If we don't use a persistent connection, well,
529- nothing will block the next request since a new connection will be
530- issued anyway.
531-
532- :return: the number of bytes left on the socket (may be None)
533- """
534- pending = None
535- if not self.isclosed():
536- # Make sure nothing was left to be read on the socket
537- pending = 0
538- data = True
539- while data and self.length:
540- # read() will update self.length
541- data = self.read(min(self.length, self._discarded_buf_size))
542- pending += len(data)
543- if pending:
544- trace.mutter("%s bytes left on the HTTP socket", pending)
545- self.close()
546- return pending
547-
548-
549-# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
550-class AbstractHTTPConnection:
551- """A custom HTTP(S) Connection, which can reset itself on a bad response"""
552-
553- response_class = Response
554-
555- # When we detect a server responding with the whole file to range requests,
556- # we want to warn. But not below a given thresold.
557- _range_warning_thresold = 1024 * 1024
558-
559- def __init__(self, report_activity=None):
560- self._response = None
561- self._report_activity = report_activity
562- self._ranges_received_whole_file = None
563-
564- def _mutter_connect(self):
565- netloc = '%s:%s' % (self.host, self.port)
566- if self.proxied_host is not None:
567- netloc += '(proxy for %s)' % self.proxied_host
568- trace.mutter('* About to connect() to %s' % netloc)
569-
570- def getresponse(self):
571- """Capture the response to be able to cleanup"""
572- self._response = http_client.HTTPConnection.getresponse(self)
573- return self._response
574-
575- def cleanup_pipe(self):
576- """Read the remaining bytes of the last response if any."""
577- if self._response is not None:
578- try:
579- pending = self._response.finish()
580- # Warn the user (once)
581- if (self._ranges_received_whole_file is None
582- and self._response.status == 200
583- and pending
584- and pending > self._range_warning_thresold):
585- self._ranges_received_whole_file = True
586- trace.warning(
587- 'Got a 200 response when asking for multiple ranges,'
588- ' does your server at %s:%s support range requests?',
589- self.host, self.port)
590- except socket.error as e:
591- # It's conceivable that the socket is in a bad state here
592- # (including some test cases) and in this case, it doesn't need
593- # cleaning anymore, so no need to fail, we just get rid of the
594- # socket and let callers reconnect
595- if (len(e.args) == 0
596- or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
597- raise
598- self.close()
599- self._response = None
600- # Preserve our preciousss
601- sock = self.sock
602- self.sock = None
603- # Let http_client.HTTPConnection do its housekeeping
604- self.close()
605- # Restore our preciousss
606- self.sock = sock
607-
608- def _wrap_socket_for_reporting(self, sock):
609- """Wrap the socket before anybody use it."""
610- self.sock = _ReportingSocket(sock, self._report_activity)
611-
612-
613-class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
614-
615- # XXX: Needs refactoring at the caller level.
616- def __init__(self, host, port=None, proxied_host=None,
617- report_activity=None, ca_certs=None):
618- AbstractHTTPConnection.__init__(self, report_activity=report_activity)
619- if PY3:
620- http_client.HTTPConnection.__init__(self, host, port)
621- else:
622- # Use strict=True since we don't support HTTP/0.9
623- http_client.HTTPConnection.__init__(self, host, port, strict=True)
624- self.proxied_host = proxied_host
625- # ca_certs is ignored, it's only relevant for https
626-
627- def connect(self):
628- if 'http' in debug.debug_flags:
629- self._mutter_connect()
630- http_client.HTTPConnection.connect(self)
631- self._wrap_socket_for_reporting(self.sock)
632-
633-
634-class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
635-
636- def __init__(self, host, port=None, key_file=None, cert_file=None,
637- proxied_host=None,
638- report_activity=None, ca_certs=None):
639- AbstractHTTPConnection.__init__(self, report_activity=report_activity)
640- if PY3:
641- http_client.HTTPSConnection.__init__(
642- self, host, port, key_file, cert_file)
643- else:
644- # Use strict=True since we don't support HTTP/0.9
645- http_client.HTTPSConnection.__init__(self, host, port,
646- key_file, cert_file, strict=True)
647- self.proxied_host = proxied_host
648- self.ca_certs = ca_certs
649-
650- def connect(self):
651- if 'http' in debug.debug_flags:
652- self._mutter_connect()
653- http_client.HTTPConnection.connect(self)
654- self._wrap_socket_for_reporting(self.sock)
655- if self.proxied_host is None:
656- self.connect_to_origin()
657-
658- def connect_to_origin(self):
659- # FIXME JRV 2011-12-18: Use location config here?
660- config_stack = config.GlobalStack()
661- cert_reqs = config_stack.get('ssl.cert_reqs')
662- if self.proxied_host is not None:
663- host = self.proxied_host.split(":", 1)[0]
664- else:
665- host = self.host
666- if cert_reqs == ssl.CERT_NONE:
667- ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
668- ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
669- ca_certs = None
670- else:
671- if self.ca_certs is None:
672- ca_certs = config_stack.get('ssl.ca_certs')
673- else:
674- ca_certs = self.ca_certs
675- if ca_certs is None:
676- trace.warning(
677- "No valid trusted SSL CA certificates file set. See "
678- "'brz help ssl.ca_certs' for more information on setting "
679- "trusted CAs.")
680- try:
681- ssl_context = ssl.create_default_context(
682- purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
683- ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
684- if self.cert_file:
685- ssl_context.load_cert_chain(
686- keyfile=self.key_file, certfile=self.cert_file)
687- ssl_context.verify_mode = cert_reqs
688- ssl_sock = ssl_context.wrap_socket(
689- self.sock, server_hostname=self.host)
690- except ssl.SSLError:
691- trace.note(
692- "\n"
693- "See `brz help ssl.ca_certs` for how to specify trusted CA"
694- "certificates.\n"
695- "Pass -Ossl.cert_reqs=none to disable certificate "
696- "verification entirely.\n")
697- raise
698- # Wrap the ssl socket before anybody use it
699- self._wrap_socket_for_reporting(ssl_sock)
700-
701-
702-class Request(urllib_request.Request):
703- """A custom Request object.
704-
705- urllib_request determines the request method heuristically (based on
706- the presence or absence of data). We set the method
707- statically.
708-
709- The Request object tracks:
710- - the connection the request will be made on.
711- - the authentication parameters needed to preventively set
712- the authentication header once a first authentication have
713- been made.
714- """
715-
716- def __init__(self, method, url, data=None, headers={},
717- origin_req_host=None, unverifiable=False,
718- connection=None, parent=None):
719- urllib_request.Request.__init__(
720- self, url, data, headers,
721- origin_req_host, unverifiable)
722- self.method = method
723- self.connection = connection
724- # To handle redirections
725- self.parent = parent
726- self.redirected_to = None
727- # Unless told otherwise, redirections are not followed
728- self.follow_redirections = False
729- # auth and proxy_auth are dicts containing, at least
730- # (scheme, host, port, realm, user, password, protocol, path).
731- # The dict entries are mostly handled by the AuthHandler.
732- # Some authentication schemes may add more entries.
733- self.auth = {}
734- self.proxy_auth = {}
735- self.proxied_host = None
736-
737- def get_method(self):
738- return self.method
739-
740- def set_proxy(self, proxy, type):
741- """Set the proxy and remember the proxied host."""
742- if PY3:
743- host, port = splitport(self.host)
744- else:
745- host, port = splitport(self.get_host())
746- if port is None:
747- # We need to set the default port ourselves way before it gets set
748- # in the HTTP[S]Connection object at build time.
749- if self.type == 'https':
750- conn_class = HTTPSConnection
751- else:
752- conn_class = HTTPConnection
753- port = conn_class.default_port
754- self.proxied_host = '%s:%s' % (host, port)
755- urllib_request.Request.set_proxy(self, proxy, type)
756- # When urllib_request makes a https request with our wrapper code and a proxy,
757- # it sets Host to the https proxy, not the host we want to talk to.
758- # I'm fairly sure this is our fault, but what is the cause is an open
759- # question. -- Robert Collins May 8 2010.
760- self.add_unredirected_header('Host', self.proxied_host)
761-
762-
763-class _ConnectRequest(Request):
764-
765- def __init__(self, request):
766- """Constructor
767-
768- :param request: the first request sent to the proxied host, already
769- processed by the opener (i.e. proxied_host is already set).
770- """
771- # We give a fake url and redefine selector or urllib_request will be
772- # confused
773- Request.__init__(self, 'CONNECT', request.get_full_url(),
774- connection=request.connection)
775- if request.proxied_host is None:
776- raise AssertionError()
777- self.proxied_host = request.proxied_host
778-
779- @property
780- def selector(self):
781- return self.proxied_host
782-
783- def get_selector(self):
784- return self.selector
785-
786- def set_proxy(self, proxy, type):
787- """Set the proxy without remembering the proxied host.
788-
789- We already know the proxied host by definition, the CONNECT request
790- occurs only when the connection goes through a proxy. The usual
791- processing (masquerade the request so that the connection is done to
792- the proxy while the request is targeted at another host) does not apply
793- here. In fact, the connection is already established with proxy and we
794- just want to enable the SSL tunneling.
795- """
796- urllib_request.Request.set_proxy(self, proxy, type)
797-
798-
799-class ConnectionHandler(urllib_request.BaseHandler):
800- """Provides connection-sharing by pre-processing requests.
801-
802- urllib_request provides no way to access the HTTPConnection object
803- internally used. But we need it in order to achieve
804- connection sharing. So, we add it to the request just before
805- it is processed, and then we override the do_open method for
806- http[s] requests in AbstractHTTPHandler.
807- """
808-
809- handler_order = 1000 # after all pre-processings
810-
811- def __init__(self, report_activity=None, ca_certs=None):
812- self._report_activity = report_activity
813- self.ca_certs = ca_certs
814-
815- def create_connection(self, request, http_connection_class):
816- host = request.host
817- if not host:
818- # Just a bit of paranoia here, this should have been
819- # handled in the higher levels
820- raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')
821-
822- # We create a connection (but it will not connect until the first
823- # request is made)
824- try:
825- connection = http_connection_class(
826- host, proxied_host=request.proxied_host,
827- report_activity=self._report_activity,
828- ca_certs=self.ca_certs)
829- except http_client.InvalidURL as exception:
830- # There is only one occurrence of InvalidURL in http_client
831- raise urlutils.InvalidURL(request.get_full_url(),
832- extra='nonnumeric port')
833-
834- return connection
835-
836- def capture_connection(self, request, http_connection_class):
837- """Capture or inject the request connection.
838-
839- Two cases:
840- - the request have no connection: create a new one,
841-
842- - the request have a connection: this one have been used
843- already, let's capture it, so that we can give it to
844- another transport to be reused. We don't do that
845- ourselves: the Transport object get the connection from
846- a first request and then propagate it, from request to
847- request or to cloned transports.
848- """
849- connection = request.connection
850- if connection is None:
851- # Create a new one
852- connection = self.create_connection(request, http_connection_class)
853- request.connection = connection
854-
855- # All connections will pass here, propagate debug level
856- connection.set_debuglevel(DEBUG)
857- return request
858-
859- def http_request(self, request):
860- return self.capture_connection(request, HTTPConnection)
861-
862- def https_request(self, request):
863- return self.capture_connection(request, HTTPSConnection)
864-
865-
866-class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
867- """A custom handler for HTTP(S) requests.
868-
869- We overrive urllib_request.AbstractHTTPHandler to get a better
870- control of the connection, the ability to implement new
871- request types and return a response able to cope with
872- persistent connections.
873- """
874-
875- # We change our order to be before urllib_request HTTP[S]Handlers
876- # and be chosen instead of them (the first http_open called
877- # wins).
878- handler_order = 400
879-
880- _default_headers = {'Pragma': 'no-cache',
881- 'Cache-control': 'max-age=0',
882- 'Connection': 'Keep-Alive',
883- 'User-agent': default_user_agent(),
884- 'Accept': '*/*',
885- }
886-
887- def __init__(self):
888- urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)
889-
890- def http_request(self, request):
891- """Common headers setting"""
892-
893- for name, value in self._default_headers.items():
894- if name not in request.headers:
895- request.headers[name] = value
896- # FIXME: We may have to add the Content-Length header if
897- # we have data to send.
898- return request
899-
900- def retry_or_raise(self, http_class, request, first_try):
901- """Retry the request (once) or raise the exception.
902-
903- urllib_request raises exception of application level kind, we
904- just have to translate them.
905-
906- http_client can raise exceptions of transport level (badly
907- formatted dialog, loss of connexion or socket level
908- problems). In that case we should issue the request again
909- (http_client will close and reopen a new connection if
910- needed).
911- """
912- # When an exception occurs, we give back the original
913- # Traceback or the bugs are hard to diagnose.
914- exc_type, exc_val, exc_tb = sys.exc_info()
915- if exc_type == socket.gaierror:
916- # No need to retry, that will not help
917- if PY3:
918- origin_req_host = request.origin_req_host
919- else:
920- origin_req_host = request.get_origin_req_host()
921- raise errors.ConnectionError("Couldn't resolve host '%s'"
922- % origin_req_host,
923- orig_error=exc_val)
924- elif isinstance(exc_val, http_client.ImproperConnectionState):
925- # The http_client pipeline is in incorrect state, it's a bug in our
926- # implementation.
927- reraise(exc_type, exc_val, exc_tb)
928- else:
929- if first_try:
930- if self._debuglevel >= 2:
931- print('Received exception: [%r]' % exc_val)
932- print(' On connection: [%r]' % request.connection)
933- method = request.get_method()
934- url = request.get_full_url()
935- print(' Will retry, %s %r' % (method, url))
936- request.connection.close()
937- response = self.do_open(http_class, request, False)
938- else:
939- if self._debuglevel >= 2:
940- print('Received second exception: [%r]' % exc_val)
941- print(' On connection: [%r]' % request.connection)
942- if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
943- # http_client.BadStatusLine and
944- # http_client.UnknownProtocol indicates that a
945- # bogus server was encountered or a bad
946- # connection (i.e. transient errors) is
947- # experimented, we have already retried once
948- # for that request so we raise the exception.
949- my_exception = errors.InvalidHttpResponse(
950- request.get_full_url(),
951- 'Bad status line received',
952- orig_error=exc_val)
953- elif (isinstance(exc_val, socket.error) and len(exc_val.args)
954- and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
955- # 10053 == WSAECONNABORTED
956- # 10054 == WSAECONNRESET
957- raise errors.ConnectionReset(
958- "Connection lost while sending request.")
959- else:
960- # All other exception are considered connection related.
961-
962- # socket errors generally occurs for reasons
963- # far outside our scope, so closing the
964- # connection and retrying is the best we can
965- # do.
966- if PY3:
967- selector = request.selector
968- else:
969- selector = request.get_selector()
970- my_exception = errors.ConnectionError(
971- msg='while sending %s %s:' % (request.get_method(),
972- selector),
973- orig_error=exc_val)
974-
975- if self._debuglevel >= 2:
976- print('On connection: [%r]' % request.connection)
977- method = request.get_method()
978- url = request.get_full_url()
979- print(' Failed again, %s %r' % (method, url))
980- print(' Will raise: [%r]' % my_exception)
981- reraise(type(my_exception), my_exception, exc_tb)
982- return response
983-
984- def do_open(self, http_class, request, first_try=True):
985- """See urllib_request.AbstractHTTPHandler.do_open for the general idea.
986-
987- The request will be retried once if it fails.
988- """
989- connection = request.connection
990- if connection is None:
991- raise AssertionError(
992- 'Cannot process a request without a connection')
993-
994- # Get all the headers
995- headers = {}
996- headers.update(request.header_items())
997- headers.update(request.unredirected_hdrs)
998- # Some servers or proxies will choke on headers not properly
999- # cased. http_client/urllib/urllib_request all use capitalize to get canonical
1000- # header names, but only python2.5 urllib_request use title() to fix them just
1001- # before sending the request. And not all versions of python 2.5 do
1002- # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
1003- # ourself below.
1004- headers = {name.title(): val for name, val in headers.items()}
1005-
1006- try:
1007- method = request.get_method()
1008- if PY3:
1009- url = request.selector
1010- else:
1011- url = request.get_selector()
1012- if sys.version_info[:2] >= (3, 6):
1013- connection._send_request(method, url,
1014- # FIXME: implements 100-continue
1015- # None, # We don't send the body yet
1016- request.data,
1017- headers, encode_chunked=False)
1018- else:
1019- connection._send_request(method, url,
1020- # FIXME: implements 100-continue
1021- # None, # We don't send the body yet
1022- request.data,
1023- headers)
1024- if 'http' in debug.debug_flags:
1025- trace.mutter('> %s %s' % (method, url))
1026- hdrs = []
1027- for k, v in headers.items():
1028- # People are often told to paste -Dhttp output to help
1029- # debug. Don't compromise credentials.
1030- if k in ('Authorization', 'Proxy-Authorization'):
1031- v = '<masked>'
1032- hdrs.append('%s: %s' % (k, v))
1033- trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
1034- if self._debuglevel >= 1:
1035- print('Request sent: [%r] from (%s)'
1036- % (request, request.connection.sock.getsockname()))
1037- response = connection.getresponse()
1038- convert_to_addinfourl = True
1039- except (ssl.SSLError, ssl.CertificateError):
1040- # Something is wrong with either the certificate or the hostname,
1041- # re-trying won't help
1042- raise
1043- except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
1044- socket.error, http_client.HTTPException):
1045- response = self.retry_or_raise(http_class, request, first_try)
1046- convert_to_addinfourl = False
1047-
1048- if PY3:
1049- response.msg = response.reason
1050- return response
1051-
1052-# FIXME: HTTPConnection does not fully support 100-continue (the
1053-# server responses are just ignored)
1054-
1055-# if code == 100:
1056-# mutter('Will send the body')
1057-# # We can send the body now
1058-# body = request.data
1059-# if body is None:
1060-# raise URLError("No data given")
1061-# connection.send(body)
1062-# response = connection.getresponse()
1063-
1064- if self._debuglevel >= 2:
1065- print('Receives response: %r' % response)
1066- print(' For: %r(%r)' % (request.get_method(),
1067- request.get_full_url()))
1068-
1069- if convert_to_addinfourl:
1070- # Shamelessly copied from urllib_request
1071- req = request
1072- r = response
1073- r.recv = r.read
1074- fp = socket._fileobject(r, bufsize=65536)
1075- resp = addinfourl(fp, r.msg, req.get_full_url())
1076- resp.code = r.status
1077- resp.msg = r.reason
1078- resp.version = r.version
1079- if self._debuglevel >= 2:
1080- print('Create addinfourl: %r' % resp)
1081- print(' For: %r(%r)' % (request.get_method(),
1082- request.get_full_url()))
1083- if 'http' in debug.debug_flags:
1084- version = 'HTTP/%d.%d'
1085- try:
1086- version = version % (resp.version / 10,
1087- resp.version % 10)
1088- except:
1089- version = 'HTTP/%r' % resp.version
1090- trace.mutter('< %s %s %s' % (version, resp.code,
1091- resp.msg))
1092- # Use the raw header lines instead of treating resp.info() as a
1093- # dict since we may miss duplicated headers otherwise.
1094- hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
1095- trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
1096- else:
1097- resp = response
1098- return resp
1099-
1100-
1101-class HTTPHandler(AbstractHTTPHandler):
1102- """A custom handler that just thunks into HTTPConnection"""
1103-
1104- def http_open(self, request):
1105- return self.do_open(HTTPConnection, request)
1106-
1107-
1108-class HTTPSHandler(AbstractHTTPHandler):
1109- """A custom handler that just thunks into HTTPSConnection"""
1110-
1111- https_request = AbstractHTTPHandler.http_request
1112-
1113- def https_open(self, request):
1114- connection = request.connection
1115- if connection.sock is None and \
1116- connection.proxied_host is not None and \
1117- request.get_method() != 'CONNECT': # Don't loop
1118- # FIXME: We need a gazillion connection tests here, but we still
1119- # miss a https server :-( :
1120- # - with and without proxy
1121- # - with and without certificate
1122- # - with self-signed certificate
1123- # - with and without authentication
1124- # - with good and bad credentials (especially the proxy auth around
1125- # CONNECT)
1126- # - with basic and digest schemes
1127- # - reconnection on errors
1128- # - connection persistence behaviour (including reconnection)
1129-
1130- # We are about to connect for the first time via a proxy, we must
1131- # issue a CONNECT request first to establish the encrypted link
1132- connect = _ConnectRequest(request)
1133- response = self.parent.open(connect)
1134- if response.code != 200:
1135- raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
1136- connect.proxied_host, self.host))
1137- # Housekeeping
1138- connection.cleanup_pipe()
1139- # Establish the connection encryption
1140- connection.connect_to_origin()
1141- # Propagate the connection to the original request
1142- request.connection = connection
1143- return self.do_open(HTTPSConnection, request)
1144-
1145-
1146-class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
1147- """Handles redirect requests.
1148-
1149- We have to implement our own scheme because we use a specific
1150- Request object and because we want to implement a specific
1151- policy.
1152- """
1153- _debuglevel = DEBUG
1154- # RFC2616 says that only read requests should be redirected
1155- # without interacting with the user. But Breezy uses some
1156- # shortcuts to optimize against roundtrips which can leads to
1157- # write requests being issued before read requests of
1158- # containing dirs can be redirected. So we redirect write
1159- # requests in the same way which seems to respect the spirit
1160- # of the RFC if not its letter.
1161-
1162- def redirect_request(self, req, fp, code, msg, headers, newurl):
1163- """See urllib_request.HTTPRedirectHandler.redirect_request"""
1164- # We would have preferred to update the request instead
1165- # of creating a new one, but the urllib_request.Request object
1166- # has a too complicated creation process to provide a
1167- # simple enough equivalent update process. Instead, when
1168- # redirecting, we only update the following request in
1169- # the redirect chain with a reference to the parent
1170- # request .
1171-
1172- # Some codes make no sense in our context and are treated
1173- # as errors:
1174-
1175- # 300: Multiple choices for different representations of
1176- # the URI. Using that mechanisn with Breezy will violate the
1177- # protocol neutrality of Transport.
1178-
1179- # 304: Not modified (SHOULD only occurs with conditional
1180- # GETs which are not used by our implementation)
1181-
1182- # 305: Use proxy. I can't imagine this one occurring in
1183- # our context-- vila/20060909
1184-
1185- # 306: Unused (if the RFC says so...)
1186-
1187- # If the code is 302 and the request is HEAD, some may
1188- # think that it is a sufficent hint that the file exists
1189- # and that we MAY avoid following the redirections. But
1190- # if we want to be sure, we MUST follow them.
1191-
1192- if PY3:
1193- origin_req_host = req.origin_req_host
1194- else:
1195- origin_req_host = req.get_origin_req_host()
1196-
1197- if code in (301, 302, 303, 307, 308):
1198- return Request(req.get_method(), newurl,
1199- headers=req.headers,
1200- origin_req_host=origin_req_host,
1201- unverifiable=True,
1202- # TODO: It will be nice to be able to
1203- # detect virtual hosts sharing the same
1204- # IP address, that will allow us to
1205- # share the same connection...
1206- connection=None,
1207- parent=req,
1208- )
1209- else:
1210- raise urllib_request.HTTPError(
1211- req.get_full_url(), code, msg, headers, fp)
1212-
1213- def http_error_302(self, req, fp, code, msg, headers):
1214- """Requests the redirected to URI.
1215-
1216- Copied from urllib_request to be able to clean the pipe of the associated
1217- connection, *before* issuing the redirected request but *after* having
1218- eventually raised an error.
1219- """
1220- # Some servers (incorrectly) return multiple Location headers
1221- # (so probably same goes for URI). Use first header.
1222-
1223- # TODO: Once we get rid of addinfourl objects, the
1224- # following will need to be updated to use correct case
1225- # for headers.
1226- if 'location' in headers:
1227- newurl = headers.get('location')
1228- elif 'uri' in headers:
1229- newurl = headers.get('uri')
1230- else:
1231- return
1232-
1233- newurl = urljoin(req.get_full_url(), newurl)
1234-
1235- if self._debuglevel >= 1:
1236- print('Redirected to: %s (followed: %r)' % (newurl,
1237- req.follow_redirections))
1238- if req.follow_redirections is False:
1239- req.redirected_to = newurl
1240- return fp
1241-
1242- # This call succeeds or raise an error. urllib_request returns
1243- # if redirect_request returns None, but our
1244- # redirect_request never returns None.
1245- redirected_req = self.redirect_request(req, fp, code, msg, headers,
1246- newurl)
1247-
1248- # loop detection
1249- # .redirect_dict has a key url if url was previously visited.
1250- if hasattr(req, 'redirect_dict'):
1251- visited = redirected_req.redirect_dict = req.redirect_dict
1252- if (visited.get(newurl, 0) >= self.max_repeats or
1253- len(visited) >= self.max_redirections):
1254- raise urllib_request.HTTPError(req.get_full_url(), code,
1255- self.inf_msg + msg, headers, fp)
1256- else:
1257- visited = redirected_req.redirect_dict = req.redirect_dict = {}
1258- visited[newurl] = visited.get(newurl, 0) + 1
1259-
1260- # We can close the fp now that we are sure that we won't
1261- # use it with HTTPError.
1262- fp.close()
1263- # We have all we need already in the response
1264- req.connection.cleanup_pipe()
1265-
1266- return self.parent.open(redirected_req)
1267-
1268- http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302
1269-
1270-
1271-class ProxyHandler(urllib_request.ProxyHandler):
1272- """Handles proxy setting.
1273-
1274- Copied and modified from urllib_request to be able to modify the request during
1275- the request pre-processing instead of modifying it at _open time. As we
1276- capture (or create) the connection object during request processing, _open
1277- time was too late.
1278-
1279- The main task is to modify the request so that the connection is done to
1280- the proxy while the request still refers to the destination host.
1281-
1282- Note: the proxy handling *may* modify the protocol used; the request may be
1283- against an https server proxied through an http proxy. So, https_request
1284- will be called, but later it's really http_open that will be called. This
1285- explains why we don't have to call self.parent.open as the urllib_request did.
1286- """
1287-
1288- # Proxies must be in front
1289- handler_order = 100
1290- _debuglevel = DEBUG
1291-
1292- def __init__(self, proxies=None):
1293- urllib_request.ProxyHandler.__init__(self, proxies)
1294- # First, let's get rid of urllib_request implementation
1295- for type, proxy in self.proxies.items():
1296- if self._debuglevel >= 3:
1297- print('Will unbind %s_open for %r' % (type, proxy))
1298- delattr(self, '%s_open' % type)
1299-
1300- def bind_scheme_request(proxy, scheme):
1301- if proxy is None:
1302- return
1303- scheme_request = scheme + '_request'
1304- if self._debuglevel >= 3:
1305- print('Will bind %s for %r' % (scheme_request, proxy))
1306- setattr(self, scheme_request,
1307- lambda request: self.set_proxy(request, scheme))
1308- # We are interested only by the http[s] proxies
1309- http_proxy = self.get_proxy_env_var('http')
1310- bind_scheme_request(http_proxy, 'http')
1311- https_proxy = self.get_proxy_env_var('https')
1312- bind_scheme_request(https_proxy, 'https')
1313-
1314- def get_proxy_env_var(self, name, default_to='all'):
1315- """Get a proxy env var.
1316-
1317- Note that we indirectly rely on
1318- urllib.getproxies_environment taking into account the
1319- uppercased values for proxy variables.
1320- """
1321- try:
1322- return self.proxies[name.lower()]
1323- except KeyError:
1324- if default_to is not None:
1325- # Try to get the alternate environment variable
1326- try:
1327- return self.proxies[default_to]
1328- except KeyError:
1329- pass
1330- return None
1331-
1332- def proxy_bypass(self, host):
1333- """Check if host should be proxied or not.
1334-
1335- :returns: True to skip the proxy, False otherwise.
1336- """
1337- no_proxy = self.get_proxy_env_var('no', default_to=None)
1338- bypass = self.evaluate_proxy_bypass(host, no_proxy)
1339- if bypass is None:
1340- # Nevertheless, there are platform-specific ways to
1341- # ignore proxies...
1342- return urllib_request.proxy_bypass(host)
1343- else:
1344- return bypass
1345-
1346- def evaluate_proxy_bypass(self, host, no_proxy):
1347- """Check the host against a comma-separated no_proxy list as a string.
1348-
1349- :param host: ``host:port`` being requested
1350-
1351- :param no_proxy: comma-separated list of hosts to access directly.
1352-
1353- :returns: True to skip the proxy, False not to, or None to
1354- leave it to urllib.
1355- """
1356- if no_proxy is None:
1357- # All hosts are proxied
1358- return False
1359- hhost, hport = splitport(host)
1360- # Does host match any of the domains mentioned in
1361- # no_proxy ? The rules about what is authorized in no_proxy
1362- # are fuzzy (to say the least). We try to allow most
1363- # commonly seen values.
1364- for domain in no_proxy.split(','):
1365- domain = domain.strip()
1366- if domain == '':
1367- continue
1368- dhost, dport = splitport(domain)
1369- if hport == dport or dport is None:
1370- # Protect glob chars
1371- dhost = dhost.replace(".", r"\.")
1372- dhost = dhost.replace("*", r".*")
1373- dhost = dhost.replace("?", r".")
1374- if re.match(dhost, hhost, re.IGNORECASE):
1375- return True
1376- # Nothing explicitly avoid the host
1377- return None
1378-
1379- def set_proxy(self, request, type):
1380- if PY3:
1381- host = request.host
1382- else:
1383- host = request.get_host()
1384- if self.proxy_bypass(host):
1385- return request
1386-
1387- proxy = self.get_proxy_env_var(type)
1388- if self._debuglevel >= 3:
1389- print('set_proxy %s_request for %r' % (type, proxy))
1390- # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
1391- # grok user:password@host:port as well as
1392- # http://user:password@host:port
1393-
1394- parsed_url = transport.ConnectedTransport._split_url(proxy)
1395- if not parsed_url.host:
1396- raise urlutils.InvalidURL(proxy, 'No host component')
1397-
1398- if request.proxy_auth == {}:
1399- # No proxy auth parameter are available, we are handling the first
1400- # proxied request, intialize. scheme (the authentication scheme)
1401- # and realm will be set by the AuthHandler
1402- request.proxy_auth = {
1403- 'host': parsed_url.host,
1404- 'port': parsed_url.port,
1405- 'user': parsed_url.user,
1406- 'password': parsed_url.password,
1407- 'protocol': parsed_url.scheme,
1408- # We ignore path since we connect to a proxy
1409- 'path': None}
1410- if parsed_url.port is None:
1411- phost = parsed_url.host
1412- else:
1413- phost = parsed_url.host + ':%d' % parsed_url.port
1414- request.set_proxy(phost, type)
1415- if self._debuglevel >= 3:
1416- print('set_proxy: proxy set to %s://%s' % (type, phost))
1417- return request
1418-
1419-
1420-class AbstractAuthHandler(urllib_request.BaseHandler):
1421- """A custom abstract authentication handler for all http authentications.
1422-
1423- Provides the meat to handle authentication errors and
1424- preventively set authentication headers after the first
1425- successful authentication.
1426-
1427- This can be used for http and proxy, as well as for basic, negotiate and
1428- digest authentications.
1429-
1430- This provides an unified interface for all authentication handlers
1431- (urllib_request provides far too many with different policies).
1432-
1433- The interaction between this handler and the urllib_request
1434- framework is not obvious, it works as follow:
1435-
1436- opener.open(request) is called:
1437-
1438- - that may trigger http_request which will add an authentication header
1439- (self.build_header) if enough info is available.
1440-
1441- - the request is sent to the server,
1442-
1443- - if an authentication error is received self.auth_required is called,
1444- we acquire the authentication info in the error headers and call
1445- self.auth_match to check that we are able to try the
1446- authentication and complete the authentication parameters,
1447-
1448- - we call parent.open(request), that may trigger http_request
1449- and will add a header (self.build_header), but here we have
1450- all the required info (keep in mind that the request and
1451- authentication used in the recursive calls are really (and must be)
1452- the *same* objects).
1453-
1454- - if the call returns a response, the authentication have been
1455- successful and the request authentication parameters have been updated.
1456- """
1457-
1458- scheme = None
1459- """The scheme as it appears in the server header (lower cased)"""
1460-
1461- _max_retry = 3
1462- """We don't want to retry authenticating endlessly"""
1463-
1464- requires_username = True
1465- """Whether the auth mechanism requires a username."""
1466-
1467- # The following attributes should be defined by daughter
1468- # classes:
1469- # - auth_required_header: the header received from the server
1470- # - auth_header: the header sent in the request
1471-
1472- def __init__(self):
1473- # We want to know when we enter into an try/fail cycle of
1474- # authentications so we initialize to None to indicate that we aren't
1475- # in such a cycle by default.
1476- self._retry_count = None
1477-
1478- def _parse_auth_header(self, server_header):
1479- """Parse the authentication header.
1480-
1481- :param server_header: The value of the header sent by the server
1482- describing the authenticaion request.
1483-
1484- :return: A tuple (scheme, remainder) scheme being the first word in the
1485- given header (lower cased), remainder may be None.
1486- """
1487- try:
1488- scheme, remainder = server_header.split(None, 1)
1489- except ValueError:
1490- scheme = server_header
1491- remainder = None
1492- return (scheme.lower(), remainder)
1493-
1494- def update_auth(self, auth, key, value):
1495- """Update a value in auth marking the auth as modified if needed"""
1496- old_value = auth.get(key, None)
1497- if old_value != value:
1498- auth[key] = value
1499- auth['modified'] = True
1500-
1501- def auth_required(self, request, headers):
1502- """Retry the request if the auth scheme is ours.
1503-
1504- :param request: The request needing authentication.
1505- :param headers: The headers for the authentication error response.
1506- :return: None or the response for the authenticated request.
1507- """
1508- # Don't try to authenticate endlessly
1509- if self._retry_count is None:
1510- # The retry being recusrsive calls, None identify the first retry
1511- self._retry_count = 1
1512- else:
1513- self._retry_count += 1
1514- if self._retry_count > self._max_retry:
1515- # Let's be ready for next round
1516- self._retry_count = None
1517- return None
1518- if PY3:
1519- server_headers = headers.get_all(self.auth_required_header)
1520- else:
1521- server_headers = headers.getheaders(self.auth_required_header)
1522- if not server_headers:
1523- # The http error MUST have the associated
1524- # header. This must never happen in production code.
1525- trace.mutter('%s not found', self.auth_required_header)
1526- return None
1527-
1528- auth = self.get_auth(request)
1529- auth['modified'] = False
1530- # Put some common info in auth if the caller didn't
1531- if auth.get('path', None) is None:
1532- parsed_url = urlutils.URL.from_string(request.get_full_url())
1533- self.update_auth(auth, 'protocol', parsed_url.scheme)
1534- self.update_auth(auth, 'host', parsed_url.host)
1535- self.update_auth(auth, 'port', parsed_url.port)
1536- self.update_auth(auth, 'path', parsed_url.path)
1537- # FIXME: the auth handler should be selected at a single place instead
1538- # of letting all handlers try to match all headers, but the current
1539- # design doesn't allow a simple implementation.
1540- for server_header in server_headers:
1541- # Several schemes can be proposed by the server, try to match each
1542- # one in turn
1543- matching_handler = self.auth_match(server_header, auth)
1544- if matching_handler:
1545- # auth_match may have modified auth (by adding the
1546- # password or changing the realm, for example)
1547- if (request.get_header(self.auth_header, None) is not None
1548- and not auth['modified']):
1549- # We already tried that, give up
1550- return None
1551-
1552- # Only the most secure scheme proposed by the server should be
1553- # used, since the handlers use 'handler_order' to describe that
1554- # property, the first handler tried takes precedence, the
1555- # others should not attempt to authenticate if the best one
1556- # failed.
1557- best_scheme = auth.get('best_scheme', None)
1558- if best_scheme is None:
1559- # At that point, if current handler should doesn't succeed
1560- # the credentials are wrong (or incomplete), but we know
1561- # that the associated scheme should be used.
1562- best_scheme = auth['best_scheme'] = self.scheme
1563- if best_scheme != self.scheme:
1564- continue
1565-
1566- if self.requires_username and auth.get('user', None) is None:
1567- # Without a known user, we can't authenticate
1568- return None
1569-
1570- # Housekeeping
1571- request.connection.cleanup_pipe()
1572- # Retry the request with an authentication header added
1573- response = self.parent.open(request)
1574- if response:
1575- self.auth_successful(request, response)
1576- return response
1577- # We are not qualified to handle the authentication.
1578- # Note: the authentication error handling will try all
1579- # available handlers. If one of them authenticates
1580- # successfully, a response will be returned. If none of
1581- # them succeeds, None will be returned and the error
1582- # handler will raise the 401 'Unauthorized' or the 407
1583- # 'Proxy Authentication Required' error.
1584- return None
1585-
1586- def add_auth_header(self, request, header):
1587- """Add the authentication header to the request"""
1588- request.add_unredirected_header(self.auth_header, header)
1589-
1590- def auth_match(self, header, auth):
1591- """Check that we are able to handle that authentication scheme.
1592-
1593- The request authentication parameters may need to be
1594- updated with info from the server. Some of these
1595- parameters, when combined, are considered to be the
1596- authentication key, if one of them change the
1597- authentication result may change. 'user' and 'password'
1598- are exampls, but some auth schemes may have others
1599- (digest's nonce is an example, digest's nonce_count is a
1600- *counter-example*). Such parameters must be updated by
1601- using the update_auth() method.
1602-
1603- :param header: The authentication header sent by the server.
1604- :param auth: The auth parameters already known. They may be
1605- updated.
1606- :returns: True if we can try to handle the authentication.
1607- """
1608- raise NotImplementedError(self.auth_match)
1609-
1610- def build_auth_header(self, auth, request):
1611- """Build the value of the header used to authenticate.
1612-
1613- :param auth: The auth parameters needed to build the header.
1614- :param request: The request needing authentication.
1615-
1616- :return: None or header.
1617- """
1618- raise NotImplementedError(self.build_auth_header)
1619-
1620- def auth_successful(self, request, response):
1621- """The authentification was successful for the request.
1622-
1623- Additional infos may be available in the response.
1624-
1625- :param request: The succesfully authenticated request.
1626- :param response: The server response (may contain auth info).
1627- """
1628- # It may happen that we need to reconnect later, let's be ready
1629- self._retry_count = None
1630-
1631- def get_user_password(self, auth):
1632- """Ask user for a password if none is already available.
1633-
1634- :param auth: authentication info gathered so far (from the initial url
1635- and then during dialog with the server).
1636- """
1637- auth_conf = config.AuthenticationConfig()
1638- user = auth.get('user', None)
1639- password = auth.get('password', None)
1640- realm = auth['realm']
1641- port = auth.get('port', None)
1642-
1643- if user is None:
1644- user = auth_conf.get_user(auth['protocol'], auth['host'],
1645- port=port, path=auth['path'],
1646- realm=realm, ask=True,
1647- prompt=self.build_username_prompt(auth))
1648- if user is not None and password is None:
1649- password = auth_conf.get_password(
1650- auth['protocol'], auth['host'], user,
1651- port=port,
1652- path=auth['path'], realm=realm,
1653- prompt=self.build_password_prompt(auth))
1654-
1655- return user, password
1656-
1657- def _build_password_prompt(self, auth):
1658- """Build a prompt taking the protocol used into account.
1659-
1660- The AuthHandler is used by http and https, we want that information in
1661- the prompt, so we build the prompt from the authentication dict which
1662- contains all the needed parts.
1663-
1664- Also, http and proxy AuthHandlers present different prompts to the
1665- user. The daughter classes should implements a public
1666- build_password_prompt using this method.
1667- """
1668- prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
1669- realm = auth['realm']
1670- if realm is not None:
1671- prompt += u", Realm: '%s'" % realm
1672- prompt += u' password'
1673- return prompt
1674-
1675- def _build_username_prompt(self, auth):
1676- """Build a prompt taking the protocol used into account.
1677-
1678- The AuthHandler is used by http and https, we want that information in
1679- the prompt, so we build the prompt from the authentication dict which
1680- contains all the needed parts.
1681-
1682- Also, http and proxy AuthHandlers present different prompts to the
1683- user. The daughter classes should implements a public
1684- build_username_prompt using this method.
1685- """
1686- prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
1687- realm = auth['realm']
1688- if realm is not None:
1689- prompt += u", Realm: '%s'" % realm
1690- prompt += u' username'
1691- return prompt
1692-
1693- def http_request(self, request):
1694- """Insert an authentication header if information is available"""
1695- auth = self.get_auth(request)
1696- if self.auth_params_reusable(auth):
1697- self.add_auth_header(
1698- request, self.build_auth_header(auth, request))
1699- return request
1700-
1701- https_request = http_request # FIXME: Need test
1702-
1703-
1704-class NegotiateAuthHandler(AbstractAuthHandler):
1705- """A authentication handler that handles WWW-Authenticate: Negotiate.
1706-
1707- At the moment this handler supports just Kerberos. In the future,
1708- NTLM support may also be added.
1709- """
1710-
1711- scheme = 'negotiate'
1712- handler_order = 480
1713- requires_username = False
1714-
1715- def auth_match(self, header, auth):
1716- scheme, raw_auth = self._parse_auth_header(header)
1717- if scheme != self.scheme:
1718- return False
1719- self.update_auth(auth, 'scheme', scheme)
1720- resp = self._auth_match_kerberos(auth)
1721- if resp is None:
1722- return False
1723- # Optionally should try to authenticate using NTLM here
1724- self.update_auth(auth, 'negotiate_response', resp)
1725- return True
1726-
1727- def _auth_match_kerberos(self, auth):
1728- """Try to create a GSSAPI response for authenticating against a host."""
1729- global kerberos, checked_kerberos
1730- if kerberos is None and not checked_kerberos:
1731- try:
1732- import kerberos
1733- except ImportError:
1734- kerberos = None
1735- checked_kerberos = True
1736- if kerberos is None:
1737- return None
1738- ret, vc = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
1739- if ret < 1:
1740- trace.warning('Unable to create GSSAPI context for %s: %d',
1741- auth['host'], ret)
1742- return None
1743- ret = kerberos.authGSSClientStep(vc, "")
1744- if ret < 0:
1745- trace.mutter('authGSSClientStep failed: %d', ret)
1746- return None
1747- return kerberos.authGSSClientResponse(vc)
1748-
1749- def build_auth_header(self, auth, request):
1750- return "Negotiate %s" % auth['negotiate_response']
1751-
1752- def auth_params_reusable(self, auth):
1753- # If the auth scheme is known, it means a previous
1754- # authentication was successful, all information is
1755- # available, no further checks are needed.
1756- return (auth.get('scheme', None) == 'negotiate' and
1757- auth.get('negotiate_response', None) is not None)
1758-
1759-
1760-class BasicAuthHandler(AbstractAuthHandler):
1761- """A custom basic authentication handler."""
1762-
1763- scheme = 'basic'
1764- handler_order = 500
1765- auth_regexp = re.compile('realm="([^"]*)"', re.I)
1766-
1767- def build_auth_header(self, auth, request):
1768- raw = '%s:%s' % (auth['user'], auth['password'])
1769- auth_header = 'Basic ' + \
1770- base64.b64encode(raw.encode('utf-8')).decode('ascii')
1771- return auth_header
1772-
1773- def extract_realm(self, header_value):
1774- match = self.auth_regexp.search(header_value)
1775- realm = None
1776- if match:
1777- realm = match.group(1)
1778- return match, realm
1779-
1780- def auth_match(self, header, auth):
1781- scheme, raw_auth = self._parse_auth_header(header)
1782- if scheme != self.scheme:
1783- return False
1784-
1785- match, realm = self.extract_realm(raw_auth)
1786- if match:
1787- # Put useful info into auth
1788- self.update_auth(auth, 'scheme', scheme)
1789- self.update_auth(auth, 'realm', realm)
1790- if (auth.get('user', None) is None
1791- or auth.get('password', None) is None):
1792- user, password = self.get_user_password(auth)
1793- self.update_auth(auth, 'user', user)
1794- self.update_auth(auth, 'password', password)
1795- return match is not None
1796-
1797- def auth_params_reusable(self, auth):
1798- # If the auth scheme is known, it means a previous
1799- # authentication was successful, all information is
1800- # available, no further checks are needed.
1801- return auth.get('scheme', None) == 'basic'
1802-
1803-
1804-def get_digest_algorithm_impls(algorithm):
1805- H = None
1806- KD = None
1807- if algorithm == 'MD5':
1808- def H(x): return osutils.md5(x).hexdigest()
1809- elif algorithm == 'SHA':
1810- H = osutils.sha_string
1811- if H is not None:
1812- def KD(secret, data): return H(
1813- ("%s:%s" % (secret, data)).encode('utf-8'))
1814- return H, KD
1815-
1816-
1817-def get_new_cnonce(nonce, nonce_count):
1818- raw = '%s:%d:%s:%s' % (nonce, nonce_count, time.ctime(),
1819- osutils.rand_chars(8))
1820- return osutils.sha_string(raw.encode('utf-8'))[:16]
1821-
1822-
1823-class DigestAuthHandler(AbstractAuthHandler):
1824- """A custom digest authentication handler."""
1825-
1826- scheme = 'digest'
1827- # Before basic as digest is a bit more secure and should be preferred
1828- handler_order = 490
1829-
1830- def auth_params_reusable(self, auth):
1831- # If the auth scheme is known, it means a previous
1832- # authentication was successful, all information is
1833- # available, no further checks are needed.
1834- return auth.get('scheme', None) == 'digest'
1835-
1836- def auth_match(self, header, auth):
1837- scheme, raw_auth = self._parse_auth_header(header)
1838- if scheme != self.scheme:
1839- return False
1840-
1841- # Put the requested authentication info into a dict
1842- req_auth = urllib_request.parse_keqv_list(
1843- urllib_request.parse_http_list(raw_auth))
1844-
1845- # Check that we can handle that authentication
1846- qop = req_auth.get('qop', None)
1847- if qop != 'auth': # No auth-int so far
1848- return False
1849-
1850- H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
1851- if H is None:
1852- return False
1853-
1854- realm = req_auth.get('realm', None)
1855- # Put useful info into auth
1856- self.update_auth(auth, 'scheme', scheme)
1857- self.update_auth(auth, 'realm', realm)
1858- if auth.get('user', None) is None or auth.get('password', None) is None:
1859- user, password = self.get_user_password(auth)
1860- self.update_auth(auth, 'user', user)
1861- self.update_auth(auth, 'password', password)
1862-
1863- try:
1864- if req_auth.get('algorithm', None) is not None:
1865- self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
1866- nonce = req_auth['nonce']
1867- if auth.get('nonce', None) != nonce:
1868- # A new nonce, never used
1869- self.update_auth(auth, 'nonce_count', 0)
1870- self.update_auth(auth, 'nonce', nonce)
1871- self.update_auth(auth, 'qop', qop)
1872- auth['opaque'] = req_auth.get('opaque', None)
1873- except KeyError:
1874- # Some required field is not there
1875- return False
1876-
1877- return True
1878-
1879- def build_auth_header(self, auth, request):
1880- if PY3:
1881- selector = request.selector
1882- else:
1883- selector = request.get_selector()
1884- url_scheme, url_selector = splittype(selector)
1885- sel_host, uri = splithost(url_selector)
1886-
1887- A1 = ('%s:%s:%s' %
1888- (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
1889- A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')
1890-
1891- nonce = auth['nonce']
1892- qop = auth['qop']
1893-
1894- nonce_count = auth['nonce_count'] + 1
1895- ncvalue = '%08x' % nonce_count
1896- cnonce = get_new_cnonce(nonce, nonce_count)
1897-
1898- H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
1899- nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
1900- request_digest = KD(H(A1), nonce_data)
1901-
1902- header = 'Digest '
1903- header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
1904- auth['realm'],
1905- nonce)
1906- header += ', uri="%s"' % uri
1907- header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
1908- header += ', qop="%s"' % qop
1909- header += ', response="%s"' % request_digest
1910- # Append the optional fields
1911- opaque = auth.get('opaque', None)
1912- if opaque:
1913- header += ', opaque="%s"' % opaque
1914- if auth.get('algorithm', None):
1915- header += ', algorithm="%s"' % auth.get('algorithm')
1916-
1917- # We have used the nonce once more, update the count
1918- auth['nonce_count'] = nonce_count
1919-
1920- return header
1921-
1922-
1923-class HTTPAuthHandler(AbstractAuthHandler):
1924- """Custom http authentication handler.
1925-
1926- Send the authentication preventively to avoid the roundtrip
1927- associated with the 401 error and keep the revelant info in
1928- the auth request attribute.
1929- """
1930-
1931- auth_required_header = 'www-authenticate'
1932- auth_header = 'Authorization'
1933-
1934- def get_auth(self, request):
1935- """Get the auth params from the request"""
1936- return request.auth
1937-
1938- def set_auth(self, request, auth):
1939- """Set the auth params for the request"""
1940- request.auth = auth
1941-
1942- def build_password_prompt(self, auth):
1943- return self._build_password_prompt(auth)
1944-
1945- def build_username_prompt(self, auth):
1946- return self._build_username_prompt(auth)
1947-
1948- def http_error_401(self, req, fp, code, msg, headers):
1949- return self.auth_required(req, headers)
1950-
1951-
1952-class ProxyAuthHandler(AbstractAuthHandler):
1953- """Custom proxy authentication handler.
1954-
1955- Send the authentication preventively to avoid the roundtrip
1956- associated with the 407 error and keep the revelant info in
1957- the proxy_auth request attribute..
1958- """
1959-
1960- auth_required_header = 'proxy-authenticate'
1961- # FIXME: the correct capitalization is Proxy-Authorization,
1962- # but python-2.4 urllib_request.Request insist on using capitalize()
1963- # instead of title().
1964- auth_header = 'Proxy-authorization'
1965-
1966- def get_auth(self, request):
1967- """Get the auth params from the request"""
1968- return request.proxy_auth
1969-
1970- def set_auth(self, request, auth):
1971- """Set the auth params for the request"""
1972- request.proxy_auth = auth
1973-
1974- def build_password_prompt(self, auth):
1975- prompt = self._build_password_prompt(auth)
1976- prompt = u'Proxy ' + prompt
1977- return prompt
1978-
1979- def build_username_prompt(self, auth):
1980- prompt = self._build_username_prompt(auth)
1981- prompt = u'Proxy ' + prompt
1982- return prompt
1983-
1984- def http_error_407(self, req, fp, code, msg, headers):
1985- return self.auth_required(req, headers)
1986-
1987-
1988-class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
1989- """Custom http basic authentication handler"""
1990-
1991-
1992-class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
1993- """Custom proxy basic authentication handler"""
1994-
1995-
1996-class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
1997- """Custom http basic authentication handler"""
1998-
1999-
2000-class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
2001- """Custom proxy basic authentication handler"""
2002-
2003-
2004-class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
2005- """Custom http negotiate authentication handler"""
2006-
2007-
2008-class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
2009- """Custom proxy negotiate authentication handler"""
2010-
2011-
2012-class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
2013- """Process HTTP error responses.
2014-
2015- We don't really process the errors, quite the contrary
2016- instead, we leave our Transport handle them.
2017- """
2018-
2019- accepted_errors = [200, # Ok
2020- 201,
2021- 202,
2022- 204,
2023- 206, # Partial content
2024- 400,
2025- 403,
2026- 404, # Not found
2027- 405, # Method not allowed
2028- 406, # Not Acceptable
2029- 409, # Conflict
2030- 416, # Range not satisfiable
2031- 422, # Unprocessible entity
2032- 501, # Not implemented
2033- ]
2034- """The error codes the caller will handle.
2035-
2036- This can be specialized in the request on a case-by case basis, but the
2037- common cases are covered here.
2038- """
2039-
2040- def http_response(self, request, response):
2041- code, msg, hdrs = response.code, response.msg, response.info()
2042-
2043- if code not in self.accepted_errors:
2044- response = self.parent.error('http', request, response,
2045- code, msg, hdrs)
2046- return response
2047-
2048- https_response = http_response
2049-
2050-
2051-class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
2052- """Translate common errors into Breezy Exceptions"""
2053-
2054- def http_error_default(self, req, fp, code, msg, hdrs):
2055- if code == 403:
2056- raise errors.TransportError(
2057- 'Server refuses to fulfill the request (403 Forbidden)'
2058- ' for %s' % req.get_full_url())
2059- else:
2060- raise errors.UnexpectedHttpStatus(
2061- req.get_full_url(), code,
2062- 'Unable to handle http code: %s' % msg)
2063-
2064-
2065-class Opener(object):
2066- """A wrapper around urllib_request.build_opener
2067-
2068- Daughter classes can override to build their own specific opener
2069- """
2070- # TODO: Provides hooks for daughter classes.
2071-
2072- def __init__(self,
2073- connection=ConnectionHandler,
2074- redirect=HTTPRedirectHandler,
2075- error=HTTPErrorProcessor,
2076- report_activity=None,
2077- ca_certs=None):
2078- self._opener = urllib_request.build_opener(
2079- connection(report_activity=report_activity, ca_certs=ca_certs),
2080- redirect, error,
2081- ProxyHandler(),
2082- HTTPBasicAuthHandler(),
2083- HTTPDigestAuthHandler(),
2084- HTTPNegotiateAuthHandler(),
2085- ProxyBasicAuthHandler(),
2086- ProxyDigestAuthHandler(),
2087- ProxyNegotiateAuthHandler(),
2088- HTTPHandler,
2089- HTTPSHandler,
2090- HTTPDefaultErrorHandler,
2091- )
2092-
2093- self.open = self._opener.open
2094- if DEBUG >= 9:
2095- # When dealing with handler order, it's easy to mess
2096- # things up, the following will help understand which
2097- # handler is used, when and for what.
2098- import pprint
2099- pprint.pprint(self._opener.__dict__)
2100-
2101-
2102-class HttpTransport(ConnectedTransport):
2103- """HTTP Client implementations.
2104-
2105- The protocol can be given as e.g. http+urllib://host/ to use a particular
2106- implementation.
2107- """
2108-
2109- # _unqualified_scheme: "http" or "https"
2110- # _scheme: may have "+pycurl", etc
2111-
2112- # In order to debug we have to issue our traces in sync with
2113- # httplib, which use print :(
2114- _debuglevel = 0
2115-
2116- def __init__(self, base, _from_transport=None, ca_certs=None):
2117- """Set the base path where files will be stored."""
2118- proto_match = re.match(r'^(https?)(\+\w+)?://', base)
2119- if not proto_match:
2120- raise AssertionError("not a http url: %r" % base)
2121- self._unqualified_scheme = proto_match.group(1)
2122- super(HttpTransport, self).__init__(
2123- base, _from_transport=_from_transport)
2124- self._medium = None
2125- # range hint is handled dynamically throughout the life
2126- # of the transport object. We start by trying multi-range
2127- # requests and if the server returns bogus results, we
2128- # retry with single range requests and, finally, we
2129- # forget about range if the server really can't
2130- # understand. Once acquired, this piece of info is
2131- # propagated to clones.
2132- if _from_transport is not None:
2133- self._range_hint = _from_transport._range_hint
2134- self._opener = _from_transport._opener
2135- else:
2136- self._range_hint = 'multi'
2137- self._opener = Opener(
2138- report_activity=self._report_activity, ca_certs=ca_certs)
2139-
2140- def request(self, method, url, fields=None, headers=None, **urlopen_kw):
2141- body = urlopen_kw.pop('body', None)
2142- if fields is not None:
2143- data = urlencode(fields).encode()
2144- if body is not None:
2145- raise ValueError(
2146- 'body and fields are mutually exclusive')
2147- else:
2148- data = body
2149- if headers is None:
2150- headers = {}
2151- request = Request(method, url, data, headers)
2152- request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
2153- if urlopen_kw:
2154- raise NotImplementedError(
2155- 'unknown arguments: %r' % urlopen_kw.keys())
2156- connection = self._get_connection()
2157- if connection is not None:
2158- # Give back shared info
2159- request.connection = connection
2160- (auth, proxy_auth) = self._get_credentials()
2161- # Clean the httplib.HTTPConnection pipeline in case the previous
2162- # request couldn't do it
2163- connection.cleanup_pipe()
2164- else:
2165- # First request, initialize credentials.
2166- # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
2167- auth = self._create_auth()
2168- # Proxy initialization will be done by the first proxied request
2169- proxy_auth = dict()
2170- # Ensure authentication info is provided
2171- request.auth = auth
2172- request.proxy_auth = proxy_auth
2173-
2174- if self._debuglevel > 0:
2175- print('perform: %s base: %s, url: %s' % (request.method, self.base,
2176- request.get_full_url()))
2177- response = self._opener.open(request)
2178- if self._get_connection() is not request.connection:
2179- # First connection or reconnection
2180- self._set_connection(request.connection,
2181- (request.auth, request.proxy_auth))
2182- else:
2183- # http may change the credentials while keeping the
2184- # connection opened
2185- self._update_credentials((request.auth, request.proxy_auth))
2186-
2187- code = response.code
2188- if (request.follow_redirections is False
2189- and code in (301, 302, 303, 307, 308)):
2190- raise errors.RedirectRequested(request.get_full_url(),
2191- request.redirected_to,
2192- is_permanent=(code in (301, 308)))
2193-
2194- if request.redirected_to is not None:
2195- trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
2196- request.redirected_to))
2197-
2198- class Urllib3LikeResponse(object):
2199-
2200- def __init__(self, actual):
2201- self._actual = actual
2202- self._data = None
2203-
2204- def getheader(self, name, default=None):
2205- if self._actual.headers is None:
2206- raise http_client.ResponseNotReady()
2207- if PY3:
2208- return self._actual.headers.get(name, default)
2209- else:
2210- return self._actual.headers.getheader(name, default)
2211-
2212- def getheaders(self):
2213- if self._actual.headers is None:
2214- raise http_client.ResponseNotReady()
2215- return list(self._actual.headers.items())
2216-
2217- @property
2218- def status(self):
2219- return self._actual.code
2220-
2221- @property
2222- def reason(self):
2223- return self._actual.reason
2224-
2225- @property
2226- def data(self):
2227- if self._data is None:
2228- self._data = self._actual.read()
2229- return self._data
2230-
2231- @property
2232- def text(self):
2233- if self.status == 204:
2234- return None
2235- charset = cgi.parse_header(
2236- self._actual.headers['Content-Type'])[1].get('charset')
2237- if charset:
2238- return self.data.decode(charset)
2239- else:
2240- return self.data.decode()
2241-
2242- def read(self, amt=None):
2243- return self._actual.read(amt)
2244-
2245- def readlines(self):
2246- return self._actual.readlines()
2247-
2248- def readline(self, size=-1):
2249- return self._actual.readline(size)
2250-
2251- return Urllib3LikeResponse(response)
2252-
2253- def disconnect(self):
2254- connection = self._get_connection()
2255- if connection is not None:
2256- connection.close()
2257-
2258- def has(self, relpath):
2259- """Does the target location exist?
2260- """
2261- response = self._head(relpath)
2262-
2263- code = response.status
2264- if code == 200: # "ok",
2265- return True
2266- else:
2267- return False
2268-
2269- def get(self, relpath):
2270- """Get the file at the given relative path.
2271-
2272- :param relpath: The relative path to the file
2273- """
2274- code, response_file = self._get(relpath, None)
2275- return response_file
2276-
2277- def _get(self, relpath, offsets, tail_amount=0):
2278- """Get a file, or part of a file.
2279-
2280- :param relpath: Path relative to transport base URL
2281- :param offsets: None to get the whole file;
2282- or a list of _CoalescedOffset to fetch parts of a file.
2283- :param tail_amount: The amount to get from the end of the file.
2284-
2285- :returns: (http_code, result_file)
2286- """
2287- abspath = self._remote_path(relpath)
2288- headers = {}
2289- if offsets or tail_amount:
2290- range_header = self._attempted_range_header(offsets, tail_amount)
2291- if range_header is not None:
2292- bytes = 'bytes=' + range_header
2293- headers = {'Range': bytes}
2294- else:
2295- range_header = None
2296-
2297- response = self.request('GET', abspath, headers=headers)
2298-
2299- if response.status == 404: # not found
2300- raise errors.NoSuchFile(abspath)
2301- elif response.status == 416:
2302- # We don't know which, but one of the ranges we specified was
2303- # wrong.
2304- raise errors.InvalidHttpRange(abspath, range_header,
2305- 'Server return code %d' % response.status)
2306- elif response.status == 400:
2307- if range_header:
2308- # We don't know which, but one of the ranges we specified was
2309- # wrong.
2310- raise errors.InvalidHttpRange(
2311- abspath, range_header,
2312- 'Server return code %d' % response.status)
2313- else:
2314- raise errors.BadHttpRequest(abspath, response.reason)
2315- elif response.status not in (200, 206):
2316- raise errors.UnexpectedHttpStatus(abspath, response.status)
2317-
2318- data = handle_response(
2319- abspath, response.status, response.getheader, response)
2320- return response.status, data
2321-
2322- def _remote_path(self, relpath):
2323- """See ConnectedTransport._remote_path.
2324-
2325- user and passwords are not embedded in the path provided to the server.
2326- """
2327- url = self._parsed_url.clone(relpath)
2328- url.user = url.quoted_user = None
2329- url.password = url.quoted_password = None
2330- url.scheme = self._unqualified_scheme
2331- return str(url)
2332-
2333- def _create_auth(self):
2334- """Returns a dict containing the credentials provided at build time."""
2335- auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
2336- user=self._parsed_url.user, password=self._parsed_url.password,
2337- protocol=self._unqualified_scheme,
2338- path=self._parsed_url.path)
2339- return auth
2340-
2341- def get_smart_medium(self):
2342- """See Transport.get_smart_medium."""
2343- if self._medium is None:
2344- # Since medium holds some state (smart server probing at least), we
2345- # need to keep it around. Note that this is needed because medium
2346- # has the same 'base' attribute as the transport so it can't be
2347- # shared between transports having different bases.
2348- self._medium = SmartClientHTTPMedium(self)
2349- return self._medium
2350-
2351- def _degrade_range_hint(self, relpath, ranges):
2352- if self._range_hint == 'multi':
2353- self._range_hint = 'single'
2354- mutter('Retry "%s" with single range request' % relpath)
2355- elif self._range_hint == 'single':
2356- self._range_hint = None
2357- mutter('Retry "%s" without ranges' % relpath)
2358- else:
2359- # We tried all the tricks, but nothing worked, caller must reraise.
2360- return False
2361- return True
2362-
2363- # _coalesce_offsets is a helper for readv, it try to combine ranges without
2364- # degrading readv performances. _bytes_to_read_before_seek is the value
2365- # used for the limit parameter and has been tuned for other transports. For
2366- # HTTP, the name is inappropriate but the parameter is still useful and
2367- # helps reduce the number of chunks in the response. The overhead for a
2368- # chunk (headers, length, footer around the data itself is variable but
2369- # around 50 bytes. We use 128 to reduce the range specifiers that appear in
2370- # the header, some servers (notably Apache) enforce a maximum length for a
2371- # header and issue a '400: Bad request' error when too much ranges are
2372- # specified.
2373- _bytes_to_read_before_seek = 128
2374- # No limit on the offset number that get combined into one, we are trying
2375- # to avoid downloading the whole file.
2376- _max_readv_combine = 0
2377- # By default Apache has a limit of ~400 ranges before replying with a 400
2378- # Bad Request. So we go underneath that amount to be safe.
2379- _max_get_ranges = 200
2380- # We impose no limit on the range size. But see _pycurl.py for a different
2381- # use.
2382- _get_max_size = 0
2383-
2384- def _readv(self, relpath, offsets):
2385- """Get parts of the file at the given relative path.
2386-
2387- :param offsets: A list of (offset, size) tuples.
2388- :param return: A list or generator of (offset, data) tuples
2389- """
2390- # offsets may be a generator, we will iterate it several times, so
2391- # build a list
2392- offsets = list(offsets)
2393-
2394- try_again = True
2395- retried_offset = None
2396- while try_again:
2397- try_again = False
2398-
2399- # Coalesce the offsets to minimize the GET requests issued
2400- sorted_offsets = sorted(offsets)
2401- coalesced = self._coalesce_offsets(
2402- sorted_offsets, limit=self._max_readv_combine,
2403- fudge_factor=self._bytes_to_read_before_seek,
2404- max_size=self._get_max_size)
2405-
2406- # Turn it into a list, we will iterate it several times
2407- coalesced = list(coalesced)
2408- if 'http' in debug.debug_flags:
2409- mutter('http readv of %s offsets => %s collapsed %s',
2410- relpath, len(offsets), len(coalesced))
2411-
2412- # Cache the data read, but only until it's been used
2413- data_map = {}
2414- # We will iterate on the data received from the GET requests and
2415- # serve the corresponding offsets respecting the initial order. We
2416- # need an offset iterator for that.
2417- iter_offsets = iter(offsets)
2418- try:
2419- cur_offset_and_size = next(iter_offsets)
2420- except StopIteration:
2421- return
2422-
2423- try:
2424- for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
2425- # Split the received chunk
2426- for offset, size in cur_coal.ranges:
2427- start = cur_coal.start + offset
2428- rfile.seek(start, os.SEEK_SET)
2429- data = rfile.read(size)
2430- data_len = len(data)
2431- if data_len != size:
2432- raise errors.ShortReadvError(relpath, start, size,
2433- actual=data_len)
2434- if (start, size) == cur_offset_and_size:
2435- # The offset requested are sorted as the coalesced
2436- # ones, no need to cache. Win !
2437- yield cur_offset_and_size[0], data
2438- try:
2439- cur_offset_and_size = next(iter_offsets)
2440- except StopIteration:
2441- return
2442- else:
2443- # Different sorting. We need to cache.
2444- data_map[(start, size)] = data
2445-
2446- # Yield everything we can
2447- while cur_offset_and_size in data_map:
2448- # Clean the cached data since we use it
2449- # XXX: will break if offsets contains duplicates --
2450- # vila20071129
2451- this_data = data_map.pop(cur_offset_and_size)
2452- yield cur_offset_and_size[0], this_data
2453- try:
2454- cur_offset_and_size = next(iter_offsets)
2455- except StopIteration:
2456- return
2457-
2458- except (errors.ShortReadvError, errors.InvalidRange,
2459- errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
2460- mutter('Exception %r: %s during http._readv', e, e)
2461- if (not isinstance(e, errors.ShortReadvError)
2462- or retried_offset == cur_offset_and_size):
2463- # We don't degrade the range hint for ShortReadvError since
2464- # they do not indicate a problem with the server ability to
2465- # handle ranges. Except when we fail to get back a required
2466- # offset twice in a row. In that case, falling back to
2467- # single range or whole file should help.
2468- if not self._degrade_range_hint(relpath, coalesced):
2469- raise
2470- # Some offsets may have been already processed, so we retry
2471- # only the unsuccessful ones.
2472- offsets = [cur_offset_and_size] + [o for o in iter_offsets]
2473- retried_offset = cur_offset_and_size
2474- try_again = True
2475-
2476- def _coalesce_readv(self, relpath, coalesced):
2477- """Issue several GET requests to satisfy the coalesced offsets"""
2478-
2479- def get_and_yield(relpath, coalesced):
2480- if coalesced:
2481- # Note that the _get below may raise
2482- # errors.InvalidHttpRange. It's the caller's responsibility to
2483- # decide how to retry since it may provide different coalesced
2484- # offsets.
2485- code, rfile = self._get(relpath, coalesced)
2486- for coal in coalesced:
2487- yield coal, rfile
2488-
2489- if self._range_hint is None:
2490- # Download whole file
2491- for c, rfile in get_and_yield(relpath, coalesced):
2492- yield c, rfile
2493- else:
2494- total = len(coalesced)
2495- if self._range_hint == 'multi':
2496- max_ranges = self._max_get_ranges
2497- elif self._range_hint == 'single':
2498- max_ranges = total
2499- else:
2500- raise AssertionError("Unknown _range_hint %r"
2501- % (self._range_hint,))
2502- # TODO: Some web servers may ignore the range requests and return
2503- # the whole file, we may want to detect that and avoid further
2504- # requests.
2505- # Hint: test_readv_multiple_get_requests will fail once we do that
2506- cumul = 0
2507- ranges = []
2508- for coal in coalesced:
2509- if ((self._get_max_size > 0
2510- and cumul + coal.length > self._get_max_size) or
2511- len(ranges) >= max_ranges):
2512- # Get that much and yield
2513- for c, rfile in get_and_yield(relpath, ranges):
2514- yield c, rfile
2515- # Restart with the current offset
2516- ranges = [coal]
2517- cumul = coal.length
2518- else:
2519- ranges.append(coal)
2520- cumul += coal.length
2521- # Get the rest and yield
2522- for c, rfile in get_and_yield(relpath, ranges):
2523- yield c, rfile
2524-
2525- def recommended_page_size(self):
2526- """See Transport.recommended_page_size().
2527-
2528- For HTTP we suggest a large page size to reduce the overhead
2529- introduced by latency.
2530- """
2531- return 64 * 1024
2532-
2533- def _post(self, body_bytes):
2534- """POST body_bytes to .bzr/smart on this transport.
2535-
2536- :returns: (response code, response body file-like object).
2537- """
2538- # TODO: Requiring all the body_bytes to be available at the beginning of
2539- # the POST may require large client buffers. It would be nice to have
2540- # an interface that allows streaming via POST when possible (and
2541- # degrades to a local buffer when not).
2542- abspath = self._remote_path('.bzr/smart')
2543- response = self.request(
2544- 'POST', abspath, body=body_bytes,
2545- headers={'Content-Type': 'application/octet-stream'})
2546- if response.status not in (200, 403):
2547- raise errors.UnexpectedHttpStatus(abspath, response.status)
2548- code = response.status
2549- data = handle_response(
2550- abspath, code, response.getheader, response)
2551- return code, data
2552-
2553- def _head(self, relpath):
2554- """Request the HEAD of a file.
2555-
2556- Performs the request and leaves callers handle the results.
2557- """
2558- abspath = self._remote_path(relpath)
2559- response = self.request('HEAD', abspath)
2560- if response.status not in (200, 404):
2561- raise errors.UnexpectedHttpStatus(abspath, response.status)
2562-
2563- return response
2564-
2565- raise NotImplementedError(self._post)
2566-
2567- def put_file(self, relpath, f, mode=None):
2568- """Copy the file-like object into the location.
2569-
2570- :param relpath: Location to put the contents, relative to base.
2571- :param f: File-like object.
2572- """
2573- raise errors.TransportNotPossible('http PUT not supported')
2574-
2575- def mkdir(self, relpath, mode=None):
2576- """Create a directory at the given path."""
2577- raise errors.TransportNotPossible('http does not support mkdir()')
2578-
2579- def rmdir(self, relpath):
2580- """See Transport.rmdir."""
2581- raise errors.TransportNotPossible('http does not support rmdir()')
2582-
2583- def append_file(self, relpath, f, mode=None):
2584- """Append the text in the file-like object into the final
2585- location.
2586- """
2587- raise errors.TransportNotPossible('http does not support append()')
2588-
2589- def copy(self, rel_from, rel_to):
2590- """Copy the item at rel_from to the location at rel_to"""
2591- raise errors.TransportNotPossible('http does not support copy()')
2592-
2593- def copy_to(self, relpaths, other, mode=None, pb=None):
2594- """Copy a set of entries from self into another Transport.
2595-
2596- :param relpaths: A list/generator of entries to be copied.
2597-
2598- TODO: if other is LocalTransport, is it possible to
2599- do better than put(get())?
2600- """
2601- # At this point HttpTransport might be able to check and see if
2602- # the remote location is the same, and rather than download, and
2603- # then upload, it could just issue a remote copy_this command.
2604- if isinstance(other, HttpTransport):
2605- raise errors.TransportNotPossible(
2606- 'http cannot be the target of copy_to()')
2607- else:
2608- return super(HttpTransport, self).\
2609- copy_to(relpaths, other, mode=mode, pb=pb)
2610-
2611- def move(self, rel_from, rel_to):
2612- """Move the item at rel_from to the location at rel_to"""
2613- raise errors.TransportNotPossible('http does not support move()')
2614-
2615- def delete(self, relpath):
2616- """Delete the item at relpath"""
2617- raise errors.TransportNotPossible('http does not support delete()')
2618-
2619- def external_url(self):
2620- """See breezy.transport.Transport.external_url."""
2621- # HTTP URL's are externally usable as long as they don't mention their
2622- # implementation qualifier
2623- url = self._parsed_url.clone()
2624- url.scheme = self._unqualified_scheme
2625- return str(url)
2626-
2627- def is_readonly(self):
2628- """See Transport.is_readonly."""
2629- return True
2630-
2631- def listable(self):
2632- """See Transport.listable."""
2633- return False
2634-
2635- def stat(self, relpath):
2636- """Return the stat information for a file.
2637- """
2638- raise errors.TransportNotPossible('http does not support stat()')
2639-
2640- def lock_read(self, relpath):
2641- """Lock the given file for shared (read) access.
2642- :return: A lock object, which should be passed to Transport.unlock()
2643- """
2644- # The old RemoteBranch ignore lock for reading, so we will
2645- # continue that tradition and return a bogus lock object.
2646- class BogusLock(object):
2647- def __init__(self, path):
2648- self.path = path
2649-
2650- def unlock(self):
2651- pass
2652- return BogusLock(relpath)
2653-
2654- def lock_write(self, relpath):
2655- """Lock the given file for exclusive (write) access.
2656- WARNING: many transports do not support this, so trying avoid using it
2657-
2658- :return: A lock object, which should be passed to Transport.unlock()
2659- """
2660- raise errors.TransportNotPossible('http does not support lock_write()')
2661-
2662- def _attempted_range_header(self, offsets, tail_amount):
2663- """Prepare a HTTP Range header at a level the server should accept.
2664-
2665- :return: the range header representing offsets/tail_amount or None if
2666- no header can be built.
2667- """
2668-
2669- if self._range_hint == 'multi':
2670- # Generate the header describing all offsets
2671- return self._range_header(offsets, tail_amount)
2672- elif self._range_hint == 'single':
2673- # Combine all the requested ranges into a single
2674- # encompassing one
2675- if len(offsets) > 0:
2676- if tail_amount not in (0, None):
2677- # Nothing we can do here to combine ranges with tail_amount
2678- # in a single range, just returns None. The whole file
2679- # should be downloaded.
2680- return None
2681- else:
2682- start = offsets[0].start
2683- last = offsets[-1]
2684- end = last.start + last.length - 1
2685- whole = self._coalesce_offsets([(start, end - start + 1)],
2686- limit=0, fudge_factor=0)
2687- return self._range_header(list(whole), 0)
2688- else:
2689- # Only tail_amount, requested, leave range_header
2690- # do its work
2691- return self._range_header(offsets, tail_amount)
2692- else:
2693- return None
2694-
2695- @staticmethod
2696- def _range_header(ranges, tail_amount):
2697- """Turn a list of bytes ranges into a HTTP Range header value.
2698-
2699- :param ranges: A list of _CoalescedOffset
2700- :param tail_amount: The amount to get from the end of the file.
2701-
2702- :return: HTTP range header string.
2703-
2704- At least a non-empty ranges *or* a tail_amount must be
2705- provided.
2706- """
2707- strings = []
2708- for offset in ranges:
2709- strings.append('%d-%d' % (offset.start,
2710- offset.start + offset.length - 1))
2711-
2712- if tail_amount:
2713- strings.append('-%d' % tail_amount)
2714-
2715- return ','.join(strings)
2716-
2717- def _redirected_to(self, source, target):
2718- """Returns a transport suitable to re-issue a redirected request.
2719-
2720- :param source: The source url as returned by the server.
2721- :param target: The target url as returned by the server.
2722-
2723- The redirection can be handled only if the relpath involved is not
2724- renamed by the redirection.
2725-
2726- :returns: A transport
2727- :raise UnusableRedirect: when the URL can not be reinterpreted
2728- """
2729- parsed_source = self._split_url(source)
2730- parsed_target = self._split_url(target)
2731- pl = len(self._parsed_url.path)
2732- # determine the excess tail - the relative path that was in
2733- # the original request but not part of this transports' URL.
2734- excess_tail = parsed_source.path[pl:].strip("/")
2735- if not parsed_target.path.endswith(excess_tail):
2736- # The final part of the url has been renamed, we can't handle the
2737- # redirection.
2738- raise UnusableRedirect(
2739- source, target, "final part of the url was renamed")
2740-
2741- target_path = parsed_target.path
2742- if excess_tail:
2743- # Drop the tail that was in the redirect but not part of
2744- # the path of this transport.
2745- target_path = target_path[:-len(excess_tail)]
2746-
2747- if parsed_target.scheme in ('http', 'https'):
2748- # Same protocol family (i.e. http[s]), we will preserve the same
2749- # http client implementation when a redirection occurs from one to
2750- # the other (otherwise users may be surprised that bzr switches
2751- # from one implementation to the other, and devs may suffer
2752- # debugging it).
2753- if (parsed_target.scheme == self._unqualified_scheme
2754- and parsed_target.host == self._parsed_url.host
2755- and parsed_target.port == self._parsed_url.port
2756- and (parsed_target.user is None or
2757- parsed_target.user == self._parsed_url.user)):
2758- # If a user is specified, it should match, we don't care about
2759- # passwords, wrong passwords will be rejected anyway.
2760- return self.clone(target_path)
2761- else:
2762- # Rebuild the url preserving the scheme qualification and the
2763- # credentials (if they don't apply, the redirected to server
2764- # will tell us, but if they do apply, we avoid prompting the
2765- # user)
2766- redir_scheme = parsed_target.scheme
2767- new_url = self._unsplit_url(redir_scheme,
2768- self._parsed_url.user,
2769- self._parsed_url.password,
2770- parsed_target.host, parsed_target.port,
2771- target_path)
2772- return transport.get_transport_from_url(new_url)
2773- else:
2774- # Redirected to a different protocol
2775- new_url = self._unsplit_url(parsed_target.scheme,
2776- parsed_target.user,
2777- parsed_target.password,
2778- parsed_target.host, parsed_target.port,
2779- target_path)
2780- return transport.get_transport_from_url(new_url)
2781-
2782- def _options(self, relpath):
2783- abspath = self._remote_path(relpath)
2784- resp = self.request('OPTIONS', abspath)
2785- if resp.status == 404:
2786- raise errors.NoSuchFile(abspath)
2787- if resp.status in (403, 405):
2788- raise errors.InvalidHttpResponse(
2789- abspath,
2790- "OPTIONS not supported or forbidden for remote URL")
2791- return resp.getheaders()
2792-
2793-
2794-# TODO: May be better located in smart/medium.py with the other
2795-# SmartMedium classes
2796-class SmartClientHTTPMedium(medium.SmartClientMedium):
2797-
2798- def __init__(self, http_transport):
2799- super(SmartClientHTTPMedium, self).__init__(http_transport.base)
2800- # We don't want to create a circular reference between the http
2801- # transport and its associated medium. Since the transport will live
2802- # longer than the medium, the medium keep only a weak reference to its
2803- # transport.
2804- self._http_transport_ref = weakref.ref(http_transport)
2805-
2806- def get_request(self):
2807- return SmartClientHTTPMediumRequest(self)
2808-
2809- def should_probe(self):
2810- return True
2811-
2812- def remote_path_from_transport(self, transport):
2813- # Strip the optional 'bzr+' prefix from transport so it will have the
2814- # same scheme as self.
2815- transport_base = transport.base
2816- if transport_base.startswith('bzr+'):
2817- transport_base = transport_base[4:]
2818- rel_url = urlutils.relative_url(self.base, transport_base)
2819- return urlutils.unquote(rel_url)
2820-
2821- def send_http_smart_request(self, bytes):
2822- try:
2823- # Get back the http_transport hold by the weak reference
2824- t = self._http_transport_ref()
2825- code, body_filelike = t._post(bytes)
2826- if code != 200:
2827- raise errors.UnexpectedHttpStatus(
2828- t._remote_path('.bzr/smart'), code)
2829- except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
2830- raise errors.SmartProtocolError(str(e))
2831- return body_filelike
2832-
2833- def _report_activity(self, bytes, direction):
2834- """See SmartMedium._report_activity.
2835-
2836- Does nothing; the underlying plain HTTP transport will report the
2837- activity that this medium would report.
2838- """
2839- pass
2840-
2841- def disconnect(self):
2842- """See SmartClientMedium.disconnect()."""
2843- t = self._http_transport_ref()
2844- t.disconnect()
2845-
2846-
2847-# TODO: May be better located in smart/medium.py with the other
2848-# SmartMediumRequest classes
2849-class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
2850- """A SmartClientMediumRequest that works with an HTTP medium."""
2851-
2852- def __init__(self, client_medium):
2853- medium.SmartClientMediumRequest.__init__(self, client_medium)
2854- self._buffer = b''
2855-
2856- def _accept_bytes(self, bytes):
2857- self._buffer += bytes
2858-
2859- def _finished_writing(self):
2860- data = self._medium.send_http_smart_request(self._buffer)
2861- self._response_body = data
2862-
2863- def _read_bytes(self, count):
2864- """See SmartClientMediumRequest._read_bytes."""
2865- return self._response_body.read(count)
2866-
2867- def _read_line(self):
2868- line, excess = medium._get_line(self._response_body.read)
2869- if excess != b'':
2870- raise AssertionError(
2871- '_get_line returned excess bytes, but this mediumrequest '
2872- 'cannot handle excess. (%r)' % (excess,))
2873- return line
2874-
2875- def _finished_reading(self):
2876- """See SmartClientMediumRequest._finished_reading."""
2877- pass
2878-
2879-
2880-def unhtml_roughly(maybe_html, length_limit=1000):
2881- """Very approximate html->text translation, for presenting error bodies.
2882-
2883- :param length_limit: Truncate the result to this many characters.
2884-
2885- >>> unhtml_roughly("<b>bad</b> things happened\\n")
2886- ' bad things happened '
2887- """
2888- return re.subn(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)[0][:length_limit]
2889-
2890-
2891-def get_test_permutations():
2892- """Return the permutations to be used in testing."""
2893- from breezy.tests import (
2894- features,
2895- http_server,
2896- )
2897- permutations = [(HttpTransport, http_server.HttpServer), ]
2898- if features.HTTPSServerFeature.available():
2899- from breezy.tests import (
2900- https_server,
2901- ssl_certs,
2902- )
2903-
2904- class HTTPS_transport(HttpTransport):
2905-
2906- def __init__(self, base, _from_transport=None):
2907- super(HTTPS_transport, self).__init__(
2908- base, _from_transport=_from_transport,
2909- ca_certs=ssl_certs.build_path('ca.crt'))
2910-
2911- permutations.append((HTTPS_transport,
2912- https_server.HTTPSServer))
2913- return permutations
2914
2915=== added file 'breezy/transport/http/urllib.py'
2916--- breezy/transport/http/urllib.py 1970-01-01 00:00:00 +0000
2917+++ breezy/transport/http/urllib.py 2020-12-27 18:07:47 +0000
2918@@ -0,0 +1,2626 @@
2919+# Copyright (C) 2005-2010 Canonical Ltd
2920+#
2921+# This program is free software; you can redistribute it and/or modify
2922+# it under the terms of the GNU General Public License as published by
2923+# the Free Software Foundation; either version 2 of the License, or
2924+# (at your option) any later version.
2925+#
2926+# This program is distributed in the hope that it will be useful,
2927+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2928+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2929+# GNU General Public License for more details.
2930+#
2931+# You should have received a copy of the GNU General Public License
2932+# along with this program; if not, write to the Free Software
2933+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2934+
2935+"""Implementation of Transport over http using urllib.
2936+
2937+The urllib-independent, protocol-neutral pieces live in breezy.transport.http.
2938+"""
2939+
2940+from __future__ import absolute_import
2941+
2942+DEBUG = 0
2943+
2944+import base64
2945+import cgi
2946+import errno
2947+import os
2948+import re
2949+import socket
2950+import ssl
2951+import sys
2952+import time
2953+import urllib
2954+import weakref
2955+
2956+try:
2957+ import http.client as http_client
2958+except ImportError:
2959+ import httplib as http_client
2960+try:
2961+ import urllib.request as urllib_request
2962+except ImportError: # python < 3
2963+ import urllib2 as urllib_request
2964+try:
2965+ from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
2966+except ImportError:
2967+ from urlparse import urljoin
2968+ from urllib import splitport, splittype, splithost, urlencode
2969+
2970+# TODO: handle_response should be integrated into the http/__init__.py
2971+from .response import handle_response
2972+
2973+# FIXME: Oversimplifying, two kinds of exceptions should be
2974+# raised, once a request is issued: URLError before we have been
2975+# able to process the response, HTTPError after that. Processing
2976+# the response means we are able to leave the socket clean, so if
2977+# we are not able to do that, we should close the connection. The
2978+# actual code more or less does that; tests should be written to
2979+# ensure that.
2980+
2981+from ... import __version__ as breezy_version
2982+from ... import (
2983+ config,
2984+ debug,
2985+ errors,
2986+ lazy_import,
2987+ osutils,
2988+ trace,
2989+ transport,
2990+ ui,
2991+ urlutils,
2992+)
2993+from ...bzr.smart import medium
2994+from ...sixish import (
2995+ PY3,
2996+ reraise,
2997+ text_type,
2998+)
2999+from ...trace import mutter
3000+from ...transport import (
3001+ ConnectedTransport,
3002+ UnusableRedirect,
3003+ )
3004+
3005+from . import default_user_agent, ssl
3006+
3007+
3008+checked_kerberos = False
3009+kerberos = None
3010+
3011+
3012+class addinfourl(urllib_request.addinfourl):
3013+ '''Replacement addinfourl class compatible with python-2.7's xmlrpclib
3014+
3015+ In python-2.7, xmlrpclib expects that the response object that it receives
3016+ has a getheader method. http_client.HTTPResponse provides this but
3017+ urllib_request.addinfourl does not. Add the necessary functions here, ported to
3018+ use the internal data structures of addinfourl.
3019+ '''
3020+
3021+ def getheader(self, name, default=None):
3022+ if self.headers is None:
3023+ raise http_client.ResponseNotReady()
3024+ return self.headers.getheader(name, default)
3025+
3026+ def getheaders(self):
3027+ if self.headers is None:
3028+ raise http_client.ResponseNotReady()
3029+ return list(self.headers.items())
3030+
3031+
3032+class _ReportingFileSocket(object):
3033+
3034+ def __init__(self, filesock, report_activity=None):
3035+ self.filesock = filesock
3036+ self._report_activity = report_activity
3037+
3038+ def report_activity(self, size, direction):
3039+ if self._report_activity:
3040+ self._report_activity(size, direction)
3041+
3042+ def read(self, size=1):
3043+ s = self.filesock.read(size)
3044+ self.report_activity(len(s), 'read')
3045+ return s
3046+
3047+ def readline(self, size=-1):
3048+ s = self.filesock.readline(size)
3049+ self.report_activity(len(s), 'read')
3050+ return s
3051+
3052+ def readinto(self, b):
3053+ s = self.filesock.readinto(b)
3054+ self.report_activity(s, 'read')
3055+ return s
3056+
3057+ def __getattr__(self, name):
3058+ return getattr(self.filesock, name)
3059+
3060+
3061+class _ReportingSocket(object):
3062+
3063+ def __init__(self, sock, report_activity=None):
3064+ self.sock = sock
3065+ self._report_activity = report_activity
3066+
3067+ def report_activity(self, size, direction):
3068+ if self._report_activity:
3069+ self._report_activity(size, direction)
3070+
3071+ def sendall(self, s, *args):
3072+ self.sock.sendall(s, *args)
3073+ self.report_activity(len(s), 'write')
3074+
3075+ def recv(self, *args):
3076+ s = self.sock.recv(*args)
3077+ self.report_activity(len(s), 'read')
3078+ return s
3079+
3080+ def makefile(self, mode='r', bufsize=-1):
3081+ # http_client creates a fileobject that doesn't do buffering, which
3082+ # makes fp.readline() very expensive because it only reads one byte
3083+ # at a time. So we wrap the socket in an object that forces
3084+ # sock.makefile to make a buffered file.
3085+ fsock = self.sock.makefile(mode, 65536)
3086+ # And wrap that into a reporting kind of fileobject
3087+ return _ReportingFileSocket(fsock, self._report_activity)
3088+
3089+ def __getattr__(self, name):
3090+ return getattr(self.sock, name)
3091+
3092+
3093+# We define our own Response class to keep our http_client pipe clean
3094+class Response(http_client.HTTPResponse):
3095+ """Custom HTTPResponse, to avoid the need to decorate.
3096+
3097+ http_client prefers to decorate the returned objects, rather
3098+ than using a custom object.
3099+ """
3100+
3101+ # Some responses have bodies in which we have no interest
3102+ _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]
3103+
3104+ # in finish() below, we may have to discard several MB in the worst
3105+ # case. To avoid buffering that much, we read and discard by chunks
3106+ # instead. The underlying file is either a socket or a StringIO, so reading
3107+ # 8k chunks should be fine.
3108+ _discarded_buf_size = 8192
3109+
3110+ if PY3:
3111+ def __init__(self, sock, debuglevel=0, method=None, url=None):
3112+ self.url = url
3113+ super(Response, self).__init__(
3114+ sock, debuglevel=debuglevel, method=method, url=url)
3115+
3116+ def begin(self):
3117+ """Begin to read the response from the server.
3118+
3119+ http_client assumes that some responses get no content and do
3120+ not even attempt to read the body in that case, leaving
3121+ the body in the socket, blocking the next request. Let's
3122+ try to workaround that.
3123+ """
3124+ http_client.HTTPResponse.begin(self)
3125+ if self.status in self._body_ignored_responses:
3126+ if self.debuglevel >= 2:
3127+ print("For status: [%s], will ready body, length: %s" % (
3128+ self.status, self.length))
3129+ if not (self.length is None or self.will_close):
3130+ # In some cases, we just can't read the body and should
3131+ # not even try, or we may encounter a 104, 'Connection
3132+ # reset by peer' error if there is indeed no body
3133+ # and the server closed the connection just after
3134+ # having issued the response headers (even if the
3135+ # headers indicate a Content-Type...)
3136+ body = self.read(self.length)
3137+ if self.debuglevel >= 9:
3138+ # This one can be huge and is generally not interesting
3139+ print("Consumed body: [%s]" % body)
3140+ self.close()
3141+ elif self.status == 200:
3142+ # Whatever the request is, it went ok, so we surely don't want to
3143+ # close the connection. Some cases are not correctly detected by
3144+ # http_client.HTTPConnection.getresponse (called by
3145+ # http_client.HTTPResponse.begin). The CONNECT response for the https
3146+ # through proxy case is one. Note: the 'will_close' below refers
3147+ # to the "true" socket between us and the server, whereas the
3148+ # 'close()' above refers to the copy of that socket created by
3149+ # http_client for the response itself. So, in the if above we close the
3150+ # socket to indicate that we are done with the response whereas
3151+ # below we keep the socket with the server opened.
3152+ self.will_close = False
3153+
3154+ def finish(self):
3155+ """Finish reading the body.
3156+
3157+ In some cases, the client may have left some bytes to read in the
3158+ body. That will block the next request to succeed if we use a
3159+ persistent connection. If we don't use a persistent connection, well,
3160+ nothing will block the next request since a new connection will be
3161+ issued anyway.
3162+
3163+ :return: the number of bytes left on the socket (may be None)
3164+ """
3165+ pending = None
3166+ if not self.isclosed():
3167+ # Make sure nothing was left to be read on the socket
3168+ pending = 0
3169+ data = True
3170+ while data and self.length:
3171+ # read() will update self.length
3172+ data = self.read(min(self.length, self._discarded_buf_size))
3173+ pending += len(data)
3174+ if pending:
3175+ trace.mutter("%s bytes left on the HTTP socket", pending)
3176+ self.close()
3177+ return pending
3178+
3179+
3180+# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
3181+class AbstractHTTPConnection:
3182+ """A custom HTTP(S) Connection, which can reset itself on a bad response"""
3183+
3184+ response_class = Response
3185+
3186+ # When we detect a server responding with the whole file to range requests,
3187+ # we want to warn. But not below a given threshold.
3188+ _range_warning_thresold = 1024 * 1024
3189+
3190+ def __init__(self, report_activity=None):
3191+ self._response = None
3192+ self._report_activity = report_activity
3193+ self._ranges_received_whole_file = None
3194+
3195+ def _mutter_connect(self):
3196+ netloc = '%s:%s' % (self.host, self.port)
3197+ if self.proxied_host is not None:
3198+ netloc += '(proxy for %s)' % self.proxied_host
3199+ trace.mutter('* About to connect() to %s' % netloc)
3200+
3201+ def getresponse(self):
3202+ """Capture the response to be able to cleanup"""
3203+ self._response = http_client.HTTPConnection.getresponse(self)
3204+ return self._response
3205+
3206+ def cleanup_pipe(self):
3207+ """Read the remaining bytes of the last response if any."""
3208+ if self._response is not None:
3209+ try:
3210+ pending = self._response.finish()
3211+ # Warn the user (once)
3212+ if (self._ranges_received_whole_file is None
3213+ and self._response.status == 200
3214+ and pending
3215+ and pending > self._range_warning_thresold):
3216+ self._ranges_received_whole_file = True
3217+ trace.warning(
3218+ 'Got a 200 response when asking for multiple ranges,'
3219+ ' does your server at %s:%s support range requests?',
3220+ self.host, self.port)
3221+ except socket.error as e:
3222+ # It's conceivable that the socket is in a bad state here
3223+ # (including some test cases) and in this case, it doesn't need
3224+ # cleaning anymore, so no need to fail, we just get rid of the
3225+ # socket and let callers reconnect
3226+ if (len(e.args) == 0
3227+ or e.args[0] not in (errno.ECONNRESET, errno.ECONNABORTED)):
3228+ raise
3229+ self.close()
3230+ self._response = None
3231+ # Preserve our preciousss
3232+ sock = self.sock
3233+ self.sock = None
3234+ # Let http_client.HTTPConnection do its housekeeping
3235+ self.close()
3236+ # Restore our preciousss
3237+ self.sock = sock
3238+
3239+ def _wrap_socket_for_reporting(self, sock):
3240+ """Wrap the socket before anybody use it."""
3241+ self.sock = _ReportingSocket(sock, self._report_activity)
3242+
3243+
3244+class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
3245+
3246+ # XXX: Needs refactoring at the caller level.
3247+ def __init__(self, host, port=None, proxied_host=None,
3248+ report_activity=None, ca_certs=None):
3249+ AbstractHTTPConnection.__init__(self, report_activity=report_activity)
3250+ if PY3:
3251+ http_client.HTTPConnection.__init__(self, host, port)
3252+ else:
3253+ # Use strict=True since we don't support HTTP/0.9
3254+ http_client.HTTPConnection.__init__(self, host, port, strict=True)
3255+ self.proxied_host = proxied_host
3256+ # ca_certs is ignored, it's only relevant for https
3257+
3258+ def connect(self):
3259+ if 'http' in debug.debug_flags:
3260+ self._mutter_connect()
3261+ http_client.HTTPConnection.connect(self)
3262+ self._wrap_socket_for_reporting(self.sock)
3263+
3264+
3265+class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
3266+
3267+ def __init__(self, host, port=None, key_file=None, cert_file=None,
3268+ proxied_host=None,
3269+ report_activity=None, ca_certs=None):
3270+ AbstractHTTPConnection.__init__(self, report_activity=report_activity)
3271+ if PY3:
3272+ http_client.HTTPSConnection.__init__(
3273+ self, host, port, key_file, cert_file)
3274+ else:
3275+ # Use strict=True since we don't support HTTP/0.9
3276+ http_client.HTTPSConnection.__init__(self, host, port,
3277+ key_file, cert_file, strict=True)
3278+ self.proxied_host = proxied_host
3279+ self.ca_certs = ca_certs
3280+
3281+ def connect(self):
3282+ if 'http' in debug.debug_flags:
3283+ self._mutter_connect()
3284+ http_client.HTTPConnection.connect(self)
3285+ self._wrap_socket_for_reporting(self.sock)
3286+ if self.proxied_host is None:
3287+ self.connect_to_origin()
3288+
3289+ def connect_to_origin(self):
3290+ # FIXME JRV 2011-12-18: Use location config here?
3291+ config_stack = config.GlobalStack()
3292+ cert_reqs = config_stack.get('ssl.cert_reqs')
3293+ if self.proxied_host is not None:
3294+ host = self.proxied_host.split(":", 1)[0]
3295+ else:
3296+ host = self.host
3297+ if cert_reqs == ssl.CERT_NONE:
3298+ ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
3299+ ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
3300+ ca_certs = None
3301+ else:
3302+ if self.ca_certs is None:
3303+ ca_certs = config_stack.get('ssl.ca_certs')
3304+ else:
3305+ ca_certs = self.ca_certs
3306+ if ca_certs is None:
3307+ trace.warning(
3308+ "No valid trusted SSL CA certificates file set. See "
3309+ "'brz help ssl.ca_certs' for more information on setting "
3310+ "trusted CAs.")
3311+ try:
3312+ ssl_context = ssl.create_default_context(
3313+ purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
3314+ ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
3315+ if self.cert_file:
3316+ ssl_context.load_cert_chain(
3317+ keyfile=self.key_file, certfile=self.cert_file)
3318+ ssl_context.verify_mode = cert_reqs
3319+ ssl_sock = ssl_context.wrap_socket(
3320+ self.sock, server_hostname=self.host)
3321+ except ssl.SSLError:
3322+ trace.note(
3323+ "\n"
3324+ "See `brz help ssl.ca_certs` for how to specify trusted CA"
3325+ "certificates.\n"
3326+ "Pass -Ossl.cert_reqs=none to disable certificate "
3327+ "verification entirely.\n")
3328+ raise
3329+ # Wrap the ssl socket before anybody use it
3330+ self._wrap_socket_for_reporting(ssl_sock)
3331+
3332+
3333+class Request(urllib_request.Request):
3334+ """A custom Request object.
3335+
3336+ urllib_request determines the request method heuristically (based on
3337+ the presence or absence of data). We set the method
3338+ statically.
3339+
3340+ The Request object tracks:
3341+ - the connection the request will be made on.
3342+ - the authentication parameters needed to preventively set
3343+ the authentication header once a first authentication has
3344+ been made.
3345+ """
3346+
3347+ def __init__(self, method, url, data=None, headers={},
3348+ origin_req_host=None, unverifiable=False,
3349+ connection=None, parent=None):
3350+ urllib_request.Request.__init__(
3351+ self, url, data, headers,
3352+ origin_req_host, unverifiable)
3353+ self.method = method
3354+ self.connection = connection
3355+ # To handle redirections
3356+ self.parent = parent
3357+ self.redirected_to = None
3358+ # Unless told otherwise, redirections are not followed
3359+ self.follow_redirections = False
3360+ # auth and proxy_auth are dicts containing, at least
3361+ # (scheme, host, port, realm, user, password, protocol, path).
3362+ # The dict entries are mostly handled by the AuthHandler.
3363+ # Some authentication schemes may add more entries.
3364+ self.auth = {}
3365+ self.proxy_auth = {}
3366+ self.proxied_host = None
3367+
3368+ def get_method(self):
3369+ return self.method
3370+
3371+ def set_proxy(self, proxy, type):
3372+ """Set the proxy and remember the proxied host."""
3373+ if PY3:
3374+ host, port = splitport(self.host)
3375+ else:
3376+ host, port = splitport(self.get_host())
3377+ if port is None:
3378+ # We need to set the default port ourselves way before it gets set
3379+ # in the HTTP[S]Connection object at build time.
3380+ if self.type == 'https':
3381+ conn_class = HTTPSConnection
3382+ else:
3383+ conn_class = HTTPConnection
3384+ port = conn_class.default_port
3385+ self.proxied_host = '%s:%s' % (host, port)
3386+ urllib_request.Request.set_proxy(self, proxy, type)
3387+ # When urllib_request makes a https request with our wrapper code and a proxy,
3388+ # it sets Host to the https proxy, not the host we want to talk to.
3389+ # I'm fairly sure this is our fault, but what is the cause is an open
3390+ # question. -- Robert Collins May 8 2010.
3391+ self.add_unredirected_header('Host', self.proxied_host)
3392+
3393+
3394+class _ConnectRequest(Request):
3395+
3396+ def __init__(self, request):
3397+ """Constructor
3398+
3399+ :param request: the first request sent to the proxied host, already
3400+ processed by the opener (i.e. proxied_host is already set).
3401+ """
3402+ # We give a fake url and redefine selector or urllib_request will be
3403+ # confused
3404+ Request.__init__(self, 'CONNECT', request.get_full_url(),
3405+ connection=request.connection)
3406+ if request.proxied_host is None:
3407+ raise AssertionError()
3408+ self.proxied_host = request.proxied_host
3409+
3410+ @property
3411+ def selector(self):
3412+ return self.proxied_host
3413+
3414+ def get_selector(self):
3415+ return self.selector
3416+
3417+ def set_proxy(self, proxy, type):
3418+ """Set the proxy without remembering the proxied host.
3419+
3420+ We already know the proxied host by definition, the CONNECT request
3421+ occurs only when the connection goes through a proxy. The usual
3422+ processing (masquerade the request so that the connection is done to
3423+ the proxy while the request is targeted at another host) does not apply
3424+ here. In fact, the connection is already established with proxy and we
3425+ just want to enable the SSL tunneling.
3426+ """
3427+ urllib_request.Request.set_proxy(self, proxy, type)
3428+
3429+
3430+class ConnectionHandler(urllib_request.BaseHandler):
3431+ """Provides connection-sharing by pre-processing requests.
3432+
3433+ urllib_request provides no way to access the HTTPConnection object
3434+ internally used. But we need it in order to achieve
3435+ connection sharing. So, we add it to the request just before
3436+ it is processed, and then we override the do_open method for
3437+ http[s] requests in AbstractHTTPHandler.
3438+ """
3439+
3440+ handler_order = 1000 # after all pre-processings
3441+
3442+ def __init__(self, report_activity=None, ca_certs=None):
3443+ self._report_activity = report_activity
3444+ self.ca_certs = ca_certs
3445+
3446+ def create_connection(self, request, http_connection_class):
3447+ host = request.host
3448+ if not host:
3449+ # Just a bit of paranoia here, this should have been
3450+ # handled in the higher levels
3451+ raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')
3452+
3453+ # We create a connection (but it will not connect until the first
3454+ # request is made)
3455+ try:
3456+ connection = http_connection_class(
3457+ host, proxied_host=request.proxied_host,
3458+ report_activity=self._report_activity,
3459+ ca_certs=self.ca_certs)
3460+ except http_client.InvalidURL as exception:
3461+ # There is only one occurrence of InvalidURL in http_client
3462+ raise urlutils.InvalidURL(request.get_full_url(),
3463+ extra='nonnumeric port')
3464+
3465+ return connection
3466+
3467+ def capture_connection(self, request, http_connection_class):
3468+ """Capture or inject the request connection.
3469+
3470+ Two cases:
3471+ - the request has no connection: create a new one,
3472+
3473+ - the request has a connection: this one has been used
3474+ already, let's capture it, so that we can give it to
3475+ another transport to be reused. We don't do that
3476+ ourselves: the Transport object gets the connection from
3477+ a first request and then propagate it, from request to
3478+ request or to cloned transports.
3479+ """
3480+ connection = request.connection
3481+ if connection is None:
3482+ # Create a new one
3483+ connection = self.create_connection(request, http_connection_class)
3484+ request.connection = connection
3485+
3486+ # All connections will pass here, propagate debug level
3487+ connection.set_debuglevel(DEBUG)
3488+ return request
3489+
3490+ def http_request(self, request):
3491+ return self.capture_connection(request, HTTPConnection)
3492+
3493+ def https_request(self, request):
3494+ return self.capture_connection(request, HTTPSConnection)
3495+
3496+
class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get better
    control of the connection, the ability to implement new
    request types and to return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers added to every request unless the caller already set them.
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Add the default headers to the request without overriding
        any header the caller set explicitly."""

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exceptions of an application-level kind; we
        just have to translate them.

        http_client can raise exceptions of a transport-level kind (badly
        formatted dialog, loss of connection or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        :param http_class: The connection class to use on retry.
        :param request: The request that just failed.
        :param first_try: False when this is already the retry, in which
            case the translated exception is raised instead.
        :return: The response of the retried request (only on first_try).
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # Name resolution failed: no need to retry, that will not help
            if PY3:
                origin_req_host = request.origin_req_host
            else:
                origin_req_host = request.get_origin_req_host()
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in an incorrect state, it's a bug
            # in our implementation: re-raise unchanged.
            reraise(exc_type, exc_val, exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Will retry, %s %r' % (method, url))
                # Close the stale connection and replay the request once.
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicate that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) was
                    # experienced; we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exceptions are considered connection related.

                    # socket errors generally occur for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    if PY3:
                        selector = request.selector
                    else:
                        selector = request.get_selector()
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Failed again, %s %r' % (method, url))
                    print(' Will raise: [%r]' % my_exception)
                # Keep the original traceback for diagnosis.
                reraise(type(my_exception), my_exception, exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: Connection class to use when retrying.
        :param request: The request to send; must already carry a
            connection (set by ConnectionHandler).
        :param first_try: Internal flag used by retry_or_raise.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            if PY3:
                url = request.selector
            else:
                url = request.get_selector()
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        # On Python 3 the http.client response object is returned
        # directly; everything below (the addinfourl conversion) is the
        # Python 2 code path only.
        if PY3:
            response.msg = response.reason
            return response

# FIXME: HTTPConnection does not fully support 100-continue (the
# server responses are just ignored)

#        if code == 100:
#            mutter('Will send the body')
#            # We can send the body now
#            body = request.data
#            if body is None:
#                raise URLError("No data given")
#            connection.send(body)
#            response = connection.getresponse()

        if self._debuglevel >= 2:
            print('Receives response: %r' % response)
            print(' For: %r(%r)' % (request.get_method(),
                                    request.get_full_url()))

        if convert_to_addinfourl:
            # Shamelessly copied from urllib_request
            req = request
            r = response
            r.recv = r.read
            fp = socket._fileobject(r, bufsize=65536)
            resp = addinfourl(fp, r.msg, req.get_full_url())
            resp.code = r.status
            resp.msg = r.reason
            resp.version = r.version
            if self._debuglevel >= 2:
                print('Create addinfourl: %r' % resp)
                print(' For: %r(%r)' % (request.get_method(),
                                        request.get_full_url()))
            if 'http' in debug.debug_flags:
                version = 'HTTP/%d.%d'
                try:
                    version = version % (resp.version / 10,
                                         resp.version % 10)
                except:
                    version = 'HTTP/%r' % resp.version
                trace.mutter('< %s %s %s' % (version, resp.code,
                                             resp.msg))
                # Use the raw header lines instead of treating resp.info() as a
                # dict since we may miss duplicated headers otherwise.
                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
        else:
            resp = response
        return resp
3730+
3731+
class HTTPHandler(AbstractHTTPHandler):
    """Open plain http requests through the shared HTTPConnection."""

    def http_open(self, request):
        # Delegate straight to the common do_open machinery.
        return self.do_open(HTTPConnection, request)
3737+
3738+
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection."""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open an https request, issuing a proxy CONNECT first if needed.

        On the first request through a proxy (the socket does not exist
        yet and a proxied host is recorded) a CONNECT request is sent to
        the proxy to establish the tunnel before the SSL handshake.
        """
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # NOTE(review): urllib handlers don't normally define a
                # `host` attribute; this error path looks like it would
                # raise AttributeError on `self.host` — verify.
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, self.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
3775+
3776+
class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Handles redirect requests.

    We have to implement our own scheme because we use a specific
    Request object and because we want to implement a specific
    policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way, which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request.

        Build the follow-up Request for a redirect, or raise HTTPError
        for status codes we refuse to follow.
        """
        # We would have preferred to update the request instead
        # of creating a new one, but the urllib_request.Request object
        # has a too complicated creation process to provide a
        # simple enough equivalent update process. Instead, when
        # redirecting, we only update the following request in
        # the redirect chain with a reference to the parent
        # request.

        # Some codes make no sense in our context and are treated
        # as errors:

        # 300: Multiple choices for different representations of
        # the URI. Using that mechanism with Breezy will violate the
        # protocol neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional
        # GETs which are not used by our implementation)

        # 305: Use proxy. I can't imagine this one occurring in
        # our context-- vila/20060909

        # 306: Unused (if the RFC says so...)

        # If the code is 302 and the request is HEAD, some may
        # think that it is a sufficient hint that the file exists
        # and that we MAY avoid following the redirections. But
        # if we want to be sure, we MUST follow them.

        if PY3:
            origin_req_host = req.origin_req_host
        else:
            origin_req_host = req.get_origin_req_host()

        if code in (301, 302, 303, 307, 308):
            return Request(req.get_method(), newurl,
                           headers=req.headers,
                           origin_req_host=origin_req_host,
                           unverifiable=True,
                           # TODO: It will be nice to be able to
                           # detect virtual hosts sharing the same
                           # IP address, that will allow us to
                           # share the same connection...
                           connection=None,
                           parent=req,
                           )
        else:
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected-to URI.

        Copied from urllib_request to be able to clean the pipe of the
        associated connection, *before* issuing the redirected request
        but *after* having eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            # No redirect target given: nothing to follow.
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # Caller asked not to follow: record where we would have gone
            # and hand back the original response body.
            req.redirected_to = newurl
            return fp

        # This call succeeds or raises an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    # All followable redirect codes share the same implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302
3900+
3901+
class ProxyHandler(urllib_request.ProxyHandler):
    """Handles proxy setting.

    Copied and modified from urllib_request to be able to modify the request
    during the request pre-processing instead of modifying it at _open time.
    As we capture (or create) the connection object during request
    processing, _open time was too late.

    The main task is to modify the request so that the connection is made to
    the proxy while the request still refers to the destination host.

    Note: the proxy handling *may* modify the protocol used; the request may
    be against an https server proxied through an http proxy. So,
    https_request will be called, but later it's really http_open that will
    be called. This explains why we don't have to call self.parent.open as
    urllib_request did.
    """

    # Proxies must be in front
    handler_order = 100
    _debuglevel = DEBUG

    def __init__(self, proxies=None):
        urllib_request.ProxyHandler.__init__(self, proxies)
        # First, let's get rid of the urllib_request implementation: its
        # dynamically bound <scheme>_open methods would bypass ours.
        for type, proxy in self.proxies.items():
            if self._debuglevel >= 3:
                print('Will unbind %s_open for %r' % (type, proxy))
            delattr(self, '%s_open' % type)

        def bind_scheme_request(proxy, scheme):
            # Bind a <scheme>_request pre-processor on this instance that
            # routes the request through set_proxy.
            if proxy is None:
                return
            scheme_request = scheme + '_request'
            if self._debuglevel >= 3:
                print('Will bind %s for %r' % (scheme_request, proxy))
            setattr(self, scheme_request,
                    lambda request: self.set_proxy(request, scheme))
        # We are interested only in the http[s] proxies
        http_proxy = self.get_proxy_env_var('http')
        bind_scheme_request(http_proxy, 'http')
        https_proxy = self.get_proxy_env_var('https')
        bind_scheme_request(https_proxy, 'https')

    def get_proxy_env_var(self, name, default_to='all'):
        """Get a proxy env var value.

        Note that we indirectly rely on
        urllib.getproxies_environment taking into account the
        uppercased values for proxy variables.

        :param name: The proxy scheme name ('http', 'https', 'no'...).
        :param default_to: Fallback key tried when `name` is absent, or
            None to disable the fallback.
        :return: The proxy URL string or None.
        """
        try:
            return self.proxies[name.lower()]
        except KeyError:
            if default_to is not None:
                # Try to get the alternate environment variable
                try:
                    return self.proxies[default_to]
                except KeyError:
                    pass
        return None

    def proxy_bypass(self, host):
        """Check if host should be proxied or not.

        :returns: True to skip the proxy, False otherwise.
        """
        no_proxy = self.get_proxy_env_var('no', default_to=None)
        bypass = self.evaluate_proxy_bypass(host, no_proxy)
        if bypass is None:
            # Nevertheless, there are platform-specific ways to
            # ignore proxies...
            return urllib_request.proxy_bypass(host)
        else:
            return bypass

    def evaluate_proxy_bypass(self, host, no_proxy):
        """Check the host against a comma-separated no_proxy list as a string.

        :param host: ``host:port`` being requested

        :param no_proxy: comma-separated list of hosts to access directly.

        :returns: True to skip the proxy, False not to, or None to
            leave it to urllib.
        """
        if no_proxy is None:
            # All hosts are proxied
            return False
        hhost, hport = splitport(host)
        # Does host match any of the domains mentioned in
        # no_proxy ? The rules about what is authorized in no_proxy
        # are fuzzy (to say the least). We try to allow most
        # commonly seen values.
        for domain in no_proxy.split(','):
            domain = domain.strip()
            if domain == '':
                continue
            dhost, dport = splitport(domain)
            if hport == dport or dport is None:
                # Protect glob chars
                dhost = dhost.replace(".", r"\.")
                dhost = dhost.replace("*", r".*")
                dhost = dhost.replace("?", r".")
                # NOTE(review): re.match only anchors at the start, so a
                # no_proxy entry 'example.com' also matches
                # 'example.com.evil.org' — confirm whether a trailing
                # anchor is intended here.
                if re.match(dhost, hhost, re.IGNORECASE):
                    return True
        # Nothing explicitly avoids the host
        return None

    def set_proxy(self, request, type):
        """Redirect the request's connection to the configured proxy.

        The request keeps referring to the destination host, but the
        connection is made to the proxy; proxy auth parameters are
        initialized on first use.
        """
        if PY3:
            host = request.host
        else:
            host = request.get_host()
        if self.proxy_bypass(host):
            return request

        proxy = self.get_proxy_env_var(type)
        if self._debuglevel >= 3:
            print('set_proxy %s_request for %r' % (type, proxy))
        # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
        # grok user:password@host:port as well as
        # http://user:password@host:port

        parsed_url = transport.ConnectedTransport._split_url(proxy)
        if not parsed_url.host:
            raise urlutils.InvalidURL(proxy, 'No host component')

        if request.proxy_auth == {}:
            # No proxy auth parameters are available, we are handling the
            # first proxied request, initialize. scheme (the authentication
            # scheme) and realm will be set by the AuthHandler
            request.proxy_auth = {
                'host': parsed_url.host,
                'port': parsed_url.port,
                'user': parsed_url.user,
                'password': parsed_url.password,
                'protocol': parsed_url.scheme,
                # We ignore path since we connect to a proxy
                'path': None}
        if parsed_url.port is None:
            phost = parsed_url.host
        else:
            phost = parsed_url.host + ':%d' % parsed_url.port
        request.set_proxy(phost, type)
        if self._debuglevel >= 3:
            print('set_proxy: proxy set to %s://%s' % (type, phost))
        return request
4049+
4050+
class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate and
    digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious; it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication header
      (self.build_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is called,
      we acquire the authentication info in the error headers and call
      self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must be)
      the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been
      updated.
    """

    scheme = None
    """The scheme as it appears in the server header (lower cased)"""

    _max_retry = 3
    """We don't want to retry authenticating endlessly"""

    requires_username = True
    """Whether the auth mechanism requires a username."""

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # We want to know when we enter into a try/fail cycle of
        # authentications so we initialize to None to indicate that we aren't
        # in such a cycle by default.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Parse the authentication header.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word in
            the given header (lower cased), remainder may be None.
        """
        try:
            scheme, remainder = server_header.split(None, 1)
        except ValueError:
            # Single-word header: no scheme parameters follow.
            scheme = server_header
            remainder = None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        old_value = auth.get(key, None)
        if old_value != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly
        if self._retry_count is None:
            # The retries being recursive calls, None identifies the first
            # retry
            self._retry_count = 1
        else:
            self._retry_count += 1
            if self._retry_count > self._max_retry:
                # Let's be ready for next round
                self._retry_count = None
                return None
        if PY3:
            server_headers = headers.get_all(self.auth_required_header)
        else:
            server_headers = headers.getheaders(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            self.update_auth(auth, 'protocol', parsed_url.scheme)
            self.update_auth(auth, 'host', parsed_url.host)
            self.update_auth(auth, 'port', parsed_url.port)
            self.update_auth(auth, 'path', parsed_url.path)
        # FIXME: the auth handler should be selected at a single place instead
        # of letting all handlers try to match all headers, but the current
        # design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match each
            # one in turn
            matching_handler = self.auth_match(server_header, auth)
            if matching_handler:
                # auth_match may have modified auth (by adding the
                # password or changing the realm, for example)
                if (request.get_header(self.auth_header, None) is not None
                        and not auth['modified']):
                    # We already tried that, give up
                    return None

                # Only the most secure scheme proposed by the server should be
                # used, since the handlers use 'handler_order' to describe
                # that property, the first handler tried takes precedence, the
                # others should not attempt to authenticate if the best one
                # failed.
                best_scheme = auth.get('best_scheme', None)
                if best_scheme is None:
                    # At that point, if the current handler doesn't succeed
                    # the credentials are wrong (or incomplete), but we know
                    # that the associated scheme should be used.
                    best_scheme = auth['best_scheme'] = self.scheme
                if best_scheme != self.scheme:
                    continue

                if self.requires_username and auth.get('user', None) is None:
                    # Without a known user, we can't authenticate
                    return None

                # Housekeeping
                request.connection.cleanup_pipe()
                # Retry the request with an authentication header added
                response = self.parent.open(request)
                if response:
                    self.auth_successful(request, response)
                return response
        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them change the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
            updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial
            url and then during dialog with the server).
        :return: A (user, password) tuple; either may be None.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(auth['protocol'], auth['host'],
                                      port=port, path=auth['path'],
                                      realm=realm, ask=True,
                                      prompt=self.build_username_prompt(auth))
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information
        in the prompt, so we build the prompt from the authentication dict
        which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(user)s@%(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' password'
        return prompt

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that information
        in the prompt, so we build the prompt from the authentication dict
        which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        prompt = u'%s' % auth['protocol'].upper() + u' %(host)s'
        realm = auth['realm']
        if realm is not None:
            prompt += u", Realm: '%s'" % realm
        prompt += u' username'
        return prompt

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            self.add_auth_header(
                request, self.build_auth_header(auth, request))
        return request

    https_request = http_request  # FIXME: Need test
4333+
4334+
class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler for 'WWW-Authenticate: Negotiate'.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        """Return True when a Kerberos response could be built for header."""
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        response = self._auth_match_kerberos(auth)
        if response is None:
            # Optionally should try to authenticate using NTLM here
            return False
        self.update_auth(auth, 'negotiate_response', response)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        global kerberos, checked_kerberos
        # Lazily import the kerberos module exactly once and remember the
        # outcome so failed imports are not retried on every request.
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        status, context = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if status < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], status)
            return None
        status = kerberos.authGSSClientStep(context, "")
        if status < 0:
            trace.mutter('authGSSClientStep failed: %d', status)
            return None
        return kerberos.authGSSClientResponse(context)

    def build_auth_header(self, auth, request):
        """Build the header value from the stored negotiate response."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, a previous authentication was
        # successful and the recorded response can be sent again.
        reusable = auth.get('scheme', None) == 'negotiate'
        return reusable and auth.get('negotiate_response', None) is not None
4389+
4390+
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Encode user:password as an RFC 7617 Basic credentials header."""
        credentials = '%s:%s' % (auth['user'], auth['password'])
        encoded = base64.b64encode(credentials.encode('utf-8')).decode('ascii')
        return 'Basic ' + encoded

    def extract_realm(self, header_value):
        """Return (match, realm) parsed from a challenge header value."""
        match = self.auth_regexp.search(header_value)
        if match:
            return match, match.group(1)
        return match, None

    def auth_match(self, header, auth):
        """Record challenge params and credentials if header matches."""
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match is None:
            return False
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        missing_user = auth.get('user', None) is None
        missing_password = auth.get('password', None) is None
        if missing_user or missing_password:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)
        return True

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
4433+
4434+
def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) helper functions for a digest algorithm name.

    :param algorithm: the algorithm name from the challenge ('MD5' or 'SHA').
    :returns: (H, KD) where H hashes bytes to a hex string and KD combines
        a secret and data; both are None for unsupported algorithms.
    """
    if algorithm == 'MD5':
        def H(x):
            return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        H = osutils.sha_string
    else:
        H = None
    if H is None:
        KD = None
    else:
        def KD(secret, data):
            return H(("%s:%s" % (secret, data)).encode('utf-8'))
    return H, KD
4446+
4447+
def get_new_cnonce(nonce, nonce_count):
    """Generate a fresh 16-char client nonce for a digest exchange."""
    parts = (nonce, nonce_count, time.ctime(), osutils.rand_chars(8))
    raw = '%s:%d:%s:%s' % parts
    digest = osutils.sha_string(raw.encode('utf-8'))
    return digest[:16]
4452+
4453+
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler (RFC 2617, qop='auth' only)."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Parse a Digest challenge and record what is needed to answer it.

        :param header: the WWW-Authenticate (or Proxy-Authenticate) value.
        :param auth: the mutable auth dict updated in place.
        :returns: True if the challenge can be handled, False otherwise.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        qop = req_auth.get('qop', None)
        if qop != 'auth':  # No auth-int so far
            return False

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            # Unsupported digest algorithm (only MD5 and SHA are handled).
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Compute the Digest Authorization header value for request.

        Increments auth['nonce_count'] as a side effect, as required for
        the nc= directive.
        """
        if PY3:
            selector = request.selector
        else:
            selector = request.get_selector()
        # Strip the scheme and host parts to get the request-uri that goes
        # into the digest (and the uri= directive).
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        # A1/A2 as defined by RFC 2617 for qop='auth'.
        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
4552+
4553+
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Sends the authentication preemptively to avoid the roundtrip
    associated with the 401 error, and keeps the relevant info in
    the request's ``auth`` attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Return the auth params stored on the request."""
        return request.auth

    def set_auth(self, request, auth):
        """Store the auth params on the request."""
        request.auth = auth

    def build_password_prompt(self, auth):
        """Password prompt for direct (non-proxy) HTTP authentication."""
        return self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Username prompt for direct (non-proxy) HTTP authentication."""
        return self._build_username_prompt(auth)

    def http_error_401(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
4581+
4582+
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Sends the authentication preemptively to avoid the roundtrip
    associated with the 407 error, and keeps the relevant info in
    the request's ``proxy_auth`` attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params stored on the request."""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the proxy auth params on the request."""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Password prompt, prefixed to identify the proxy."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Username prompt, prefixed to identify the proxy."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        return self.auth_required(req, headers)
4617+
4618+
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler"""


class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler"""


class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler"""


class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler"""


class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler"""


class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler"""
4641+
4642+
class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    Responses with a status code in ``accepted_errors`` are handed back
    untouched so the Transport can handle them; anything else is routed
    through the opener's error machinery.
    """

    # The status codes the caller will handle.  This can be specialized in
    # the request on a case-by-case basis, but the common cases are covered
    # here.
    accepted_errors = [200,  # Ok
                       201,
                       202,
                       204,
                       206,  # Partial content
                       400,
                       403,
                       404,  # Not found
                       405,  # Method not allowed
                       406,  # Not Acceptable
                       409,  # Conflict
                       416,  # Range not satisfiable
                       422,  # Unprocessible entity
                       501,  # Not implemented
                       ]

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()
        if code in self.accepted_errors:
            return response
        # Let the handler chain (and ultimately the Transport) deal with it.
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
4680+
4681+
class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        if code == 403:
            raise errors.TransportError(
                'Server refuses to fulfill the request (403 Forbidden)'
                ' for %s' % req.get_full_url())
        # Any other unhandled status is unexpected.
        raise errors.UnexpectedHttpStatus(
            req.get_full_url(), code,
            'Unable to handle http code: %s' % msg)
4694+
4695+
class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener.

    The resulting opener chains the connection handler, redirect and error
    processors, proxy support and all the http/proxy auth handlers defined
    in this module.
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        # report_activity and ca_certs are forwarded to the connection
        # handler; the remaining handlers are stateless and shared.
        self._opener = urllib_request.build_opener(
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect, error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            )

        # Expose the opener's open() directly as our entry point.
        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)
4731+
4732+
4733+class HttpTransport(ConnectedTransport):
4734+ """HTTP Client implementations.
4735+
4736+ The protocol can be given as e.g. http+urllib://host/ to use a particular
4737+ implementation.
4738+ """
4739+
4740+ # _unqualified_scheme: "http" or "https"
4741+ # _scheme: may have "+pycurl", etc
4742+
4743+ # In order to debug we have to issue our traces in sync with
4744+ # httplib, which use print :(
4745+ _debuglevel = 0
4746+
    def __init__(self, base, _from_transport=None, ca_certs=None):
        """Set the base path where files will be stored.

        :param base: base URL; must start with http:// or https://,
            optionally qualified (e.g. http+urllib://).
        :param _from_transport: transport this one is cloned from, if any;
            its range hint and opener are reused.
        :param ca_certs: CA certificates passed to the Opener for https.
        """
        proto_match = re.match(r'^(https?)(\+\w+)?://', base)
        if not proto_match:
            raise AssertionError("not a http url: %r" % base)
        # "http" or "https", without any "+impl" qualifier.
        self._unqualified_scheme = proto_match.group(1)
        super(HttpTransport, self).__init__(
            base, _from_transport=_from_transport)
        self._medium = None
        # range hint is handled dynamically throughout the life
        # of the transport object. We start by trying multi-range
        # requests and if the server returns bogus results, we
        # retry with single range requests and, finally, we
        # forget about range if the server really can't
        # understand. Once acquired, this piece of info is
        # propagated to clones.
        if _from_transport is not None:
            self._range_hint = _from_transport._range_hint
            self._opener = _from_transport._opener
        else:
            self._range_hint = 'multi'
            self._opener = Opener(
                report_activity=self._report_activity, ca_certs=ca_certs)
4770+
    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """Perform an HTTP request and return a urllib3-like response.

        :param method: the HTTP method ('GET', 'HEAD', 'POST', ...).
        :param url: absolute URL to request.
        :param fields: optional dict of form fields, urlencoded into the
            request body; mutually exclusive with the 'body' keyword.
        :param headers: optional dict of extra request headers.
        :param urlopen_kw: only 'body' and 'retries' are recognized; any
            other keyword raises NotImplementedError.  retries > 0 enables
            following redirections.
        :returns: a Urllib3LikeResponse wrapping the urllib response.
        """
        body = urlopen_kw.pop('body', None)
        if fields is not None:
            data = urlencode(fields).encode()
            if body is not None:
                raise ValueError(
                    'body and fields are mutually exclusive')
        else:
            data = body
        if headers is None:
            headers = {}
        request = Request(method, url, data, headers)
        request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
        if urlopen_kw:
            raise NotImplementedError(
                'unknown arguments: %r' % urlopen_kw.keys())
        connection = self._get_connection()
        if connection is not None:
            # Give back shared info
            request.connection = connection
            (auth, proxy_auth) = self._get_credentials()
            # Clean the httplib.HTTPConnection pipeline in case the previous
            # request couldn't do it
            connection.cleanup_pipe()
        else:
            # First request, initialize credentials.
            # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
            auth = self._create_auth()
            # Proxy initialization will be done by the first proxied request
            proxy_auth = dict()
        # Ensure authentication info is provided
        request.auth = auth
        request.proxy_auth = proxy_auth

        if self._debuglevel > 0:
            print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                     request.get_full_url()))
        response = self._opener.open(request)
        if self._get_connection() is not request.connection:
            # First connection or reconnection
            self._set_connection(request.connection,
                                 (request.auth, request.proxy_auth))
        else:
            # http may change the credentials while keeping the
            # connection opened
            self._update_credentials((request.auth, request.proxy_auth))

        code = response.code
        if (request.follow_redirections is False
                and code in (301, 302, 303, 307, 308)):
            # Redirections were not requested; let the caller decide what
            # to do (301/308 are the permanent variants).
            raise errors.RedirectRequested(request.get_full_url(),
                                           request.redirected_to,
                                           is_permanent=(code in (301, 308)))

        if request.redirected_to is not None:
            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                         request.redirected_to))

        # Adapt the urllib response to the subset of the urllib3 response
        # API that the callers in this module rely on.
        class Urllib3LikeResponse(object):

            def __init__(self, actual):
                self._actual = actual
                self._data = None

            def getheader(self, name, default=None):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                if PY3:
                    return self._actual.headers.get(name, default)
                else:
                    return self._actual.headers.getheader(name, default)

            def getheaders(self):
                if self._actual.headers is None:
                    raise http_client.ResponseNotReady()
                return list(self._actual.headers.items())

            @property
            def status(self):
                return self._actual.code

            @property
            def reason(self):
                return self._actual.reason

            @property
            def data(self):
                # Read lazily and cache: the body can only be read once.
                if self._data is None:
                    self._data = self._actual.read()
                return self._data

            @property
            def text(self):
                if self.status == 204:
                    # No Content: there is no body to decode.
                    return None
                charset = cgi.parse_header(
                    self._actual.headers['Content-Type'])[1].get('charset')
                if charset:
                    return self.data.decode(charset)
                else:
                    return self.data.decode()

            def read(self, amt=None):
                return self._actual.read(amt)

            def readlines(self):
                return self._actual.readlines()

            def readline(self, size=-1):
                return self._actual.readline(size)

        return Urllib3LikeResponse(response)
4883+
    def disconnect(self):
        """Close the current connection, if one is open."""
        connection = self._get_connection()
        if connection is not None:
            connection.close()
4888+
4889+ def has(self, relpath):
4890+ """Does the target location exist?
4891+ """
4892+ response = self._head(relpath)
4893+
4894+ code = response.status
4895+ if code == 200: # "ok",
4896+ return True
4897+ else:
4898+ return False
4899+
4900+ def get(self, relpath):
4901+ """Get the file at the given relative path.
4902+
4903+ :param relpath: The relative path to the file
4904+ """
4905+ code, response_file = self._get(relpath, None)
4906+ return response_file
4907+
4908+ def _get(self, relpath, offsets, tail_amount=0):
4909+ """Get a file, or part of a file.
4910+
4911+ :param relpath: Path relative to transport base URL
4912+ :param offsets: None to get the whole file;
4913+ or a list of _CoalescedOffset to fetch parts of a file.
4914+ :param tail_amount: The amount to get from the end of the file.
4915+
4916+ :returns: (http_code, result_file)
4917+ """
4918+ abspath = self._remote_path(relpath)
4919+ headers = {}
4920+ if offsets or tail_amount:
4921+ range_header = self._attempted_range_header(offsets, tail_amount)
4922+ if range_header is not None:
4923+ bytes = 'bytes=' + range_header
4924+ headers = {'Range': bytes}
4925+ else:
4926+ range_header = None
4927+
4928+ response = self.request('GET', abspath, headers=headers)
4929+
4930+ if response.status == 404: # not found
4931+ raise errors.NoSuchFile(abspath)
4932+ elif response.status == 416:
4933+ # We don't know which, but one of the ranges we specified was
4934+ # wrong.
4935+ raise errors.InvalidHttpRange(abspath, range_header,
4936+ 'Server return code %d' % response.status)
4937+ elif response.status == 400:
4938+ if range_header:
4939+ # We don't know which, but one of the ranges we specified was
4940+ # wrong.
4941+ raise errors.InvalidHttpRange(
4942+ abspath, range_header,
4943+ 'Server return code %d' % response.status)
4944+ else:
4945+ raise errors.BadHttpRequest(abspath, response.reason)
4946+ elif response.status not in (200, 206):
4947+ raise errors.UnexpectedHttpStatus(abspath, response.status)
4948+
4949+ data = handle_response(
4950+ abspath, response.status, response.getheader, response)
4951+ return response.status, data
4952+
4953+ def _remote_path(self, relpath):
4954+ """See ConnectedTransport._remote_path.
4955+
4956+ user and passwords are not embedded in the path provided to the server.
4957+ """
4958+ url = self._parsed_url.clone(relpath)
4959+ url.user = url.quoted_user = None
4960+ url.password = url.quoted_password = None
4961+ url.scheme = self._unqualified_scheme
4962+ return str(url)
4963+
4964+ def _create_auth(self):
4965+ """Returns a dict containing the credentials provided at build time."""
4966+ auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
4967+ user=self._parsed_url.user, password=self._parsed_url.password,
4968+ protocol=self._unqualified_scheme,
4969+ path=self._parsed_url.path)
4970+ return auth
4971+
    def get_smart_medium(self):
        """See Transport.get_smart_medium.

        :returns: the (cached) SmartClientHTTPMedium for this transport.
        """
        if self._medium is None:
            # Since medium holds some state (smart server probing at least), we
            # need to keep it around. Note that this is needed because medium
            # has the same 'base' attribute as the transport so it can't be
            # shared between transports having different bases.
            self._medium = SmartClientHTTPMedium(self)
        return self._medium
4981+
4982+ def _degrade_range_hint(self, relpath, ranges):
4983+ if self._range_hint == 'multi':
4984+ self._range_hint = 'single'
4985+ mutter('Retry "%s" with single range request' % relpath)
4986+ elif self._range_hint == 'single':
4987+ self._range_hint = None
4988+ mutter('Retry "%s" without ranges' % relpath)
4989+ else:
4990+ # We tried all the tricks, but nothing worked, caller must reraise.
4991+ return False
4992+ return True
4993+
4994+ # _coalesce_offsets is a helper for readv, it try to combine ranges without
4995+ # degrading readv performances. _bytes_to_read_before_seek is the value
4996+ # used for the limit parameter and has been tuned for other transports. For
4997+ # HTTP, the name is inappropriate but the parameter is still useful and
4998+ # helps reduce the number of chunks in the response. The overhead for a
4999+ # chunk (headers, length, footer around the data itself is variable but
5000+ # around 50 bytes. We use 128 to reduce the range specifiers that appear in
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches