Merge lp:~jelmer/brz/merge-3.1 into lp:brz

Proposed by Jelmer Vernooij on 2021-01-10
Status: Merged
Approved by: Jelmer Vernooij on 2021-01-10
Approved revision: no longer in the source branch.
Merge reported by: The Breezy Bot
Merged at revision: not available
Proposed branch: lp:~jelmer/brz/merge-3.1
Merge into: lp:brz
Diff against target: 5888 lines (+2723/-2543)
37 files modified
breezy/__main__.py (+0/-2)
breezy/_static_tuple_c.c (+2/-2)
breezy/bzr/tests/test_bzrdir.py (+1/-1)
breezy/bzr/tests/test_smart_transport.py (+2/-2)
breezy/git/branch.py (+1/-1)
breezy/git/git-remote-bzr.1 (+1/-1)
breezy/git/git_remote_helper.py (+1/-2)
breezy/git/interrepo.py (+3/-3)
breezy/git/repository.py (+1/-0)
breezy/git/tests/test_git_remote_helper.py (+0/-1)
breezy/gpg.py (+2/-0)
breezy/location.py (+5/-3)
breezy/patches.py (+28/-6)
breezy/plugins/bash_completion/bashcomp.py (+0/-2)
breezy/plugins/fossil/__init__.py (+1/-1)
breezy/plugins/zsh_completion/zshcomp.py (+0/-2)
breezy/python-compat.h (+4/-0)
breezy/repository.py (+1/-0)
breezy/tests/__init__.py (+1/-1)
breezy/tests/per_repository/test_repository.py (+5/-0)
breezy/tests/ssl_certs/create_ssls.py (+2/-2)
breezy/tests/test_http.py (+24/-17)
breezy/tests/test_http_response.py (+3/-3)
breezy/tests/test_location.py (+6/-0)
breezy/tests/test_patches.py (+16/-5)
breezy/tests/test_patches_data/binary.patch (+1/-1)
breezy/tests/test_selftest.py (+1/-1)
breezy/tests/test_source.py (+8/-1)
breezy/tests/test_transport.py (+2/-1)
breezy/tests/test_urlutils.py (+0/-2)
breezy/transport/__init__.py (+4/-4)
breezy/transport/http/__init__.py (+3/-2471)
breezy/transport/http/urllib.py (+2583/-0)
doc/developers/integration.txt (+1/-1)
doc/developers/repository-stream.txt (+2/-2)
doc/en/release-notes/brz-3.1.txt (+6/-0)
doc/en/user-guide/hooks.txt (+2/-2)
To merge this branch: bzr merge lp:~jelmer/brz/merge-3.1
Reviewer | Review Type | Date Requested | Status
Jelmer Vernooij | | | Approve on 2021-01-10
Review via email: mp+396037@code.launchpad.net

Commit message

Description of the change

To post a comment you must log in.
Jelmer Vernooij (jelmer):
review: Approve
lp:~jelmer/brz/merge-3.1 updated on 2021-01-10
7527. By Jelmer Vernooij on 2021-01-10

Merge lp:brz/3.1

Merged from https://code.launchpad.net/~jelmer/brz/merge-3.1/+merge/396037

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'breezy/__main__.py'
2--- breezy/__main__.py 2020-05-05 23:32:39 +0000
3+++ breezy/__main__.py 2021-01-10 01:22:43 +0000
4@@ -1,5 +1,3 @@
5-#! /usr/bin/env python3
6-
7 # Copyright (C) 2005-2013, 2016, 2017 Canonical Ltd
8 # Copyright (C) 2018-2020 Breezy Developers
9 #
10
11=== modified file 'breezy/_static_tuple_c.c'
12--- breezy/_static_tuple_c.c 2019-11-18 01:30:13 +0000
13+++ breezy/_static_tuple_c.c 2021-01-10 01:22:43 +0000
14@@ -97,7 +97,7 @@
15 self->flags |= STATIC_TUPLE_INTERNED_FLAG;
16 // The two references in the dict do not count, so that the StaticTuple
17 // object does not become immortal just because it was interned.
18- Py_REFCNT(self) -= 1;
19+ Py_SET_REFCNT(self, Py_REFCNT(self) - 1);
20 return self;
21 }
22
23@@ -116,7 +116,7 @@
24
25 if (_StaticTuple_is_interned(self)) {
26 /* revive dead object temporarily for Discard */
27- Py_REFCNT(self) = 2;
28+ Py_SET_REFCNT(self, 2);
29 if (SimpleSet_Discard(_interned_tuples, (PyObject*)self) != 1)
30 Py_FatalError("deletion of interned StaticTuple failed");
31 self->flags &= ~STATIC_TUPLE_INTERNED_FLAG;
32
33=== modified file 'breezy/bzr/tests/test_bzrdir.py'
34--- breezy/bzr/tests/test_bzrdir.py 2020-07-26 15:29:07 +0000
35+++ breezy/bzr/tests/test_bzrdir.py 2021-01-10 01:22:43 +0000
36@@ -68,7 +68,7 @@
37 memory,
38 pathfilter,
39 )
40-from ...transport.http import HttpTransport
41+from ...transport.http.urllib import HttpTransport
42 from ...transport.nosmart import NoSmartTransportDecorator
43 from ...transport.readonly import ReadonlyTransportDecorator
44 from .. import knitrepo, knitpack_repo
45
46=== modified file 'breezy/bzr/tests/test_smart_transport.py'
47--- breezy/bzr/tests/test_smart_transport.py 2020-06-10 23:47:24 +0000
48+++ breezy/bzr/tests/test_smart_transport.py 2021-01-10 01:22:43 +0000
49@@ -56,12 +56,12 @@
50 test_server,
51 )
52 from ...transport import (
53- http,
54 local,
55 memory,
56 remote,
57 ssh,
58 )
59+from ...transport.http import urllib
60 from . import (
61 test_smart,
62 )
63@@ -4273,7 +4273,7 @@
64
65 def test_smart_http_medium_request_accept_bytes(self):
66 medium = FakeHTTPMedium()
67- request = http.SmartClientHTTPMediumRequest(medium)
68+ request = urllib.SmartClientHTTPMediumRequest(medium)
69 request.accept_bytes(b'abc')
70 request.accept_bytes(b'def')
71 self.assertEqual(None, medium.written_request)
72
73=== modified file 'breezy/git/branch.py'
74--- breezy/git/branch.py 2020-07-18 23:14:00 +0000
75+++ breezy/git/branch.py 2021-01-10 01:22:43 +0000
76@@ -1451,7 +1451,7 @@
77 ret.append((None, v))
78 ret.append((None, stop_revision))
79 try:
80- revidmap = self.interrepo.fetch_objects(ret, lossy=lossy, limit=limit)
81+ revidmap = self.interrepo.fetch_revs(ret, lossy=lossy, limit=limit)
82 except NoPushSupport:
83 raise errors.NoRoundtrippingSupport(self.source, self.target)
84 return _mod_repository.FetchResult(revidmap={
85
86=== modified file 'breezy/git/git-remote-bzr.1'
87--- breezy/git/git-remote-bzr.1 2018-03-26 22:28:24 +0000
88+++ breezy/git/git-remote-bzr.1 2021-01-10 01:22:43 +0000
89@@ -19,7 +19,7 @@
90 This command provides support for using \fIbzr\fR repositories as Git remotes, through the breezy-git plugin. At the moment it supports cloning from, fetching from and pushing into Bazaar repositories. Fetch support is still experimental, and may be slow.
91 .SH "BUGS"
92 .sp
93-Please report bugs at \fUhttps://launchpad.net/brz-git/+filebug\fR
94+Please report bugs at \fUhttps://launchpad.net/brz/+filebug\fR
95 .SH "LICENSE"
96 breezy-git and git-remote-bzr are licensed under the GNU GPL, version 2 or later.
97 .SH "SEE ALSO"
98
99=== modified file 'breezy/git/git_remote_helper.py'
100--- breezy/git/git_remote_helper.py 2020-02-18 01:57:45 +0000
101+++ breezy/git/git_remote_helper.py 2021-01-10 01:22:43 +0000
102@@ -1,4 +1,3 @@
103-#!/usr/bin/env python3
104 # vim: expandtab
105
106 # Copyright (C) 2011-2018 Jelmer Vernooij <jelmer@jelmer.uk>
107@@ -78,7 +77,7 @@
108 lossy = False
109 else:
110 lossy = True
111- inter.fetch_objects(revs, lossy=lossy)
112+ inter.fetch_revs(revs, lossy=lossy)
113 outf.write(b"\n")
114
115
116
117=== modified file 'breezy/git/interrepo.py'
118--- breezy/git/interrepo.py 2020-11-18 02:15:43 +0000
119+++ breezy/git/interrepo.py 2021-01-10 01:22:43 +0000
120@@ -262,7 +262,7 @@
121 with self.source_store.lock_read():
122 old_refs = self._get_target_bzr_refs()
123 new_refs = update_refs(old_refs)
124- revidmap = self.fetch_objects(
125+ revidmap = self.fetch_revs(
126 [(git_sha, bzr_revid)
127 for (git_sha, bzr_revid) in new_refs.values()
128 if git_sha is None or not git_sha.startswith(SYMREF)],
129@@ -286,7 +286,7 @@
130 result_refs[name] = (gitid, revid if not lossy else self.mapping.revision_id_foreign_to_bzr(gitid))
131 return revidmap, old_refs, result_refs
132
133- def fetch_objects(self, revs, lossy, limit=None):
134+ def fetch_revs(self, revs, lossy, limit=None):
135 if not lossy and not self.mapping.roundtripping:
136 for git_sha, bzr_revid in revs:
137 if (bzr_revid is not None and
138@@ -332,7 +332,7 @@
139 for revid in self.source.all_revision_ids()]
140 self._warn_slow()
141 try:
142- revidmap = self.fetch_objects(stop_revisions, lossy=lossy)
143+ revidmap = self.fetch_revs(stop_revisions, lossy=lossy)
144 except NoPushSupport:
145 raise NoRoundtrippingSupport(self.source, self.target)
146 return FetchResult(revidmap)
147
148=== modified file 'breezy/git/repository.py'
149--- breezy/git/repository.py 2020-11-18 02:15:43 +0000
150+++ breezy/git/repository.py 2021-01-10 01:22:43 +0000
151@@ -575,6 +575,7 @@
152 supports_overriding_transport = False
153 supports_custom_revision_properties = False
154 records_per_file_revision = False
155+ supports_multiple_authors = False
156
157 @property
158 def _matchingcontroldir(self):
159
160=== modified file 'breezy/git/tests/test_git_remote_helper.py'
161--- breezy/git/tests/test_git_remote_helper.py 2020-06-19 21:26:53 +0000
162+++ breezy/git/tests/test_git_remote_helper.py 2021-01-10 01:22:43 +0000
163@@ -1,4 +1,3 @@
164-#!/usr/bin/env python3
165 # vim: expandtab
166
167 # Copyright (C) 2011-2018 Jelmer Vernooij <jelmer@jelmer.uk>
168
169=== modified file 'breezy/gpg.py'
170--- breezy/gpg.py 2020-02-18 01:57:45 +0000
171+++ breezy/gpg.py 2021-01-10 01:22:43 +0000
172@@ -251,6 +251,8 @@
173 }[mode])
174 except gpg.errors.GPGMEError as error:
175 raise SigningFailed(str(error))
176+ except gpg.errors.InvalidSigners as error:
177+ raise SigningFailed(str(error))
178
179 return output
180
181
182=== modified file 'breezy/location.py'
183--- breezy/location.py 2020-07-18 23:14:00 +0000
184+++ breezy/location.py 2021-01-10 01:22:43 +0000
185@@ -77,14 +77,16 @@
186
187 def parse_cvs_location(location):
188 parts = location.split(':')
189- if parts[0] or parts[1] not in ('pserver', 'ssh'):
190- raise ValueError('not a valid pserver location string')
191+ if parts[0] or parts[1] not in ('pserver', 'ssh', 'extssh'):
192+ raise ValueError('not a valid CVS location string')
193 try:
194 (username, hostname) = parts[2].split('@', 1)
195 except IndexError:
196 hostname = parts[2]
197 username = None
198 scheme = parts[1]
199+ if scheme == 'extssh':
200+ scheme = 'ssh'
201 path = parts[3]
202 return (scheme, hostname, username, path)
203
204@@ -119,7 +121,7 @@
205 if not isinstance(location, str):
206 raise AssertionError("location not a byte or unicode string")
207
208- if location.startswith(':pserver:'):
209+ if location.startswith(':pserver:') or location.startswith(':extssh:'):
210 return cvs_to_url(location)
211
212 from .directory_service import directories
213
214=== modified file 'breezy/patches.py'
215--- breezy/patches.py 2020-11-18 02:15:43 +0000
216+++ breezy/patches.py 2021-01-10 01:22:43 +0000
217@@ -87,6 +87,10 @@
218 raise MalformedPatchHeader("No orig name", line)
219 else:
220 orig_name = line[4:].rstrip(b"\n")
221+ try:
222+ (orig_name, orig_ts) = orig_name.split(b'\t')
223+ except ValueError:
224+ orig_ts = None
225 except StopIteration:
226 raise MalformedPatchHeader("No orig line", "")
227 try:
228@@ -95,9 +99,13 @@
229 raise PatchSyntax("No mod name")
230 else:
231 mod_name = line[4:].rstrip(b"\n")
232+ try:
233+ (mod_name, mod_ts) = mod_name.split(b'\t')
234+ except ValueError:
235+ mod_ts = None
236 except StopIteration:
237 raise MalformedPatchHeader("No mod line", "")
238- return (orig_name, mod_name)
239+ return ((orig_name, orig_ts), (mod_name, mod_ts))
240
241
242 def parse_range(textrange):
243@@ -318,13 +326,16 @@
244 self.newname = newname
245
246 def as_bytes(self):
247- return b'Binary files %s and %s differ\n' % (self.oldname, self.newname)
248+ return b'Binary files %s and %s differ\n' % (
249+ self.oldname, self.newname)
250
251
252 class Patch(BinaryPatch):
253
254- def __init__(self, oldname, newname):
255+ def __init__(self, oldname, newname, oldts=None, newts=None):
256 BinaryPatch.__init__(self, oldname, newname)
257+ self.oldts = oldts
258+ self.newts = newts
259 self.hunks = []
260
261 def as_bytes(self):
262@@ -332,8 +343,18 @@
263 ret += b"".join([h.as_bytes() for h in self.hunks])
264 return ret
265
266+ @classmethod
267+ def _headerline(cls, start, name, ts):
268+ l = start + b' ' + name
269+ if ts is not None:
270+ l += b'\t%s' % ts
271+ l += b'\n'
272+ return l
273+
274 def get_header(self):
275- return b"--- %s\n+++ %s\n" % (self.oldname, self.newname)
276+ return (
277+ self._headerline(b'---', self.oldname, self.oldts) +
278+ self._headerline(b'+++', self.newname, self.newts))
279
280 def stats_values(self):
281 """Calculate the number of inserts and removes."""
282@@ -385,11 +406,12 @@
283 '''
284 iter_lines = iter_lines_handle_nl(iter_lines)
285 try:
286- (orig_name, mod_name) = get_patch_names(iter_lines)
287+ ((orig_name, orig_ts), (mod_name, mod_ts)) = get_patch_names(
288+ iter_lines)
289 except BinaryFiles as e:
290 return BinaryPatch(e.orig_name, e.mod_name)
291 else:
292- patch = Patch(orig_name, mod_name)
293+ patch = Patch(orig_name, mod_name, orig_ts, mod_ts)
294 for hunk in iter_hunks(iter_lines, allow_dirty):
295 patch.hunks.append(hunk)
296 return patch
297
298=== modified file 'breezy/plugins/bash_completion/bashcomp.py'
299--- breezy/plugins/bash_completion/bashcomp.py 2020-02-18 01:57:45 +0000
300+++ breezy/plugins/bash_completion/bashcomp.py 2021-01-10 01:22:43 +0000
301@@ -1,5 +1,3 @@
302-#!/usr/bin/env python3
303-
304 # Copyright (C) 2009, 2010 Canonical Ltd
305 #
306 # This program is free software; you can redistribute it and/or modify
307
308=== modified file 'breezy/plugins/fossil/__init__.py'
309--- breezy/plugins/fossil/__init__.py 2020-08-10 15:00:17 +0000
310+++ breezy/plugins/fossil/__init__.py 2021-01-10 01:22:43 +0000
311@@ -69,7 +69,7 @@
312
313 @classmethod
314 def probe_transport(klass, transport):
315- from breezy.transport.http import HttpTransport
316+ from breezy.transport.http.urllib import HttpTransport
317 if not isinstance(transport, HttpTransport):
318 raise errors.NotBranchError(path=transport.base)
319 response = transport.request(
320
321=== modified file 'breezy/plugins/zsh_completion/zshcomp.py'
322--- breezy/plugins/zsh_completion/zshcomp.py 2020-02-18 01:57:45 +0000
323+++ breezy/plugins/zsh_completion/zshcomp.py 2021-01-10 01:22:43 +0000
324@@ -1,5 +1,3 @@
325-#!/usr/bin/env python3
326-
327 # Copyright (C) 2009, 2010 Canonical Ltd
328 #
329 # This program is free software; you can redistribute it and/or modify
330
331=== modified file 'breezy/python-compat.h'
332--- breezy/python-compat.h 2019-03-05 07:56:11 +0000
333+++ breezy/python-compat.h 2021-01-10 01:22:43 +0000
334@@ -119,4 +119,8 @@
335 #define strtoull _strtoui64
336 #endif
337
338+#if PY_VERSION_HEX < 0x030900A4
339+# define Py_SET_REFCNT(obj, refcnt) ((Py_REFCNT(obj) = (refcnt)), (void)0)
340+#endif
341+
342 #endif /* _BZR_PYTHON_COMPAT_H */
343
344=== modified file 'breezy/repository.py'
345--- breezy/repository.py 2020-07-18 23:14:00 +0000
346+++ breezy/repository.py 2021-01-10 01:22:43 +0000
347@@ -1331,6 +1331,7 @@
348 supports_custom_revision_properties = True
349 # Does the format record per-file revision metadata?
350 records_per_file_revision = True
351+ supports_multiple_authors = True
352
353 def __repr__(self):
354 return "%s()" % self.__class__.__name__
355
356=== modified file 'breezy/tests/__init__.py'
357--- breezy/tests/__init__.py 2020-07-28 02:11:05 +0000
358+++ breezy/tests/__init__.py 2021-01-10 01:22:43 +0000
359@@ -4169,7 +4169,7 @@
360 'breezy.tests',
361 'breezy.tests.fixtures',
362 'breezy.timestamp',
363- 'breezy.transport.http',
364+ 'breezy.transport.http.urllib',
365 'breezy.version_info_formats.format_custom',
366 ]
367
368
369=== modified file 'breezy/tests/per_repository/test_repository.py'
370--- breezy/tests/per_repository/test_repository.py 2020-07-18 23:14:00 +0000
371+++ breezy/tests/per_repository/test_repository.py 2021-01-10 01:22:43 +0000
372@@ -83,6 +83,11 @@
373 self.assertFormatAttribute('supports_nesting_repositories',
374 (True, False))
375
376+ def test_attribute_supports_multiple_authors(self):
377+ """Test the format.supports_multiple_authors."""
378+ self.assertFormatAttribute('supports_multiple_authors',
379+ (True, False))
380+
381 def test_attribute_supports_unreferenced_revisions(self):
382 """Test the format.supports_unreferenced_revisions."""
383 self.assertFormatAttribute('supports_unreferenced_revisions',
384
385=== modified file 'breezy/tests/ssl_certs/create_ssls.py'
386--- breezy/tests/ssl_certs/create_ssls.py 2018-11-23 02:12:35 +0000
387+++ breezy/tests/ssl_certs/create_ssls.py 2021-01-10 01:22:43 +0000
388@@ -1,4 +1,4 @@
389-#! /usr/bin/env python
390+#! /usr/bin/env python3
391
392 # Copyright (C) 2007, 2008, 2009, 2017 Canonical Ltd
393 #
394@@ -84,7 +84,7 @@
395 """
396 cmd = ['openssl'] + args
397 proc = Popen(cmd, stdin=PIPE)
398- (stdout, stderr) = proc.communicate(input)
399+ (stdout, stderr) = proc.communicate(input.encode('utf-8'))
400 if proc.returncode:
401 # Basic error handling, all commands should succeed
402 raise CalledProcessError(proc.returncode, cmd)
403
404=== modified file 'breezy/tests/test_http.py'
405--- breezy/tests/test_http.py 2020-02-07 02:14:30 +0000
406+++ breezy/tests/test_http.py 2021-01-10 01:22:43 +0000
407@@ -57,11 +57,18 @@
408 multiply_scenarios,
409 )
410 from ..transport import (
411- http,
412 remote,
413 )
414-from ..transport.http import (
415+from ..transport.http import urllib
416+from ..transport.http.urllib import (
417+ AbstractAuthHandler,
418+ BasicAuthHandler,
419 HttpTransport,
420+ HTTPAuthHandler,
421+ HTTPConnection,
422+ HTTPSConnection,
423+ ProxyHandler,
424+ Request,
425 )
426
427
428@@ -220,7 +227,7 @@
429
430 def parse_header(self, header, auth_handler_class=None):
431 if auth_handler_class is None:
432- auth_handler_class = http.AbstractAuthHandler
433+ auth_handler_class = AbstractAuthHandler
434 self.auth_handler = auth_handler_class()
435 return self.auth_handler._parse_auth_header(header)
436
437@@ -241,7 +248,7 @@
438 self.assertEqual('realm="Thou should not pass"', remainder)
439
440 def test_build_basic_header_with_long_creds(self):
441- handler = http.BasicAuthHandler()
442+ handler = BasicAuthHandler()
443 user = 'user' * 10 # length 40
444 password = 'password' * 5 # length 40
445 header = handler.build_auth_header(
446@@ -253,7 +260,7 @@
447 def test_basic_extract_realm(self):
448 scheme, remainder = self.parse_header(
449 'Basic realm="Thou should not pass"',
450- http.BasicAuthHandler)
451+ BasicAuthHandler)
452 match, realm = self.auth_handler.extract_realm(remainder)
453 self.assertTrue(match is not None)
454 self.assertEqual(u'Thou should not pass', realm)
455@@ -509,7 +516,7 @@
456 offsets = [(start, end - start + 1) for start, end in ranges]
457 coalesce = transport.Transport._coalesce_offsets
458 coalesced = list(coalesce(offsets, limit=0, fudge_factor=0))
459- range_header = http.HttpTransport._range_header
460+ range_header = HttpTransport._range_header
461 self.assertEqual(value, range_header(coalesced, tail))
462
463 def test_range_header_single(self):
464@@ -1124,13 +1131,13 @@
465 """
466
467 def _proxied_request(self):
468- handler = http.ProxyHandler()
469- request = http.Request('GET', 'http://baz/buzzle')
470+ handler = ProxyHandler()
471+ request = Request('GET', 'http://baz/buzzle')
472 handler.set_proxy(request, 'http')
473 return request
474
475 def assertEvaluateProxyBypass(self, expected, host, no_proxy):
476- handler = http.ProxyHandler()
477+ handler = ProxyHandler()
478 self.assertEqual(expected,
479 handler.evaluate_proxy_bypass(host, no_proxy))
480
481@@ -1322,24 +1329,24 @@
482 self.get_new_transport().get('a').read())
483
484
485-class RedirectedRequest(http.Request):
486+class RedirectedRequest(Request):
487 """Request following redirections. """
488
489- init_orig = http.Request.__init__
490+ init_orig = Request.__init__
491
492 def __init__(self, method, url, *args, **kwargs):
493 """Constructor.
494
495 """
496 # Since the tests using this class will replace
497- # http.Request, we can't just call the base class __init__
498+ # Request, we can't just call the base class __init__
499 # or we'll loop.
500 RedirectedRequest.init_orig(self, method, url, *args, **kwargs)
501 self.follow_redirections = True
502
503
504 def install_redirected_request(test):
505- test.overrideAttr(http, 'Request', RedirectedRequest)
506+ test.overrideAttr(urllib, 'Request', RedirectedRequest)
507
508
509 def cleanup_http_redirection_connections(test):
510@@ -1356,13 +1363,13 @@
511 test.http_connect_orig(connection)
512 test.addCleanup(socket_disconnect, connection.sock)
513 test.http_connect_orig = test.overrideAttr(
514- http.HTTPConnection, 'connect', connect)
515+ HTTPConnection, 'connect', connect)
516
517 def connect(connection):
518 test.https_connect_orig(connection)
519 test.addCleanup(socket_disconnect, connection.sock)
520 test.https_connect_orig = test.overrideAttr(
521- http.HTTPSConnection, 'connect', connect)
522+ HTTPSConnection, 'connect', connect)
523
524
525 class TestHTTPSilentRedirections(http_utils.TestCaseWithRedirectedWebserver):
526@@ -1370,7 +1377,7 @@
527
528 http implementations do not redirect silently anymore (they
529 do not redirect at all in fact). The mechanism is still in
530- place at the http.Request level and these tests
531+ place at the Request level and these tests
532 exercise it.
533 """
534
535@@ -1494,7 +1501,7 @@
536 password = 'foo'
537 _setup_authentication_config(scheme='http', host='localhost',
538 user=user, password=password)
539- handler = http.HTTPAuthHandler()
540+ handler = HTTPAuthHandler()
541 got_pass = handler.get_user_password(dict(
542 user='joe',
543 protocol='http',
544
545=== modified file 'breezy/tests/test_http_response.py'
546--- breezy/tests/test_http_response.py 2020-06-10 23:47:24 +0000
547+++ breezy/tests/test_http_response.py 2021-01-10 01:22:43 +0000
548@@ -49,7 +49,7 @@
549 )
550 from ..transport.http import (
551 response,
552- HTTPConnection,
553+ urllib,
554 )
555 from .file_utils import (
556 FakeReadFile,
557@@ -66,10 +66,10 @@
558 return self.readfile
559
560
561-class FakeHTTPConnection(HTTPConnection):
562+class FakeHTTPConnection(urllib.HTTPConnection):
563
564 def __init__(self, sock):
565- HTTPConnection.__init__(self, 'localhost')
566+ urllib.HTTPConnection.__init__(self, 'localhost')
567 # Set the socket to bypass the connection
568 self.sock = sock
569
570
571=== modified file 'breezy/tests/test_location.py'
572--- breezy/tests/test_location.py 2020-06-28 23:13:22 +0000
573+++ breezy/tests/test_location.py 2021-01-10 01:22:43 +0000
574@@ -83,6 +83,12 @@
575 ':pserver:anonymous@odessa.cvs.sourceforge.net:/cvsroot/odess'))
576 self.assertRaises(ValueError, location_to_url, ':pserver:blah')
577
578+ def test_extssh(self):
579+ self.assertEqual(
580+ 'cvs+ssh://anonymous@odessa.cvs.sourceforge.net/cvsroot/odess',
581+ location_to_url(
582+ ':extssh:anonymous@odessa.cvs.sourceforge.net:/cvsroot/odess'))
583+
584 def test_missing_scheme(self):
585 self.skipTest('need clever guessing of scheme')
586 self.assertEqual(
587
588=== modified file 'breezy/tests/test_patches.py'
589--- breezy/tests/test_patches.py 2019-12-28 14:10:36 +0000
590+++ breezy/tests/test_patches.py 2021-01-10 01:22:43 +0000
591@@ -86,10 +86,21 @@
592
593 def testValidPatchHeader(self):
594 """Parse a valid patch header"""
595+ lines = (
596+ b"--- orig/commands.py\t2020-09-09 23:39:35 +0000\n"
597+ b"+++ mod/dommands.py\t2020-09-09 23:39:35 +0000\n").split(b'\n')
598+ (orig, mod) = get_patch_names(lines.__iter__())
599+ self.assertEqual(
600+ orig, (b"orig/commands.py", b'2020-09-09 23:39:35 +0000'))
601+ self.assertEqual(
602+ mod, (b"mod/dommands.py", b'2020-09-09 23:39:35 +0000'))
603+
604+ def testValidPatchHeaderMissingTimestamps(self):
605+ """Parse a valid patch header"""
606 lines = b"--- orig/commands.py\n+++ mod/dommands.py\n".split(b'\n')
607 (orig, mod) = get_patch_names(lines.__iter__())
608- self.assertEqual(orig, b"orig/commands.py")
609- self.assertEqual(mod, b"mod/dommands.py")
610+ self.assertEqual(orig, (b"orig/commands.py", None))
611+ self.assertEqual(mod, (b"mod/dommands.py", None))
612
613 def testInvalidPatchHeader(self):
614 """Parse an invalid patch header"""
615@@ -181,10 +192,10 @@
616 patches = list(parse_patches(self.data_lines("binary.patch")))
617 self.assertIs(BinaryPatch, patches[0].__class__)
618 self.assertIs(Patch, patches[1].__class__)
619- self.assertContainsRe(patches[0].oldname, b'^bar\t')
620- self.assertContainsRe(patches[0].newname, b'^qux\t')
621+ self.assertEqual(patches[0].oldname, b'bar')
622+ self.assertEqual(patches[0].newname, b'qux')
623 self.assertContainsRe(patches[0].as_bytes(),
624- b'Binary files bar\t.* and qux\t.* differ\n')
625+ b'Binary files bar and qux differ\n')
626
627 def test_parse_binary_after_normal(self):
628 patches = list(parse_patches(
629
630=== modified file 'breezy/tests/test_patches_data/binary.patch'
631--- breezy/tests/test_patches_data/binary.patch 2009-10-14 22:08:45 +0000
632+++ breezy/tests/test_patches_data/binary.patch 2021-01-10 01:22:43 +0000
633@@ -1,4 +1,4 @@
634-Binary files bar 2009-10-14 19:49:59 +0000 and qux 2009-10-14 19:50:35 +0000 differ
635+Binary files bar and qux differ
636 --- baz 2009-10-14 19:49:59 +0000
637 +++ quxx 2009-10-14 19:51:00 +0000
638 @@ -1 +1 @@
639
640=== modified file 'breezy/tests/test_selftest.py'
641--- breezy/tests/test_selftest.py 2020-06-23 01:02:30 +0000
642+++ breezy/tests/test_selftest.py 2021-01-10 01:22:43 +0000
643@@ -687,7 +687,7 @@
644
645 def test_get_readonly_url_http(self):
646 from .http_server import HttpServer
647- from ..transport.http import HttpTransport
648+ from ..transport.http.urllib import HttpTransport
649 self.transport_server = test_server.LocalURLServer
650 self.transport_readonly_server = HttpServer
651 # calling get_readonly_transport() gives us a HTTP server instance.
652
653=== modified file 'breezy/tests/test_source.py'
654--- breezy/tests/test_source.py 2020-06-01 19:35:12 +0000
655+++ breezy/tests/test_source.py 2021-01-10 01:22:43 +0000
656@@ -325,7 +325,14 @@
657 self.fail('\n\n'.join(problems))
658
659 def test_flake8(self):
660- self.requireFeature(features.flake8)
661+ try:
662+ self.requireFeature(features.flake8)
663+ except (SyntaxError, NameError):
664+ # importlib_metadata uses ModuleNotFoundError, which is
665+ # python 3.6 only
666+ if sys.version_info[:2] <= (3, 5):
667+ self.skipTest('python version too old')
668+ raise
669 # Older versions of flake8 don't support the 'paths'
670 # variable
671 new_path = list(sys.path)
672
673=== modified file 'breezy/tests/test_transport.py'
674--- breezy/tests/test_transport.py 2020-02-07 02:14:30 +0000
675+++ breezy/tests/test_transport.py 2021-01-10 01:22:43 +0000
676@@ -38,6 +38,7 @@
677 pathfilter,
678 readonly,
679 )
680+from ..transport.http import urllib
681 import breezy.transport.trace
682 from . import (
683 features,
684@@ -1102,6 +1103,6 @@
685
686 def test_truncation(self):
687 fake_html = "<p>something!\n" * 1000
688- result = http.unhtml_roughly(fake_html)
689+ result = urllib.unhtml_roughly(fake_html)
690 self.assertEqual(len(result), 1000)
691 self.assertStartsWith(result, " something!")
692
693=== modified file 'breezy/tests/test_urlutils.py'
694--- breezy/tests/test_urlutils.py 2020-02-07 02:14:30 +0000
695+++ breezy/tests/test_urlutils.py 2021-01-10 01:22:43 +0000
696@@ -715,8 +715,6 @@
697 self.assertEqual('%', urlutils.unescape('%25'))
698 self.assertEqual(u'\xe5', urlutils.unescape('%C3%A5'))
699
700- self.assertRaises((TypeError, urlutils.InvalidURL),
701- urlutils.unescape, b'\xe5')
702 self.assertEqual('\xe5', urlutils.unescape('%C3%A5'))
703
704 def test_escape_unescape(self):
705
706=== modified file 'breezy/transport/__init__.py'
707--- breezy/transport/__init__.py 2020-06-23 01:02:30 +0000
708+++ breezy/transport/__init__.py 2021-01-10 01:22:43 +0000
709@@ -1655,12 +1655,12 @@
710 register_transport_proto('http+urllib://',
711 # help="Read-only access of branches exported on the web."
712 register_netloc=True)
713-register_lazy_transport('http+urllib://', 'breezy.transport.http',
714+register_lazy_transport('http+urllib://', 'breezy.transport.http.urllib',
715 'HttpTransport')
716 register_transport_proto('https+urllib://',
717 # help="Read-only access of branches exported on the web using SSL."
718 register_netloc=True)
719-register_lazy_transport('https+urllib://', 'breezy.transport.http',
720+register_lazy_transport('https+urllib://', 'breezy.transport.http.urllib',
721 'HttpTransport')
722 # Default http transports (last declared wins (if it can be imported))
723 register_transport_proto('http://',
724@@ -1668,9 +1668,9 @@
725 register_transport_proto('https://',
726 help="Read-only access of branches exported on the web using SSL.")
727 # The default http implementation is urllib
728-register_lazy_transport('http://', 'breezy.transport.http',
729+register_lazy_transport('http://', 'breezy.transport.http.urllib',
730 'HttpTransport')
731-register_lazy_transport('https://', 'breezy.transport.http',
732+register_lazy_transport('https://', 'breezy.transport.http.urllib',
733 'HttpTransport')
734
735 register_transport_proto(
736
737=== modified file 'breezy/transport/http/__init__.py'
738--- breezy/transport/http/__init__.py 2020-08-10 15:00:17 +0000
739+++ breezy/transport/http/__init__.py 2021-01-10 01:22:43 +0000
740@@ -21,50 +21,14 @@
741
742 DEBUG = 0
743
744-import base64
745-import cgi
746-import errno
747 import os
748-import re
749-import socket
750 import ssl
751 import sys
752-import time
753-import urllib
754-import weakref
755-
756-import http.client as http_client
757-import urllib.request as urllib_request
758-from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
759-
760-# TODO: handle_response should be integrated into the http/__init__.py
761-from .response import handle_response
762-
763-# FIXME: Oversimplifying, two kind of exceptions should be
764-# raised, once a request is issued: URLError before we have been
765-# able to process the response, HTTPError after that. Process the
766-# response means we are able to leave the socket clean, so if we
767-# are not able to do that, we should close the connection. The
768-# actual code more or less do that, tests should be written to
769-# ensure that.
770-
771-from ... import __version__ as breezy_version
772+
773+
774 from ... import (
775+ version_string as breezy_version,
776 config,
777- debug,
778- errors,
779- lazy_import,
780- osutils,
781- trace,
782- transport,
783- ui,
784- urlutils,
785-)
786-from ...bzr.smart import medium
787-from ...trace import mutter
788-from ...transport import (
789- ConnectedTransport,
790- UnusableRedirect,
791 )
792
793
794@@ -152,2435 +116,3 @@
795 * none: Certificates ignored
796 * required: Certificates required and validated
797 """)
798-
799-checked_kerberos = False
800-kerberos = None
801-
802-
803-class _ReportingFileSocket(object):
804-
805- def __init__(self, filesock, report_activity=None):
806- self.filesock = filesock
807- self._report_activity = report_activity
808-
809- def report_activity(self, size, direction):
810- if self._report_activity:
811- self._report_activity(size, direction)
812-
813- def read(self, size=1):
814- s = self.filesock.read(size)
815- self.report_activity(len(s), 'read')
816- return s
817-
818- def readline(self, size=-1):
819- s = self.filesock.readline(size)
820- self.report_activity(len(s), 'read')
821- return s
822-
823- def readinto(self, b):
824- s = self.filesock.readinto(b)
825- self.report_activity(s, 'read')
826- return s
827-
828- def __getattr__(self, name):
829- return getattr(self.filesock, name)
830-
831-
class _ReportingSocket(object):
    """Proxy for a socket that reports the bytes sent and received.

    ``sendall`` reports with direction ``'write'`` and ``recv`` with
    ``'read'``.  Files created through ``makefile`` are wrapped so that their
    reads are reported too.  Every other attribute is looked up on the
    wrapped socket.
    """

    def __init__(self, sock, report_activity=None):
        self.sock = sock
        self._report_activity = report_activity

    def report_activity(self, size, direction):
        """Call the activity callback, if one was supplied."""
        callback = self._report_activity
        if callback:
            callback(size, direction)

    def sendall(self, s, *args):
        self.sock.sendall(s, *args)
        self.report_activity(len(s), 'write')

    def recv(self, *args):
        received = self.sock.recv(*args)
        self.report_activity(len(received), 'read')
        return received

    def makefile(self, mode='r', bufsize=-1):
        # The requested bufsize is deliberately ignored: http_client asks for
        # an unbuffered file object, which makes fp.readline() read a single
        # byte at a time.  Force a buffered file instead.
        buffered = self.sock.makefile(mode, 65536)
        # ... and make that file report its reads as well
        return _ReportingFileSocket(buffered, self._report_activity)

    def __getattr__(self, name):
        # Only reached for names not defined on the wrapper: delegate
        return getattr(self.sock, name)
862-
863-
# A dedicated Response class lets us keep the http_client pipe clean
class Response(http_client.HTTPResponse):
    """HTTPResponse subclass that leaves the connection reusable.

    http_client would rather decorate the objects it returns; a custom class
    is used instead so that unread bodies can be drained from the socket.
    """

    # Statuses whose body is of no interest to us
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # finish() may have to throw away several MB in the worst case.  Reading
    # by chunks avoids buffering that much at once; the underlying file is a
    # socket or a StringIO, so 8k chunks are fine.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        self.url = url
        super(Response, self).__init__(
            sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Read the response headers, then tidy up the body if needed.

        For some statuses http_client does not even try to read the body,
        which then stays in the socket and blocks the next request.  For the
        statuses listed in ``_body_ignored_responses`` the body is consumed
        here (when that is safe) and the response is closed.
        """
        http_client.HTTPResponse.begin(self)
        status = self.status
        if status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will ready body, length: %s" % (
                    self.status, self.length))
            # Only read when a length is known and the connection is kept
            # open: otherwise the server may already have closed the
            # connection right after the headers, and reading would fail with
            # 104 'Connection reset by peer' (even if the headers announce a
            # Content-Type...)
            if self.length is not None and not self.will_close:
                body = self.read(self.length)
                if self.debuglevel >= 9:
                    # Potentially huge and rarely interesting
                    print("Consumed body: [%s]" % body)
            # close() refers to the copy of the socket http_client created
            # for this response: we are done with the response itself.
            self.close()
            return
        if status == 200:
            # The request succeeded, so the connection must certainly not be
            # closed.  http_client.HTTPConnection.getresponse does not detect
            # every such case (the CONNECT response of https through a proxy
            # is one).  'will_close' refers to the "true" socket between us
            # and the server, which we keep open.
            self.will_close = False

    def finish(self):
        """Discard whatever is left of the body.

        Bytes left unread would prevent the next request from succeeding on
        a persistent connection.  (Without a persistent connection nothing
        blocks, since a new connection is made anyway.)

        :return: the number of bytes left on the socket (may be None)
        """
        if self.isclosed():
            return None
        # Make sure nothing remains to be read on the socket
        pending = 0
        while self.length:
            # read() updates self.length
            chunk = self.read(min(self.length, self._discarded_buf_size))
            if not chunk:
                break
            pending += len(chunk)
        if pending:
            trace.mutter("%s bytes left on the HTTP socket", pending)
        self.close()
        return pending
948-
949-
# Plain class statement on purpose: http_client.HTTPConnection does not
# inherit from 'object' either.
class AbstractHTTPConnection:
    """Mixin for HTTP(S) connections that can reset themselves.

    It remembers the last response so that leftover body bytes can be
    drained before the connection is reused.
    """

    response_class = Response

    # A server answering range requests with the whole file deserves a
    # warning, but only when the amount of discarded data exceeds this.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        self._response = None
        self._report_activity = report_activity
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        """Trace the address we are about to connect to."""
        netloc = '%s:%s' % (self.host, self.port)
        proxied = self.proxied_host
        if proxied is not None:
            netloc = netloc + '(proxy for %s)' % proxied
        trace.mutter('* About to connect() to %s' % netloc)

    def getresponse(self):
        """Fetch the response and remember it for later cleanup."""
        response = http_client.HTTPConnection.getresponse(self)
        self._response = response
        return response

    def cleanup_pipe(self):
        """Drain what is left of the last response, if there was one."""
        response = self._response
        if response is not None:
            try:
                pending = response.finish()
                # Warn the user, but only once
                if (self._ranges_received_whole_file is None
                        and response.status == 200):
                    if pending and pending > self._range_warning_thresold:
                        self._ranges_received_whole_file = True
                        trace.warning(
                            'Got a 200 response when asking for multiple ranges,'
                            ' does your server at %s:%s support range requests?',
                            self.host, self.port)
            except socket.error as e:
                # The socket may be in a bad state here (some test cases do
                # that); then it needs no cleaning any more.  Do not fail:
                # drop the socket and let callers reconnect.
                tolerated = (errno.ECONNRESET, errno.ECONNABORTED)
                if not e.args or e.args[0] not in tolerated:
                    raise
                self.close()
            self._response = None
        # Hide the socket while http_client.HTTPConnection does its
        # housekeeping in close(), then put it back
        kept_sock = self.sock
        self.sock = None
        self.close()
        self.sock = kept_sock

    def _wrap_socket_for_reporting(self, sock):
        """Install a reporting wrapper around sock before anybody uses it."""
        self.sock = _ReportingSocket(sock, self._report_activity)
1012-
1013-
class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """Plain HTTP connection whose socket reports its activity."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        # ca_certs is accepted and ignored: it only matters for https
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPConnection.__init__(self, host, port)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect, then wrap the new socket for activity reporting."""
        tracing = 'http' in debug.debug_flags
        if tracing:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
1029-
1030-
class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """HTTPS connection that sets up TLS itself and reports socket activity.

    The TCP connection is made first (possibly to a proxy); the TLS layer is
    added by ``connect_to_origin``, either immediately or, when a proxy is
    involved, after the caller has issued the CONNECT request.
    """

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # key_file/cert_file are not forwarded: http.client.HTTPSConnection
        # no longer accepts them as of Python 3.12, and the only place that
        # uses them is connect_to_origin(), which builds its own SSL context.
        http_client.HTTPSConnection.__init__(self, host, port)
        self.key_file = key_file
        self.cert_file = cert_file
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        """Open the TCP connection and, unless proxied, start TLS."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        # Deliberately the plain HTTPConnection.connect: it only opens the
        # TCP socket, TLS is layered on top by connect_to_origin().
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the current socket in TLS, talking to the origin server.

        The certificate policy comes from the ``ssl.cert_reqs`` option and
        the CA bundle from ``self.ca_certs`` or the ``ssl.ca_certs`` option.

        :raises ssl.SSLError: when the TLS setup or handshake fails (a hint
            about the relevant options is emitted first).
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        # The TLS peer is the origin server, which is not self.host when the
        # connection goes through a proxy.
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            # check_hostname has to be switched off before verify_mode can be
            # set to CERT_NONE, hence the order of these assignments.
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            # Use the origin host for SNI and hostname verification; with a
            # proxy, self.host is the proxy and would not match the
            # certificate presented through the tunnel.
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)
1092-
1093-
class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method
    statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication have
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        """Create a request with an explicit HTTP method.

        :param method: HTTP method, returned verbatim by get_method().
        :param url: the URL to request.
        :param data: optional request body.
        :param headers: optional mapping of header names to values; None
            (the default) means no headers.
        :param origin_req_host: passed through to urllib_request.Request.
        :param unverifiable: passed through to urllib_request.Request.
        :param connection: connection object to reuse, if any.
        :param parent: the request this one was redirected from, if any.
        """
        # None replaces the former shared ``{}`` default so that no mutable
        # object is shared between calls.
        if headers is None:
            headers = {}
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the method given at construction time."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host."""
        host, port = splitport(self.host)
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and a proxy,
        # it sets Host to the https proxy, not the host we want to talk to.
        # I'm fairly sure this is our fault, but what is the cause is an open
        # question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
1150-
1151-
class _ConnectRequest(Request):
    """The CONNECT request that opens a tunnel through a proxy.

    Its selector is always the proxied ``host:port``, whatever URL the
    request was built from.
    """

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        return self.proxied_host

    @selector.setter
    def selector(self, value):
        # urllib_request.Request assigns ``self.selector`` while parsing the
        # URL; without a setter that assignment raises AttributeError and the
        # request cannot even be constructed.  The assigned value is
        # intentionally dropped: the selector is always the proxied host.
        pass

    def get_selector(self):
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)
1186-
1187-
class ConnectionHandler(urllib_request.BaseHandler):
    """Pre-process requests so that connections can be shared.

    urllib_request gives no access to the HTTPConnection object it uses
    internally, yet connection sharing requires it.  The connection is
    therefore attached to the request just before it is processed, and
    do_open is overridden for http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        """Build a connection object for the host of request.

        The connection is only created here; it does not connect until the
        first request is made.

        :raises urlutils.InvalidURL: if the request has no host, or if
            http_client rejects the host/port.
        """
        host = request.host
        if not host:
            # A bit of paranoia: the higher levels should have handled this
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        try:
            return http_connection_class(
                host, proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection yet: a new one is created,

        - the request already has a connection: it has been used before and
          is captured so that it can be handed to another transport for
          reuse.  That is not done here: the Transport object gets the
          connection from a first request and then propagates it, from
          request to request or to cloned transports.
        """
        if request.connection is None:
            request.connection = self.create_connection(
                request, http_connection_class)

        # Every connection passes through here: propagate the debug level
        request.connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        return self.capture_connection(request, HTTPSConnection)
1253-
1254-
class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We overrive urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    # Headers added by http_request() to every request that does not
    # already carry them.
    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Common headers setting

        :param request: the request to complete; its ``headers`` dict is
            updated in place, existing entries are left untouched.
        :return: the same request object.
        """

        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connexion or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        Must be called from an ``except`` block: the exception being handled
        is obtained from sys.exc_info().

        :param http_class: connection class, passed on to do_open().
        :param request: the request that failed.
        :param first_try: True if the request has not been retried yet.
        :return: the response obtained by the retry.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            origin_req_host = request.origin_req_host
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            raise exc_val
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Will retry, %s %r' % (method, url))
                # Drop the connection and issue the request a second (and
                # last) time: first_try is passed as False.
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                # Note: this is an exact type match, subclasses of these two
                # exceptions take the branches below.
                if exc_type in (http_client.BadStatusLine, http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    selector = request.selector
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Failed again, %s %r' % (method, url))
                    print(' Will raise: [%r]' % my_exception)
                # Re-raise as our own exception type, keeping the traceback
                # of the original failure.
                raise my_exception.with_traceback(exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: connection class, only passed on to
            retry_or_raise().
        :param request: the request to send; it must already carry its
            connection.
        :param first_try: False when this call is itself the retry.
        :return: the response, with ``msg`` set to its ``reason``.
        :raises AssertionError: if the request has no connection.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. http_client/urllib/urllib_request all use capitalize to get canonical
        # header names, but only python2.5 urllib_request use title() to fix them just
        # before sending the request. And not all versions of python 2.5 do
        # that. Since we replace urllib_request.AbstractHTTPHandler.do_open we do it
        # ourself below.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            url = request.selector
            # The encode_chunked argument is only passed on Python >= 3.6.
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         # FIXME: implements 100-continue
                                         # None, # We don't send the body yet
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
            # NOTE(review): convert_to_addinfourl is assigned here and in the
            # handler below but never read within this method.
            convert_to_addinfourl = True
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine, http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)
            convert_to_addinfourl = False

        response.msg = response.reason
        return response
1430-
1431-
class HTTPHandler(AbstractHTTPHandler):
    """Handler for http requests: a thin thunk into HTTPConnection."""

    def http_open(self, request):
        """Send request with do_open, using HTTPConnection as the class."""
        response = self.do_open(HTTPConnection, request)
        return response
1437-
1438-
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Send an https request, tunnelling through the proxy if needed.

        When the connection is proxied and not yet connected, a CONNECT
        request is issued first and TLS is then started on the tunnel.

        :raises errors.ConnectionError: if the proxy does not answer the
            CONNECT request with a 200 status.
        """
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # The proxy is the host the connection object points at; the
                # handler itself has no 'host' attribute.
                raise errors.ConnectionError("Can't connect to %s via proxy %s" % (
                    connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
1475-
1476-
class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Redirection handling with our own Request class and policy.

    A dedicated implementation is needed because a specific Request object
    is used and because a specific policy is wanted.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected without
    # interacting with the user.  Breezy however uses shortcuts to save
    # roundtrips, so write requests can be issued before the read requests
    # of the containing dirs had a chance to be redirected.  Write requests
    # are therefore redirected the same way, which seems to respect the
    # spirit of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # Updating the request in place would have been preferable, but the
        # urllib_request.Request creation process is too complicated to
        # offer a simple equivalent update.  Instead the next request of the
        # redirect chain keeps a reference to its parent request.

        # Codes that make no sense in our context are treated as errors:

        # 300: Multiple choices for different representations of the URI.
        # Using that mechanism with Breezy would violate the protocol
        # neutrality of Transport.

        # 304: Not modified (SHOULD only occur with conditional GETs, which
        # our implementation does not use)

        # 305: Use proxy. Hard to imagine in our context -- vila/20060909

        # 306: Unused (if the RFC says so...)

        # A 302 on a HEAD request may look like a sufficient hint that the
        # file exists, so that following the redirection MAY be avoided.
        # To be sure, we MUST follow it.

        origin_req_host = req.origin_req_host

        if code not in (301, 302, 303, 307, 308):
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)
        return Request(req.get_method(), newurl,
                       headers=req.headers,
                       origin_req_host=origin_req_host,
                       unverifiable=True,
                       # TODO: detecting virtual hosts that share the same
                       # IP address would allow sharing the connection...
                       connection=None,
                       parent=req,
                       )

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request so that the pipe of the associated
        connection can be cleaned *before* the redirected request is issued
        but *after* an error has possibly been raised.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once addinfourl objects are gone, the header lookups below
        # will need to use the correct case.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                newurl = headers.get(header_name)
                break
        else:
            return

        newurl = urljoin(req.get_full_url(), newurl)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (newurl,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # Only record the target, the caller decides what to do with it
            req.redirected_to = newurl
            return fp

        # Either succeeds or raises.  urllib_request gives up when
        # redirect_request returns None, but ours never does.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               newurl)

        # Loop detection: .redirect_dict maps each url already visited to
        # the number of visits.
        if not hasattr(req, 'redirect_dict'):
            visited = {}
            req.redirect_dict = visited
            redirected_req.redirect_dict = visited
        else:
            visited = req.redirect_dict
            redirected_req.redirect_dict = visited
            too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        visited[newurl] = visited.get(newurl, 0) + 1

        # fp will no longer be needed for an HTTPError: close it
        fp.close()
        # Everything we need is already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302
1597-
1598-
1599-class ProxyHandler(urllib_request.ProxyHandler):
1600- """Handles proxy setting.
1601-
1602- Copied and modified from urllib_request to be able to modify the request during
1603- the request pre-processing instead of modifying it at _open time. As we
1604- capture (or create) the connection object during request processing, _open
1605- time was too late.
1606-
1607- The main task is to modify the request so that the connection is done to
1608- the proxy while the request still refers to the destination host.
1609-
1610- Note: the proxy handling *may* modify the protocol used; the request may be
1611- against an https server proxied through an http proxy. So, https_request
1612- will be called, but later it's really http_open that will be called. This
1613- explains why we don't have to call self.parent.open as the urllib_request did.
1614- """
1615-
1616- # Proxies must be in front
1617- handler_order = 100
1618- _debuglevel = DEBUG
1619-
1620- def __init__(self, proxies=None):
1621- urllib_request.ProxyHandler.__init__(self, proxies)
1622- # First, let's get rid of urllib_request implementation
1623- for type, proxy in self.proxies.items():
1624- if self._debuglevel >= 3:
1625- print('Will unbind %s_open for %r' % (type, proxy))
1626- delattr(self, '%s_open' % type)
1627-
1628- def bind_scheme_request(proxy, scheme):
1629- if proxy is None:
1630- return
1631- scheme_request = scheme + '_request'
1632- if self._debuglevel >= 3:
1633- print('Will bind %s for %r' % (scheme_request, proxy))
1634- setattr(self, scheme_request,
1635- lambda request: self.set_proxy(request, scheme))
1636- # We are interested only by the http[s] proxies
1637- http_proxy = self.get_proxy_env_var('http')
1638- bind_scheme_request(http_proxy, 'http')
1639- https_proxy = self.get_proxy_env_var('https')
1640- bind_scheme_request(https_proxy, 'https')
1641-
1642- def get_proxy_env_var(self, name, default_to='all'):
1643- """Get a proxy env var.
1644-
1645- Note that we indirectly rely on
1646- urllib.getproxies_environment taking into account the
1647- uppercased values for proxy variables.
1648- """
1649- try:
1650- return self.proxies[name.lower()]
1651- except KeyError:
1652- if default_to is not None:
1653- # Try to get the alternate environment variable
1654- try:
1655- return self.proxies[default_to]
1656- except KeyError:
1657- pass
1658- return None
1659-
1660- def proxy_bypass(self, host):
1661- """Check if host should be proxied or not.
1662-
1663- :returns: True to skip the proxy, False otherwise.
1664- """
1665- no_proxy = self.get_proxy_env_var('no', default_to=None)
1666- bypass = self.evaluate_proxy_bypass(host, no_proxy)
1667- if bypass is None:
1668- # Nevertheless, there are platform-specific ways to
1669- # ignore proxies...
1670- return urllib_request.proxy_bypass(host)
1671- else:
1672- return bypass
1673-
def evaluate_proxy_bypass(self, host, no_proxy):
    """Check the host against a comma-separated no_proxy list as a string.

    Each entry is treated as a glob: ``*`` matches any run of characters
    and ``?`` matches a single character; every other character is
    matched literally (case-insensitively).  As before, an entry matches
    when it matches the *beginning* of the host name.

    :param host: ``host:port`` being requested

    :param no_proxy: comma-separated list of hosts to access directly.

    :returns: True to skip the proxy, False not to, or None to
        leave it to urllib.
    """
    if no_proxy is None:
        # All hosts are proxied
        return False
    hhost, hport = splitport(host)
    # Does host match any of the domains mentioned in
    # no_proxy ? The rules about what is authorized in no_proxy
    # are fuzzy (to say the least). We try to allow most
    # commonly seen values.
    for domain in no_proxy.split(','):
        domain = domain.strip()
        if domain == '':
            continue
        dhost, dport = splitport(domain)
        if dport is not None and dport != hport:
            # The entry is restricted to another port
            continue
        # Escape everything first so that stray regex metacharacters in
        # the user-supplied entry ('(', '[', '+', ...) can neither raise
        # re.error nor be interpreted as operators, then re-enable the
        # two supported glob characters.
        pattern = re.escape(dhost)
        pattern = pattern.replace(r'\*', '.*').replace(r'\?', '.')
        if re.match(pattern, hhost, re.IGNORECASE):
            return True
    # Nothing explicitly avoid the host
    return None
1706-
def set_proxy(self, request, type):
    """Route ``request`` through the proxy configured for ``type``.

    Nothing is changed when the requested host must bypass the proxy.
    Otherwise the proxy url is parsed, the request's proxy
    authentication parameters are initialized if this is the first
    proxied request, and the request is pointed at the proxy.

    :param request: The request about to be sent.
    :param type: The proxy kind used to look up the proxy url; it is
        also handed to ``request.set_proxy``.
    :returns: The (possibly updated) request.
    :raises urlutils.InvalidURL: If the proxy url has no host component.
    """
    if self.proxy_bypass(request.host):
        return request

    debugging = self._debuglevel >= 3
    proxy = self.get_proxy_env_var(type)
    if debugging:
        print('set_proxy %s_request for %r' % (type, proxy))
    # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
    # grok user:password@host:port as well as
    # http://user:password@host:port

    parsed_url = transport.ConnectedTransport._split_url(proxy)
    if not parsed_url.host:
        raise urlutils.InvalidURL(proxy, 'No host component')

    if request.proxy_auth == {}:
        # No proxy auth parameter are available, we are handling the first
        # proxied request, initialize. scheme (the authentication scheme)
        # and realm will be set by the AuthHandler
        request.proxy_auth = dict(
            host=parsed_url.host,
            port=parsed_url.port,
            user=parsed_url.user,
            password=parsed_url.password,
            protocol=parsed_url.scheme,
            # We ignore path since we connect to a proxy
            path=None)

    phost = parsed_url.host
    if parsed_url.port is not None:
        phost = phost + ':%d' % parsed_url.port
    request.set_proxy(phost, type)
    if debugging:
        print('set_proxy: proxy set to %s://%s' % (type, phost))
    return request
1743-
1744-
class AbstractAuthHandler(urllib_request.BaseHandler):
    """A custom abstract authentication handler for all http authentications.

    Provides the meat to handle authentication errors and
    preventively set authentication headers after the first
    successful authentication.

    This can be used for http and proxy, as well as for basic, negotiate
    and digest authentications.

    This provides a unified interface for all authentication handlers
    (urllib_request provides far too many with different policies).

    The interaction between this handler and the urllib_request
    framework is not obvious, it works as follows:

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication
      header (self.build_auth_header) if enough info is available.

    - the request is sent to the server,

    - if an authentication error is received self.auth_required is
      called, we acquire the authentication info in the error headers
      and call self.auth_match to check that we are able to try the
      authentication and complete the authentication parameters,

    - we call parent.open(request), that may trigger http_request
      and will add a header (self.build_auth_header), but here we have
      all the required info (keep in mind that the request and
      authentication used in the recursive calls are really (and must
      be) the *same* objects).

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been
      updated.
    """

    # The scheme as it appears in the server header (lower cased)
    scheme = None

    # We don't want to retry authenticating endlessly
    _max_retry = 3

    # Whether the auth mechanism requires a username.
    requires_username = True

    # The following attributes should be defined by daughter
    # classes:
    # - auth_required_header:  the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # None means we are not inside a try/fail cycle of
        # authentications; auth_required() turns it into a counter.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Split an authentication header into scheme and remainder.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder) scheme being the first word
            in the given header (lower cased), remainder may be None.
        """
        pieces = server_header.split(None, 1)
        if len(pieces) == 2:
            scheme, remainder = pieces
        else:
            # No remainder: the whole header is the scheme
            scheme, remainder = server_header, None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Store ``value`` under ``key``, flagging auth as modified on change."""
        if auth.get(key, None) != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly. The retries being
        # recursive calls, None identifies the first retry.
        if self._retry_count is None:
            attempts = 1
        else:
            attempts = self._retry_count + 1
        self._retry_count = attempts
        if attempts > self._max_retry:
            # Let's be ready for next round
            self._retry_count = None
            return None

        server_headers = headers.get_all(self.auth_required_header)
        if not server_headers:
            # The http error MUST have the associated
            # header. This must never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            parsed_url = urlutils.URL.from_string(request.get_full_url())
            url_info = (
                ('protocol', parsed_url.scheme),
                ('host', parsed_url.host),
                ('port', parsed_url.port),
                ('path', parsed_url.path),
                )
            for key, value in url_info:
                self.update_auth(auth, key, value)

        # FIXME: the auth handler should be selected at a single place
        # instead of letting all handlers try to match all headers, but the
        # current design doesn't allow a simple implementation.
        for server_header in server_headers:
            # Several schemes can be proposed by the server, try to match
            # each one in turn
            if not self.auth_match(server_header, auth):
                continue

            # auth_match may have modified auth (by adding the
            # password or changing the realm, for example)
            already_sent = request.get_header(self.auth_header, None)
            if already_sent is not None and not auth['modified']:
                # We already tried that, give up
                return None

            # Only the most secure scheme proposed by the server should be
            # used, since the handlers use 'handler_order' to describe that
            # property, the first handler tried takes precedence, the
            # others should not attempt to authenticate if the best one
            # failed.
            best_scheme = auth.get('best_scheme', None)
            if best_scheme is None:
                # At that point, if the current handler doesn't succeed
                # the credentials are wrong (or incomplete), but we know
                # that the associated scheme should be used.
                best_scheme = auth['best_scheme'] = self.scheme
            if best_scheme != self.scheme:
                continue

            if self.requires_username and auth.get('user', None) is None:
                # Without a known user, we can't authenticate
                return None

            # Housekeeping
            request.connection.cleanup_pipe()
            # Retry the request with an authentication header added
            response = self.parent.open(request)
            if response:
                self.auth_successful(request, response)
            return response

        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all
        # available handlers. If one of them authenticates
        # successfully, a response will be returned. If none of
        # them succeeds, None will be returned and the error
        # handler will raise the 401 'Unauthorized' or the 407
        # 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be
        updated with info from the server. Some of these
        parameters, when combined, are considered to be the
        authentication key, if one of them changes the
        authentication result may change. 'user' and 'password'
        are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by
        using the update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the initial
            url and then during dialog with the server).
        :returns: A (user, password) tuple; either may be None.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(
                auth['protocol'], auth['host'],
                port=port, path=auth['path'],
                realm=realm, ask=True,
                prompt=self.build_username_prompt(auth))
        if password is None and user is not None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a password prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that
        information in the prompt, so we build the prompt from the
        authentication dict which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(user)s@%(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' password')
        return u''.join(pieces)

    def _build_username_prompt(self, auth):
        """Build a username prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that
        information in the prompt, so we build the prompt from the
        authentication dict which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to the
        user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' username')
        return u''.join(pieces)

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            header = self.build_auth_header(auth, request)
            self.add_auth_header(request, header)
        return request

    https_request = http_request  # FIXME: Need test
2024-
2025-
class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        """Accept the challenge when a Kerberos response can be produced.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known; updated with the
            scheme and the negotiate response on success.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        token = self._auth_match_kerberos(auth)
        if token is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', token)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host."""
        global kerberos, checked_kerberos
        if kerberos is None and not checked_kerberos:
            # Attempt the import only once per process
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None
        status, context = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if status < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], status)
            return None
        status = kerberos.authGSSClientStep(context, "")
        if status < 0:
            trace.mutter('authGSSClientStep failed: %d', status)
            return None
        return kerberos.authGSSClientResponse(context)

    def build_auth_header(self, auth, request):
        """Return the Negotiate header value built from the stored response."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` already holds a usable negotiate response."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        if auth.get('scheme', None) != 'negotiate':
            return False
        return auth.get('negotiate_response', None) is not None
2080-
2081-
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the ``Basic`` header value from the user and password.

        :param auth: The auth parameters; 'user' and 'password' are used.
        :param request: The request needing authentication (unused).
        :return: The header value, credentials being utf-8 then base64
            encoded.
        """
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + \
            base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return auth_header

    def extract_realm(self, header_value):
        """Extract the realm from the parameters of a Basic challenge.

        :param header_value: The challenge parameters (the header minus
            the scheme), or None when the server sent the bare scheme.
        :return: A (match, realm) tuple; both are None when no realm is
            found.
        """
        if header_value is None:
            # A bare 'Basic' challenge carries no parameters at all;
            # searching None would raise TypeError.
            return None, None
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        """Check that the challenge is a Basic one we can answer.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known; scheme, realm,
            user and password may be updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` can be used to send the header up front."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
2124-
2125-
def get_digest_algorithm_impls(algorithm):
    """Return the (H, KD) functions implementing a digest algorithm.

    :param algorithm: The algorithm name; 'MD5' and 'SHA' are known.
    :return: A (H, KD) tuple. H hashes bytes through the osutils helper
        for the algorithm; KD(secret, data) applies H to the utf-8
        encoding of "secret:data". Both are None for an unknown
        algorithm.
    """
    if algorithm == 'MD5':
        def digest(x):
            return osutils.md5(x).hexdigest()
    elif algorithm == 'SHA':
        digest = osutils.sha_string
    else:
        return None, None

    def keyed_digest(secret, data):
        return digest(("%s:%s" % (secret, data)).encode('utf-8'))

    return digest, keyed_digest
2137-
2138-
def get_new_cnonce(nonce, nonce_count):
    """Create a fresh client nonce for a digest authentication.

    :param nonce: The server nonce.
    :param nonce_count: How many times the nonce has been used.
    :return: The first 16 characters of the osutils.sha_string digest of
        the nonce, the count, the current time and 8 random characters.
    """
    seed = '%s:%d:%s:%s' % (
        nonce, nonce_count, time.ctime(), osutils.rand_chars(8))
    digest = osutils.sha_string(seed.encode('utf-8'))
    return digest[:16]
2143-
2144-
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` can be used to send the header up front."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Check that the challenge is a Digest one we can answer.

        Only the 'auth' quality of protection is supported. The server
        may offer several options at once (``qop="auth,auth-int"``);
        'auth' is selected whenever it is among them.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known; they are updated
            with the challenge parameters.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication
        offered_qop = req_auth.get('qop', None)
        if offered_qop is None:
            return False
        # qop is a list of options, not a single token
        qop_options = [option.strip() for option in offered_qop.split(',')]
        if 'auth' not in qop_options:  # No auth-int so far
            return False
        qop = 'auth'

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if (auth.get('user', None) is None
                or auth.get('password', None) is None):
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm',
                                 req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            # opaque is not part of the authentication key, so it is
            # stored without marking auth as modified
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Build the ``Digest`` header value for ``request``.

        The nonce count stored in ``auth`` is incremented as a side
        effect, since the nonce is used once more.

        :param auth: The auth parameters gathered by auth_match.
        :param request: The request needing authentication.
        :return: The header value.
        """
        selector = request.selector
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
2240-
2241-
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    auth_required_header = 'www-authenticate'
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Return the auth params stored on the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Store the auth params on the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for the server."""
        prompt = self._build_password_prompt(auth)
        return prompt

    def build_username_prompt(self, auth):
        """Return the username prompt for the server."""
        prompt = self._build_username_prompt(auth)
        return prompt

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 error by trying to authenticate the request."""
        return self.auth_required(req, headers)
2269-
2270-
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params stored on the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the proxy auth params on the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt, marked as being for the proxy."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the username prompt, marked as being for the proxy."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 error by trying to authenticate the request."""
        return self.auth_required(req, headers)
2305-
2306-
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler.

    Combines the basic scheme with the http (401) header handling.
    """
2309-
2310-
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler.

    Combines the basic scheme with the proxy (407) header handling.
    """
2313-
2314-
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler.

    Combines the digest scheme with the http (401) header handling.
    """
2317-
2318-
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler.

    Combines the digest scheme with the proxy (407) header handling.
    """
2321-
2322-
class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler.

    Combines the negotiate scheme with the http (401) header handling.
    """
2325-
2326-
class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler.

    Combines the negotiate scheme with the proxy (407) header handling.
    """
2329-
2330-
class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    # The error codes the caller will handle.
    #
    # This can be specialized in the request on a case-by case basis, but
    # the common cases are covered here.
    accepted_errors = [
        200,  # Ok
        201,
        202,
        204,
        206,  # Partial content
        400,
        403,
        404,  # Not found
        405,  # Method not allowed
        406,  # Not Acceptable
        409,  # Conflict
        416,  # Range not satisfiable
        422,  # Unprocessible entity
        501,  # Not implemented
        ]

    def http_response(self, request, response):
        """Pass accepted responses through, dispatch the others as errors.

        :param request: The request that was sent.
        :param response: The response received for it.
        :returns: The response itself when its code is accepted,
            otherwise whatever the opener's error handling returns.
        """
        code = response.code
        msg = response.msg
        hdrs = response.info()

        if code in self.accepted_errors:
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
2368-
2369-
class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise the Breezy exception matching an unhandled http error.

        :raises errors.TransportError: For a 403 response.
        :raises errors.UnexpectedHttpStatus: For any other code.
        """
        url = req.get_full_url()
        if code != 403:
            raise errors.UnexpectedHttpStatus(
                url, code, 'Unable to handle http code: %s' % msg)
        raise errors.TransportError(
            'Server refuses to fulfill the request (403 Forbidden)'
            ' for %s' % url)
2382-
2383-
class Opener(object):
    """A wrapper around urllib_request.build_opener

    Daughter classes can override to build their own specific opener
    """
    # TODO: Provides hooks for daughter classes.

    def __init__(self,
                 connection=ConnectionHandler,
                 redirect=HTTPRedirectHandler,
                 error=HTTPErrorProcessor,
                 report_activity=None,
                 ca_certs=None):
        """Build the underlying opener and expose its ``open`` method.

        :param connection: Connection handler class; it is instantiated
            with ``report_activity`` and ``ca_certs``.
        :param redirect: Redirection handler, handed to build_opener as is.
        :param error: Error processor, handed to build_opener as is.
        :param report_activity: Passed to the connection handler.
        :param ca_certs: Passed to the connection handler.
        """
        handlers = [
            connection(report_activity=report_activity, ca_certs=ca_certs),
            redirect,
            error,
            ProxyHandler(),
            HTTPBasicAuthHandler(),
            HTTPDigestAuthHandler(),
            HTTPNegotiateAuthHandler(),
            ProxyBasicAuthHandler(),
            ProxyDigestAuthHandler(),
            ProxyNegotiateAuthHandler(),
            HTTPHandler,
            HTTPSHandler,
            HTTPDefaultErrorHandler,
            ]
        self._opener = urllib_request.build_opener(*handlers)

        self.open = self._opener.open
        if DEBUG >= 9:
            # When dealing with handler order, it's easy to mess
            # things up, the following will help understand which
            # handler is used, when and for what.
            import pprint
            pprint.pprint(self._opener.__dict__)
2419-
2420-
2421-class HttpTransport(ConnectedTransport):
2422- """HTTP Client implementations.
2423-
2424- The protocol can be given as e.g. http+urllib://host/ to use a particular
2425- implementation.
2426- """
2427-
2428- # _unqualified_scheme: "http" or "https"
2429- # _scheme: may have "+pycurl", etc
2430-
2431- # In order to debug we have to issue our traces in sync with
2432- # httplib, which use print :(
2433- _debuglevel = 0
2434-
def __init__(self, base, _from_transport=None, ca_certs=None):
    """Set the base path where files will be stored.

    :param base: The http or https url of the transport; the scheme may
        carry a ``+implementation`` suffix.
    :param _from_transport: Transport this one is cloned from; its range
        hint and opener are shared.
    :param ca_certs: Passed to the Opener created for a new transport.
    :raises AssertionError: If ``base`` is not an http[s] url.
    """
    scheme_match = re.match(r'^(https?)(\+\w+)?://', base)
    if scheme_match is None:
        raise AssertionError("not a http url: %r" % base)
    self._unqualified_scheme = scheme_match.group(1)
    super(HttpTransport, self).__init__(
        base, _from_transport=_from_transport)
    self._medium = None
    # range hint is handled dynamically throughout the life
    # of the transport object. We start by trying multi-range
    # requests and if the server returns bogus results, we
    # retry with single range requests and, finally, we
    # forget about range if the server really can't
    # understand. Once acquired, this piece of info is
    # propagated to clones.
    if _from_transport is None:
        self._range_hint = 'multi'
        self._opener = Opener(
            report_activity=self._report_activity, ca_certs=ca_certs)
    else:
        self._range_hint = _from_transport._range_hint
        self._opener = _from_transport._opener
2458-
def request(self, method, url, fields=None, headers=None, **urlopen_kw):
    """Perform an http request and return a urllib3-like response.

    :param method: The http method.
    :param url: The url to request.
    :param fields: Optional form fields, url-encoded into the request
        body; mutually exclusive with the ``body`` keyword.
    :param headers: Optional dict of request headers.
    :param urlopen_kw: ``body`` (raw request data) and ``retries``
        (redirections are followed when > 0) are understood; anything
        else raises NotImplementedError.
    :returns: An object exposing ``status``, ``reason``, ``data``,
        ``text``, ``getheader()``, ``getheaders()``, ``read()``,
        ``readline()`` and ``readlines()``.
    :raises ValueError: If both ``fields`` and ``body`` are given.
    :raises errors.RedirectRequested: If the server redirects while
        redirections are not followed.
    """
    body = urlopen_kw.pop('body', None)
    if fields is not None:
        data = urlencode(fields).encode()
        if body is not None:
            raise ValueError(
                'body and fields are mutually exclusive')
    else:
        data = body
    if headers is None:
        headers = {}
    request = Request(method, url, data, headers)
    request.follow_redirections = (urlopen_kw.pop('retries', 0) > 0)
    if urlopen_kw:
        raise NotImplementedError(
            'unknown arguments: %r' % urlopen_kw.keys())
    connection = self._get_connection()
    if connection is not None:
        # Give back shared info
        request.connection = connection
        (auth, proxy_auth) = self._get_credentials()
        # Clean the httplib.HTTPConnection pipeline in case the previous
        # request couldn't do it
        connection.cleanup_pipe()
    else:
        # First request, initialize credentials.
        # scheme and realm will be set by the _urllib2_wrappers.AuthHandler
        auth = self._create_auth()
        # Proxy initialization will be done by the first proxied request
        proxy_auth = dict()
    # Ensure authentication info is provided
    request.auth = auth
    request.proxy_auth = proxy_auth

    if self._debuglevel > 0:
        print('perform: %s base: %s, url: %s' % (request.method, self.base,
                                                 request.get_full_url()))
    response = self._opener.open(request)
    if self._get_connection() is not request.connection:
        # First connection or reconnection
        self._set_connection(request.connection,
                             (request.auth, request.proxy_auth))
    else:
        # http may change the credentials while keeping the
        # connection opened
        self._update_credentials((request.auth, request.proxy_auth))

    code = response.code
    if (request.follow_redirections is False
            and code in (301, 302, 303, 307, 308)):
        raise errors.RedirectRequested(request.get_full_url(),
                                       request.redirected_to,
                                       is_permanent=(code in (301, 308)))

    if request.redirected_to is not None:
        trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                     request.redirected_to))

    class Urllib3LikeResponse(object):
        """Adapt the opener's response to a urllib3-like interface."""

        def __init__(self, actual):
            self._actual = actual
            # Body cache, filled by the first access to ``data``
            self._data = None

        def getheader(self, name, default=None):
            if self._actual.headers is None:
                raise http_client.ResponseNotReady()
            return self._actual.headers.get(name, default)

        def getheaders(self):
            if self._actual.headers is None:
                raise http_client.ResponseNotReady()
            return list(self._actual.headers.items())

        @property
        def status(self):
            return self._actual.code

        @property
        def reason(self):
            return self._actual.reason

        @property
        def data(self):
            if self._data is None:
                self._data = self._actual.read()
            return self._data

        @property
        def text(self):
            """The body decoded with the charset of the Content-Type.

            None for a 204 response. The default codec is used when the
            header is missing or does not name a charset.
            """
            if self.status == 204:
                return None
            # The cgi module (previously used for parse_header) has
            # been removed from recent Python versions; the email
            # package parses the same header syntax.
            from email.message import Message
            charset = None
            content_type = self._actual.headers.get('Content-Type')
            if content_type is not None:
                parser = Message()
                parser['Content-Type'] = content_type
                charset = parser.get_content_charset()
            if charset:
                return self.data.decode(charset)
            else:
                return self.data.decode()

        def read(self, amt=None):
            return self._actual.read(amt)

        def readlines(self):
            return self._actual.readlines()

        def readline(self, size=-1):
            return self._actual.readline(size)

    return Urllib3LikeResponse(response)
2568-
2569- def disconnect(self):
2570- connection = self._get_connection()
2571- if connection is not None:
2572- connection.close()
2573-
2574- def has(self, relpath):
2575- """Does the target location exist?
2576- """
2577- response = self._head(relpath)
2578-
2579- code = response.status
2580- if code == 200: # "ok",
2581- return True
2582- else:
2583- return False
2584-
2585- def get(self, relpath):
2586- """Get the file at the given relative path.
2587-
2588- :param relpath: The relative path to the file
2589- """
2590- code, response_file = self._get(relpath, None)
2591- return response_file
2592-
2593- def _get(self, relpath, offsets, tail_amount=0):
2594- """Get a file, or part of a file.
2595-
2596- :param relpath: Path relative to transport base URL
2597- :param offsets: None to get the whole file;
2598- or a list of _CoalescedOffset to fetch parts of a file.
2599- :param tail_amount: The amount to get from the end of the file.
2600-
2601- :returns: (http_code, result_file)
2602- """
2603- abspath = self._remote_path(relpath)
2604- headers = {}
2605- if offsets or tail_amount:
2606- range_header = self._attempted_range_header(offsets, tail_amount)
2607- if range_header is not None:
2608- bytes = 'bytes=' + range_header
2609- headers = {'Range': bytes}
2610- else:
2611- range_header = None
2612-
2613- response = self.request('GET', abspath, headers=headers)
2614-
2615- if response.status == 404: # not found
2616- raise errors.NoSuchFile(abspath)
2617- elif response.status == 416:
2618- # We don't know which, but one of the ranges we specified was
2619- # wrong.
2620- raise errors.InvalidHttpRange(abspath, range_header,
2621- 'Server return code %d' % response.status)
2622- elif response.status == 400:
2623- if range_header:
2624- # We don't know which, but one of the ranges we specified was
2625- # wrong.
2626- raise errors.InvalidHttpRange(
2627- abspath, range_header,
2628- 'Server return code %d' % response.status)
2629- else:
2630- raise errors.BadHttpRequest(abspath, response.reason)
2631- elif response.status not in (200, 206):
2632- raise errors.UnexpectedHttpStatus(abspath, response.status)
2633-
2634- data = handle_response(
2635- abspath, response.status, response.getheader, response)
2636- return response.status, data
2637-
2638- def _remote_path(self, relpath):
2639- """See ConnectedTransport._remote_path.
2640-
2641- user and passwords are not embedded in the path provided to the server.
2642- """
2643- url = self._parsed_url.clone(relpath)
2644- url.user = url.quoted_user = None
2645- url.password = url.quoted_password = None
2646- url.scheme = self._unqualified_scheme
2647- return str(url)
2648-
2649- def _create_auth(self):
2650- """Returns a dict containing the credentials provided at build time."""
2651- auth = dict(host=self._parsed_url.host, port=self._parsed_url.port,
2652- user=self._parsed_url.user, password=self._parsed_url.password,
2653- protocol=self._unqualified_scheme,
2654- path=self._parsed_url.path)
2655- return auth
2656-
2657- def get_smart_medium(self):
2658- """See Transport.get_smart_medium."""
2659- if self._medium is None:
2660- # Since medium holds some state (smart server probing at least), we
2661- # need to keep it around. Note that this is needed because medium
2662- # has the same 'base' attribute as the transport so it can't be
2663- # shared between transports having different bases.
2664- self._medium = SmartClientHTTPMedium(self)
2665- return self._medium
2666-
2667- def _degrade_range_hint(self, relpath, ranges):
2668- if self._range_hint == 'multi':
2669- self._range_hint = 'single'
2670- mutter('Retry "%s" with single range request' % relpath)
2671- elif self._range_hint == 'single':
2672- self._range_hint = None
2673- mutter('Retry "%s" without ranges' % relpath)
2674- else:
2675- # We tried all the tricks, but nothing worked, caller must reraise.
2676- return False
2677- return True
2678-
2679- # _coalesce_offsets is a helper for readv, it try to combine ranges without
2680- # degrading readv performances. _bytes_to_read_before_seek is the value
2681- # used for the limit parameter and has been tuned for other transports. For
2682- # HTTP, the name is inappropriate but the parameter is still useful and
2683- # helps reduce the number of chunks in the response. The overhead for a
2684- # chunk (headers, length, footer around the data itself is variable but
2685- # around 50 bytes. We use 128 to reduce the range specifiers that appear in
2686- # the header, some servers (notably Apache) enforce a maximum length for a
2687- # header and issue a '400: Bad request' error when too much ranges are
2688- # specified.
2689- _bytes_to_read_before_seek = 128
2690- # No limit on the offset number that get combined into one, we are trying
2691- # to avoid downloading the whole file.
2692- _max_readv_combine = 0
2693- # By default Apache has a limit of ~400 ranges before replying with a 400
2694- # Bad Request. So we go underneath that amount to be safe.
2695- _max_get_ranges = 200
2696- # We impose no limit on the range size. But see _pycurl.py for a different
2697- # use.
2698- _get_max_size = 0
2699-
2700- def _readv(self, relpath, offsets):
2701- """Get parts of the file at the given relative path.
2702-
2703- :param offsets: A list of (offset, size) tuples.
2704- :param return: A list or generator of (offset, data) tuples
2705- """
2706- # offsets may be a generator, we will iterate it several times, so
2707- # build a list
2708- offsets = list(offsets)
2709-
2710- try_again = True
2711- retried_offset = None
2712- while try_again:
2713- try_again = False
2714-
2715- # Coalesce the offsets to minimize the GET requests issued
2716- sorted_offsets = sorted(offsets)
2717- coalesced = self._coalesce_offsets(
2718- sorted_offsets, limit=self._max_readv_combine,
2719- fudge_factor=self._bytes_to_read_before_seek,
2720- max_size=self._get_max_size)
2721-
2722- # Turn it into a list, we will iterate it several times
2723- coalesced = list(coalesced)
2724- if 'http' in debug.debug_flags:
2725- mutter('http readv of %s offsets => %s collapsed %s',
2726- relpath, len(offsets), len(coalesced))
2727-
2728- # Cache the data read, but only until it's been used
2729- data_map = {}
2730- # We will iterate on the data received from the GET requests and
2731- # serve the corresponding offsets respecting the initial order. We
2732- # need an offset iterator for that.
2733- iter_offsets = iter(offsets)
2734- try:
2735- cur_offset_and_size = next(iter_offsets)
2736- except StopIteration:
2737- return
2738-
2739- try:
2740- for cur_coal, rfile in self._coalesce_readv(relpath, coalesced):
2741- # Split the received chunk
2742- for offset, size in cur_coal.ranges:
2743- start = cur_coal.start + offset
2744- rfile.seek(start, os.SEEK_SET)
2745- data = rfile.read(size)
2746- data_len = len(data)
2747- if data_len != size:
2748- raise errors.ShortReadvError(relpath, start, size,
2749- actual=data_len)
2750- if (start, size) == cur_offset_and_size:
2751- # The offset requested are sorted as the coalesced
2752- # ones, no need to cache. Win !
2753- yield cur_offset_and_size[0], data
2754- try:
2755- cur_offset_and_size = next(iter_offsets)
2756- except StopIteration:
2757- return
2758- else:
2759- # Different sorting. We need to cache.
2760- data_map[(start, size)] = data
2761-
2762- # Yield everything we can
2763- while cur_offset_and_size in data_map:
2764- # Clean the cached data since we use it
2765- # XXX: will break if offsets contains duplicates --
2766- # vila20071129
2767- this_data = data_map.pop(cur_offset_and_size)
2768- yield cur_offset_and_size[0], this_data
2769- try:
2770- cur_offset_and_size = next(iter_offsets)
2771- except StopIteration:
2772- return
2773-
2774- except (errors.ShortReadvError, errors.InvalidRange,
2775- errors.InvalidHttpRange, errors.HttpBoundaryMissing) as e:
2776- mutter('Exception %r: %s during http._readv', e, e)
2777- if (not isinstance(e, errors.ShortReadvError)
2778- or retried_offset == cur_offset_and_size):
2779- # We don't degrade the range hint for ShortReadvError since
2780- # they do not indicate a problem with the server ability to
2781- # handle ranges. Except when we fail to get back a required
2782- # offset twice in a row. In that case, falling back to
2783- # single range or whole file should help.
2784- if not self._degrade_range_hint(relpath, coalesced):
2785- raise
2786- # Some offsets may have been already processed, so we retry
2787- # only the unsuccessful ones.
2788- offsets = [cur_offset_and_size] + [o for o in iter_offsets]
2789- retried_offset = cur_offset_and_size
2790- try_again = True
2791-
2792- def _coalesce_readv(self, relpath, coalesced):
2793- """Issue several GET requests to satisfy the coalesced offsets"""
2794-
2795- def get_and_yield(relpath, coalesced):
2796- if coalesced:
2797- # Note that the _get below may raise
2798- # errors.InvalidHttpRange. It's the caller's responsibility to
2799- # decide how to retry since it may provide different coalesced
2800- # offsets.
2801- code, rfile = self._get(relpath, coalesced)
2802- for coal in coalesced:
2803- yield coal, rfile
2804-
2805- if self._range_hint is None:
2806- # Download whole file
2807- for c, rfile in get_and_yield(relpath, coalesced):
2808- yield c, rfile
2809- else:
2810- total = len(coalesced)
2811- if self._range_hint == 'multi':
2812- max_ranges = self._max_get_ranges
2813- elif self._range_hint == 'single':
2814- max_ranges = total
2815- else:
2816- raise AssertionError("Unknown _range_hint %r"
2817- % (self._range_hint,))
2818- # TODO: Some web servers may ignore the range requests and return
2819- # the whole file, we may want to detect that and avoid further
2820- # requests.
2821- # Hint: test_readv_multiple_get_requests will fail once we do that
2822- cumul = 0
2823- ranges = []
2824- for coal in coalesced:
2825- if ((self._get_max_size > 0
2826- and cumul + coal.length > self._get_max_size) or
2827- len(ranges) >= max_ranges):
2828- # Get that much and yield
2829- for c, rfile in get_and_yield(relpath, ranges):
2830- yield c, rfile
2831- # Restart with the current offset
2832- ranges = [coal]
2833- cumul = coal.length
2834- else:
2835- ranges.append(coal)
2836- cumul += coal.length
2837- # Get the rest and yield
2838- for c, rfile in get_and_yield(relpath, ranges):
2839- yield c, rfile
2840-
2841- def recommended_page_size(self):
2842- """See Transport.recommended_page_size().
2843-
2844- For HTTP we suggest a large page size to reduce the overhead
2845- introduced by latency.
2846- """
2847- return 64 * 1024
2848-
2849- def _post(self, body_bytes):
2850- """POST body_bytes to .bzr/smart on this transport.
2851-
2852- :returns: (response code, response body file-like object).
2853- """
2854- # TODO: Requiring all the body_bytes to be available at the beginning of
2855- # the POST may require large client buffers. It would be nice to have
2856- # an interface that allows streaming via POST when possible (and
2857- # degrades to a local buffer when not).
2858- abspath = self._remote_path('.bzr/smart')
2859- response = self.request(
2860- 'POST', abspath, body=body_bytes,
2861- headers={'Content-Type': 'application/octet-stream'})
2862- if response.status not in (200, 403):
2863- raise errors.UnexpectedHttpStatus(abspath, response.status)
2864- code = response.status
2865- data = handle_response(
2866- abspath, code, response.getheader, response)
2867- return code, data
2868-
2869- def _head(self, relpath):
2870- """Request the HEAD of a file.
2871-
2872- Performs the request and leaves callers handle the results.
2873- """
2874- abspath = self._remote_path(relpath)
2875- response = self.request('HEAD', abspath)
2876- if response.status not in (200, 404):
2877- raise errors.UnexpectedHttpStatus(abspath, response.status)
2878-
2879- return response
2880-
2881- raise NotImplementedError(self._post)
2882-
2883- def put_file(self, relpath, f, mode=None):
2884- """Copy the file-like object into the location.
2885-
2886- :param relpath: Location to put the contents, relative to base.
2887- :param f: File-like object.
2888- """
2889- raise errors.TransportNotPossible('http PUT not supported')
2890-
2891- def mkdir(self, relpath, mode=None):
2892- """Create a directory at the given path."""
2893- raise errors.TransportNotPossible('http does not support mkdir()')
2894-
2895- def rmdir(self, relpath):
2896- """See Transport.rmdir."""
2897- raise errors.TransportNotPossible('http does not support rmdir()')
2898-
2899- def append_file(self, relpath, f, mode=None):
2900- """Append the text in the file-like object into the final
2901- location.
2902- """
2903- raise errors.TransportNotPossible('http does not support append()')
2904-
2905- def copy(self, rel_from, rel_to):
2906- """Copy the item at rel_from to the location at rel_to"""
2907- raise errors.TransportNotPossible('http does not support copy()')
2908-
2909- def copy_to(self, relpaths, other, mode=None, pb=None):
2910- """Copy a set of entries from self into another Transport.
2911-
2912- :param relpaths: A list/generator of entries to be copied.
2913-
2914- TODO: if other is LocalTransport, is it possible to
2915- do better than put(get())?
2916- """
2917- # At this point HttpTransport might be able to check and see if
2918- # the remote location is the same, and rather than download, and
2919- # then upload, it could just issue a remote copy_this command.
2920- if isinstance(other, HttpTransport):
2921- raise errors.TransportNotPossible(
2922- 'http cannot be the target of copy_to()')
2923- else:
2924- return super(HttpTransport, self).\
2925- copy_to(relpaths, other, mode=mode, pb=pb)
2926-
2927- def move(self, rel_from, rel_to):
2928- """Move the item at rel_from to the location at rel_to"""
2929- raise errors.TransportNotPossible('http does not support move()')
2930-
2931- def delete(self, relpath):
2932- """Delete the item at relpath"""
2933- raise errors.TransportNotPossible('http does not support delete()')
2934-
2935- def external_url(self):
2936- """See breezy.transport.Transport.external_url."""
2937- # HTTP URL's are externally usable as long as they don't mention their
2938- # implementation qualifier
2939- url = self._parsed_url.clone()
2940- url.scheme = self._unqualified_scheme
2941- return str(url)
2942-
2943- def is_readonly(self):
2944- """See Transport.is_readonly."""
2945- return True
2946-
2947- def listable(self):
2948- """See Transport.listable."""
2949- return False
2950-
2951- def stat(self, relpath):
2952- """Return the stat information for a file.
2953- """
2954- raise errors.TransportNotPossible('http does not support stat()')
2955-
2956- def lock_read(self, relpath):
2957- """Lock the given file for shared (read) access.
2958- :return: A lock object, which should be passed to Transport.unlock()
2959- """
2960- # The old RemoteBranch ignore lock for reading, so we will
2961- # continue that tradition and return a bogus lock object.
2962- class BogusLock(object):
2963- def __init__(self, path):
2964- self.path = path
2965-
2966- def unlock(self):
2967- pass
2968- return BogusLock(relpath)
2969-
2970- def lock_write(self, relpath):
2971- """Lock the given file for exclusive (write) access.
2972- WARNING: many transports do not support this, so trying avoid using it
2973-
2974- :return: A lock object, which should be passed to Transport.unlock()
2975- """
2976- raise errors.TransportNotPossible('http does not support lock_write()')
2977-
2978- def _attempted_range_header(self, offsets, tail_amount):
2979- """Prepare a HTTP Range header at a level the server should accept.
2980-
2981- :return: the range header representing offsets/tail_amount or None if
2982- no header can be built.
2983- """
2984-
2985- if self._range_hint == 'multi':
2986- # Generate the header describing all offsets
2987- return self._range_header(offsets, tail_amount)
2988- elif self._range_hint == 'single':
2989- # Combine all the requested ranges into a single
2990- # encompassing one
2991- if len(offsets) > 0:
2992- if tail_amount not in (0, None):
2993- # Nothing we can do here to combine ranges with tail_amount
2994- # in a single range, just returns None. The whole file
2995- # should be downloaded.
2996- return None
2997- else:
2998- start = offsets[0].start
2999- last = offsets[-1]
3000- end = last.start + last.length - 1
3001- whole = self._coalesce_offsets([(start, end - start + 1)],
3002- limit=0, fudge_factor=0)
3003- return self._range_header(list(whole), 0)
3004- else:
3005- # Only tail_amount, requested, leave range_header
3006- # do its work
3007- return self._range_header(offsets, tail_amount)
3008- else:
3009- return None
3010-
3011- @staticmethod
3012- def _range_header(ranges, tail_amount):
3013- """Turn a list of bytes ranges into a HTTP Range header value.
3014-
3015- :param ranges: A list of _CoalescedOffset
3016- :param tail_amount: The amount to get from the end of the file.
3017-
3018- :return: HTTP range header string.
3019-
3020- At least a non-empty ranges *or* a tail_amount must be
3021- provided.
3022- """
3023- strings = []
3024- for offset in ranges:
3025- strings.append('%d-%d' % (offset.start,
3026- offset.start + offset.length - 1))
3027-
3028- if tail_amount:
3029- strings.append('-%d' % tail_amount)
3030-
3031- return ','.join(strings)
3032-
3033- def _redirected_to(self, source, target):
3034- """Returns a transport suitable to re-issue a redirected request.
3035-
3036- :param source: The source url as returned by the server.
3037- :param target: The target url as returned by the server.
3038-
3039- The redirection can be handled only if the relpath involved is not
3040- renamed by the redirection.
3041-
3042- :returns: A transport
3043- :raise UnusableRedirect: when the URL can not be reinterpreted
3044- """
3045- parsed_source = self._split_url(source)
3046- parsed_target = self._split_url(target)
3047- pl = len(self._parsed_url.path)
3048- # determine the excess tail - the relative path that was in
3049- # the original request but not part of this transports' URL.
3050- excess_tail = parsed_source.path[pl:].strip("/")
3051- if not parsed_target.path.endswith(excess_tail):
3052- # The final part of the url has been renamed, we can't handle the
3053- # redirection.
3054- raise UnusableRedirect(
3055- source, target, "final part of the url was renamed")
3056-
3057- target_path = parsed_target.path
3058- if excess_tail:
3059- # Drop the tail that was in the redirect but not part of
3060- # the path of this transport.
3061- target_path = target_path[:-len(excess_tail)]
3062-
3063- if parsed_target.scheme in ('http', 'https'):
3064- # Same protocol family (i.e. http[s]), we will preserve the same
3065- # http client implementation when a redirection occurs from one to
3066- # the other (otherwise users may be surprised that bzr switches
3067- # from one implementation to the other, and devs may suffer
3068- # debugging it).
3069- if (parsed_target.scheme == self._unqualified_scheme
3070- and parsed_target.host == self._parsed_url.host
3071- and parsed_target.port == self._parsed_url.port
3072- and (parsed_target.user is None or
3073- parsed_target.user == self._parsed_url.user)):
3074- # If a user is specified, it should match, we don't care about
3075- # passwords, wrong passwords will be rejected anyway.
3076- return self.clone(target_path)
3077- else:
3078- # Rebuild the url preserving the scheme qualification and the
3079- # credentials (if they don't apply, the redirected to server
3080- # will tell us, but if they do apply, we avoid prompting the
3081- # user)
3082- redir_scheme = parsed_target.scheme
3083- new_url = self._unsplit_url(redir_scheme,
3084- self._parsed_url.user,
3085- self._parsed_url.password,
3086- parsed_target.host, parsed_target.port,
3087- target_path)
3088- return transport.get_transport_from_url(new_url)
3089- else:
3090- # Redirected to a different protocol
3091- new_url = self._unsplit_url(parsed_target.scheme,
3092- parsed_target.user,
3093- parsed_target.password,
3094- parsed_target.host, parsed_target.port,
3095- target_path)
3096- return transport.get_transport_from_url(new_url)
3097-
3098- def _options(self, relpath):
3099- abspath = self._remote_path(relpath)
3100- resp = self.request('OPTIONS', abspath)
3101- if resp.status == 404:
3102- raise errors.NoSuchFile(abspath)
3103- if resp.status in (403, 405):
3104- raise errors.InvalidHttpResponse(
3105- abspath,
3106- "OPTIONS not supported or forbidden for remote URL")
3107- return resp.getheaders()
3108-
3109-
3110-# TODO: May be better located in smart/medium.py with the other
3111-# SmartMedium classes
3112-class SmartClientHTTPMedium(medium.SmartClientMedium):
3113-
3114- def __init__(self, http_transport):
3115- super(SmartClientHTTPMedium, self).__init__(http_transport.base)
3116- # We don't want to create a circular reference between the http
3117- # transport and its associated medium. Since the transport will live
3118- # longer than the medium, the medium keep only a weak reference to its
3119- # transport.
3120- self._http_transport_ref = weakref.ref(http_transport)
3121-
3122- def get_request(self):
3123- return SmartClientHTTPMediumRequest(self)
3124-
3125- def should_probe(self):
3126- return True
3127-
3128- def remote_path_from_transport(self, transport):
3129- # Strip the optional 'bzr+' prefix from transport so it will have the
3130- # same scheme as self.
3131- transport_base = transport.base
3132- if transport_base.startswith('bzr+'):
3133- transport_base = transport_base[4:]
3134- rel_url = urlutils.relative_url(self.base, transport_base)
3135- return urlutils.unquote(rel_url)
3136-
3137- def send_http_smart_request(self, bytes):
3138- try:
3139- # Get back the http_transport hold by the weak reference
3140- t = self._http_transport_ref()
3141- code, body_filelike = t._post(bytes)
3142- if code != 200:
3143- raise errors.UnexpectedHttpStatus(
3144- t._remote_path('.bzr/smart'), code)
3145- except (errors.InvalidHttpResponse, errors.ConnectionReset) as e:
3146- raise errors.SmartProtocolError(str(e))
3147- return body_filelike
3148-
3149- def _report_activity(self, bytes, direction):
3150- """See SmartMedium._report_activity.
3151-
3152- Does nothing; the underlying plain HTTP transport will report the
3153- activity that this medium would report.
3154- """
3155- pass
3156-
3157- def disconnect(self):
3158- """See SmartClientMedium.disconnect()."""
3159- t = self._http_transport_ref()
3160- t.disconnect()
3161-
3162-
3163-# TODO: May be better located in smart/medium.py with the other
3164-# SmartMediumRequest classes
3165-class SmartClientHTTPMediumRequest(medium.SmartClientMediumRequest):
3166- """A SmartClientMediumRequest that works with an HTTP medium."""
3167-
3168- def __init__(self, client_medium):
3169- medium.SmartClientMediumRequest.__init__(self, client_medium)
3170- self._buffer = b''
3171-
3172- def _accept_bytes(self, bytes):
3173- self._buffer += bytes
3174-
3175- def _finished_writing(self):
3176- data = self._medium.send_http_smart_request(self._buffer)
3177- self._response_body = data
3178-
3179- def _read_bytes(self, count):
3180- """See SmartClientMediumRequest._read_bytes."""
3181- return self._response_body.read(count)
3182-
3183- def _read_line(self):
3184- line, excess = medium._get_line(self._response_body.read)
3185- if excess != b'':
3186- raise AssertionError(
3187- '_get_line returned excess bytes, but this mediumrequest '
3188- 'cannot handle excess. (%r)' % (excess,))
3189- return line
3190-
3191- def _finished_reading(self):
3192- """See SmartClientMediumRequest._finished_reading."""
3193- pass
3194-
3195-
3196-def unhtml_roughly(maybe_html, length_limit=1000):
3197- """Very approximate html->text translation, for presenting error bodies.
3198-
3199- :param length_limit: Truncate the result to this many characters.
3200-
3201- >>> unhtml_roughly("<b>bad</b> things happened\\n")
3202- ' bad things happened '
3203- """
3204- return re.subn(r"(<[^>]*>|\n|&nbsp;)", " ", maybe_html)[0][:length_limit]
3205-
3206-
3207-def get_test_permutations():
3208- """Return the permutations to be used in testing."""
3209- from breezy.tests import (
3210- features,
3211- http_server,
3212- )
3213- permutations = [(HttpTransport, http_server.HttpServer), ]
3214- if features.HTTPSServerFeature.available():
3215- from breezy.tests import (
3216- https_server,
3217- ssl_certs,
3218- )
3219-
3220- class HTTPS_transport(HttpTransport):
3221-
3222- def __init__(self, base, _from_transport=None):
3223- super(HTTPS_transport, self).__init__(
3224- base, _from_transport=_from_transport,
3225- ca_certs=ssl_certs.build_path('ca.crt'))
3226-
3227- permutations.append((HTTPS_transport,
3228- https_server.HTTPSServer))
3229- return permutations
3230
3231=== added file 'breezy/transport/http/urllib.py'
3232--- breezy/transport/http/urllib.py 1970-01-01 00:00:00 +0000
3233+++ breezy/transport/http/urllib.py 2021-01-10 01:22:43 +0000
3234@@ -0,0 +1,2583 @@
3235+# Copyright (C) 2005-2010 Canonical Ltd
3236+#
3237+# This program is free software; you can redistribute it and/or modify
3238+# it under the terms of the GNU General Public License as published by
3239+# the Free Software Foundation; either version 2 of the License, or
3240+# (at your option) any later version.
3241+#
3242+# This program is distributed in the hope that it will be useful,
3243+# but WITHOUT ANY WARRANTY; without even the implied warranty of
3244+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3245+# GNU General Public License for more details.
3246+#
3247+# You should have received a copy of the GNU General Public License
3248+# along with this program; if not, write to the Free Software
3249+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3250+
3251+"""Base implementation of Transport over http using urllib.
3252+
3253+There are separate implementation modules for each http client implementation.
3254+"""
3255+
3256+from __future__ import absolute_import
3257+
3258+DEBUG = 0
3259+
3260+import base64
3261+import cgi
3262+import errno
3263+import os
3264+import re
3265+import socket
3266+import ssl
3267+import sys
3268+import time
3269+import urllib
3270+import weakref
3271+
3272+try:
3273+ import http.client as http_client
3274+except ImportError:
3275+ import httplib as http_client
3276+try:
3277+ import urllib.request as urllib_request
3278+except ImportError: # python < 3
3279+ import urllib2 as urllib_request
3280+try:
3281+ from urllib.parse import urljoin, splitport, splittype, splithost, urlencode
3282+except ImportError:
3283+ from urlparse import urljoin
3284+ from urllib import splitport, splittype, splithost, urlencode
3285+
3286+# TODO: handle_response should be integrated into the http/__init__.py
3287+from .response import handle_response
3288+
3289+# FIXME: Oversimplifying, two kind of exceptions should be
3290+# raised, once a request is issued: URLError before we have been
3291+# able to process the response, HTTPError after that. Process the
3292+# response means we are able to leave the socket clean, so if we
3293+# are not able to do that, we should close the connection. The
3294+# actual code more or less do that, tests should be written to
3295+# ensure that.
3296+
3297+from ... import __version__ as breezy_version
3298+from ... import (
3299+ config,
3300+ debug,
3301+ errors,
3302+ lazy_import,
3303+ osutils,
3304+ trace,
3305+ transport,
3306+ ui,
3307+ urlutils,
3308+)
3309+from ...bzr.smart import medium
3310+from ...trace import mutter
3311+from ...transport import (
3312+ ConnectedTransport,
3313+ UnusableRedirect,
3314+ )
3315+
3316+from . import default_user_agent, ssl
3317+
3318+
3319+checked_kerberos = False
3320+kerberos = None
3321+
3322+
3323+class addinfourl(urllib_request.addinfourl):
3324+ '''Replacement addinfourl class compatible with python-2.7's xmlrpclib
3325+
3326+ In python-2.7, xmlrpclib expects that the response object that it receives
3327+ has a getheader method. http_client.HTTPResponse provides this but
3328+ urllib_request.addinfourl does not. Add the necessary functions here, ported to
3329+ use the internal data structures of addinfourl.
3330+ '''
3331+
3332+ def getheader(self, name, default=None):
3333+ if self.headers is None:
3334+ raise http_client.ResponseNotReady()
3335+ return self.headers.getheader(name, default)
3336+
3337+ def getheaders(self):
3338+ if self.headers is None:
3339+ raise http_client.ResponseNotReady()
3340+ return list(self.headers.items())
3341+
3342+
3343+class _ReportingFileSocket(object):
3344+
3345+ def __init__(self, filesock, report_activity=None):
3346+ self.filesock = filesock
3347+ self._report_activity = report_activity
3348+
3349+ def report_activity(self, size, direction):
3350+ if self._report_activity:
3351+ self._report_activity(size, direction)
3352+
3353+ def read(self, size=1):
3354+ s = self.filesock.read(size)
3355+ self.report_activity(len(s), 'read')
3356+ return s
3357+
3358+ def readline(self, size=-1):
3359+ s = self.filesock.readline(size)
3360+ self.report_activity(len(s), 'read')
3361+ return s
3362+
3363+ def readinto(self, b):
3364+ s = self.filesock.readinto(b)
3365+ self.report_activity(s, 'read')
3366+ return s
3367+
3368+ def __getattr__(self, name):
3369+ return getattr(self.filesock, name)
3370+
3371+
3372+class _ReportingSocket(object):
3373+
3374+ def __init__(self, sock, report_activity=None):
3375+ self.sock = sock
3376+ self._report_activity = report_activity
3377+
3378+ def report_activity(self, size, direction):
3379+ if self._report_activity:
3380+ self._report_activity(size, direction)
3381+
3382+ def sendall(self, s, *args):
3383+ self.sock.sendall(s, *args)
3384+ self.report_activity(len(s), 'write')
3385+
3386+ def recv(self, *args):
3387+ s = self.sock.recv(*args)
3388+ self.report_activity(len(s), 'read')
3389+ return s
3390+
3391+ def makefile(self, mode='r', bufsize=-1):
3392+ # http_client creates a fileobject that doesn't do buffering, which
3393+ # makes fp.readline() very expensive because it only reads one byte
3394+ # at a time. So we wrap the socket in an object that forces
3395+ # sock.makefile to make a buffered file.
3396+ fsock = self.sock.makefile(mode, 65536)
3397+ # And wrap that into a reporting kind of fileobject
3398+ return _ReportingFileSocket(fsock, self._report_activity)
3399+
3400+ def __getattr__(self, name):
3401+ return getattr(self.sock, name)
3402+
3403+
3404+# We define our own Response class to keep our http_client pipe clean
class Response(http_client.HTTPResponse):
    """HTTPResponse subclass used instead of decorating returned objects.

    http_client would rather decorate the objects it hands back; having our
    own class lets us drain ignored bodies and leftover bytes ourselves.
    """

    # Statuses whose body is read and thrown away as soon as the headers
    # have been parsed (see begin()).
    _body_ignored_responses = [301, 302, 303, 307, 308, 400, 401, 403, 404, 501]

    # finish() may have to discard several MB in the worst case. Rather than
    # buffering all of it, the remainder is read and dropped chunk by chunk.
    _discarded_buf_size = 8192

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        """Remember ``url`` and initialise the underlying HTTPResponse."""
        self.url = url
        super().__init__(sock, debuglevel=debuglevel, method=method, url=url)

    def begin(self):
        """Parse the status line and headers, then tidy up the pipe.

        For the statuses listed in ``_body_ignored_responses`` the body (when
        its length is known and the connection is not going to be closed) is
        consumed right away and the response is closed, so that it does not
        stay in the socket and block the next request.  For a 200 the
        connection is explicitly marked as staying open.
        """
        http_client.HTTPResponse.begin(self)
        status = self.status
        if status in self._body_ignored_responses:
            if self.debuglevel >= 2:
                print("For status: [%s], will ready body, length: %s" % (
                    self.status, self.length))
            # Reading is only attempted when a length was announced and the
            # server keeps the connection open; otherwise a read may end in
            # a 'Connection reset by peer' error.
            body_can_be_read = (self.length is not None
                                and not self.will_close)
            if body_can_be_read:
                body = self.read(self.length)
                if self.debuglevel >= 9:
                    # This one can be huge and is generally not interesting
                    print("Consumed body: [%s]" % body)
            # Closes the response object, signalling we are done with it.
            self.close()
        elif status == 200:
            # The request went fine, so the connection with the server must
            # stay open even when http_client concluded otherwise (the
            # CONNECT response of the https-through-proxy case is one such
            # situation).
            self.will_close = False

    def finish(self):
        """Discard whatever is left of the body and close the response.

        Leftover bytes would block the next request on a persistent
        connection.

        :return: None when the response was already closed, otherwise the
            number of bytes that were discarded.
        """
        if self.isclosed():
            return None
        discarded = 0
        while self.length:
            # read() updates self.length
            chunk = self.read(min(self.length, self._discarded_buf_size))
            discarded += len(chunk)
            if not chunk:
                break
        if discarded:
            trace.mutter("%s bytes left on the HTTP socket", discarded)
        self.close()
        return discarded
3488+
3489+
3490+# Not inheriting from 'object' because http_client.HTTPConnection doesn't.
class AbstractHTTPConnection:
    """Mixin for HTTP(S) connections that can reset themselves on a bad response."""

    response_class = Response

    # A warning about servers answering range requests with the whole file
    # is only emitted when more than this many bytes had to be discarded.
    _range_warning_thresold = 1024 * 1024

    def __init__(self, report_activity=None):
        """Start with no captured response and the given activity callback."""
        self._response = None
        self._report_activity = report_activity
        self._ranges_received_whole_file = None

    def _mutter_connect(self):
        """Log the host:port (and proxied host, if any) about to be connected."""
        target = '%s:%s' % (self.host, self.port)
        proxied = self.proxied_host
        if proxied is not None:
            target = target + '(proxy for %s)' % proxied
        trace.mutter('* About to connect() to %s' % target)

    def getresponse(self):
        """Fetch the response and keep a reference so it can be cleaned up."""
        captured = http_client.HTTPConnection.getresponse(self)
        self._response = captured
        return captured

    def cleanup_pipe(self):
        """Read the remaining bytes of the last response if any."""
        response = self._response
        if response is not None:
            try:
                pending = response.finish()
                # Warn the user (once)
                if self._ranges_received_whole_file is None:
                    if (response.status == 200
                            and pending
                            and pending > self._range_warning_thresold):
                        self._ranges_received_whole_file = True
                        trace.warning(
                            'Got a 200 response when asking for multiple ranges,'
                            ' does your server at %s:%s support range requests?',
                            self.host, self.port)
            except socket.error as err:
                # The socket may be in a bad state here (including in some
                # test cases); it then needs no cleaning, so we only drop it
                # and let callers reconnect. Anything other than a
                # reset/abort is propagated.
                droppable = (
                    len(err.args) != 0
                    and err.args[0] in (errno.ECONNRESET, errno.ECONNABORTED))
                if not droppable:
                    raise
                self.close()
            self._response = None
        # Hide the socket while http_client.HTTPConnection.close() does its
        # housekeeping, then put it back.
        kept_sock = self.sock
        self.sock = None
        self.close()
        self.sock = kept_sock

    def _wrap_socket_for_reporting(self, sock):
        """Replace ``self.sock`` with a reporting wrapper around ``sock``."""
        self.sock = _ReportingSocket(sock, self._report_activity)
3553+
class HTTPConnection(AbstractHTTPConnection, http_client.HTTPConnection):
    """Plain-http connection whose socket is wrapped for activity reporting."""

    # XXX: Needs refactoring at the caller level.
    def __init__(self, host, port=None, proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Initialise both bases and remember the proxied host.

        ``ca_certs`` is accepted but not used; it only matters for https.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        http_client.HTTPConnection.__init__(self, host, port)
        self.proxied_host = proxied_host

    def connect(self):
        """Connect, then wrap the fresh socket so traffic gets reported."""
        http_debugging = 'http' in debug.debug_flags
        if http_debugging:
            self._mutter_connect()
        http_client.HTTPConnection.connect(self)
        raw_sock = self.sock
        self._wrap_socket_for_reporting(raw_sock)
3569+
3570+
class HTTPSConnection(AbstractHTTPConnection, http_client.HTTPSConnection):
    """HTTPS connection, optionally tunnelled through a proxy.

    The TCP connection is made by ``connect()``; the TLS layer is added by
    ``connect_to_origin()``, either immediately (no proxy) or later by the
    caller once the proxy tunnel has been set up.
    """

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 proxied_host=None,
                 report_activity=None, ca_certs=None):
        """Initialise the connection.

        :param host: host to connect to (the proxy when one is used).
        :param port: port to connect to, or None for the default.
        :param key_file: private key file for a client certificate.
        :param cert_file: client certificate file.
        :param proxied_host: ``host:port`` reached through the proxy, or
            None when connecting directly.
        :param report_activity: callback handed to the reporting socket.
        :param ca_certs: CA certificates file; when None the
            ``ssl.ca_certs`` configuration option is consulted instead.
        """
        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
        # key_file/cert_file are not forwarded: http.client.HTTPSConnection
        # stopped accepting them in Python 3.12, and the SSL context that
        # uses them is built in connect_to_origin() anyway.
        http_client.HTTPSConnection.__init__(self, host, port)
        self.key_file = key_file
        self.cert_file = cert_file
        self.proxied_host = proxied_host
        self.ca_certs = ca_certs

    def connect(self):
        """Open the TCP connection; start TLS right away unless proxied."""
        if 'http' in debug.debug_flags:
            self._mutter_connect()
        # Deliberately the plain HTTPConnection.connect: the TLS wrapping is
        # done by connect_to_origin().
        http_client.HTTPConnection.connect(self)
        self._wrap_socket_for_reporting(self.sock)
        if self.proxied_host is None:
            self.connect_to_origin()

    def connect_to_origin(self):
        """Wrap the current socket in TLS towards the origin server.

        :raises ssl.SSLError: when setting up the TLS layer fails; a hint
            about the relevant configuration options is noted first.
        """
        # FIXME JRV 2011-12-18: Use location config here?
        config_stack = config.GlobalStack()
        cert_reqs = config_stack.get('ssl.cert_reqs')
        # The TLS peer is the origin server: through a proxy that is the
        # proxied host, not self.host.
        if self.proxied_host is not None:
            host = self.proxied_host.split(":", 1)[0]
        else:
            host = self.host
        if cert_reqs == ssl.CERT_NONE:
            ui.ui_factory.show_user_warning('not_checking_ssl_cert', host=host)
            ui.ui_factory.suppressed_warnings.add('not_checking_ssl_cert')
            ca_certs = None
        else:
            if self.ca_certs is None:
                ca_certs = config_stack.get('ssl.ca_certs')
            else:
                ca_certs = self.ca_certs
            if ca_certs is None:
                trace.warning(
                    "No valid trusted SSL CA certificates file set. See "
                    "'brz help ssl.ca_certs' for more information on setting "
                    "trusted CAs.")
        try:
            ssl_context = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH, cafile=ca_certs)
            # check_hostname is set before verify_mode so that it is already
            # off when verification is disabled.
            ssl_context.check_hostname = cert_reqs != ssl.CERT_NONE
            if self.cert_file:
                ssl_context.load_cert_chain(
                    keyfile=self.key_file, certfile=self.cert_file)
            ssl_context.verify_mode = cert_reqs
            # Use the origin host for the server name and hostname check:
            # self.host is the proxy when the connection is tunnelled.
            ssl_sock = ssl_context.wrap_socket(
                self.sock, server_hostname=host)
        except ssl.SSLError:
            trace.note(
                "\n"
                "See `brz help ssl.ca_certs` for how to specify trusted CA "
                "certificates.\n"
                "Pass -Ossl.cert_reqs=none to disable certificate "
                "verification entirely.\n")
            raise
        # Wrap the ssl socket before anybody use it
        self._wrap_socket_for_reporting(ssl_sock)
3632+
3633+
class Request(urllib_request.Request):
    """A custom Request object.

    urllib_request determines the request method heuristically (based on
    the presence or absence of data). We set the method statically.

    The Request object tracks:
    - the connection the request will be made on.
    - the authentication parameters needed to preventively set
      the authentication header once a first authentication has
      been made.
    """

    def __init__(self, method, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 connection=None, parent=None):
        """Build a request with an explicit method.

        :param method: HTTP method returned by get_method().
        :param url: URL of the request.
        :param data: optional request body.
        :param headers: optional dict of headers; None means no headers.
        :param origin_req_host: passed through to urllib_request.Request.
        :param unverifiable: passed through to urllib_request.Request.
        :param connection: connection the request should be sent on, if
            one already exists.
        :param parent: the request this one was redirected from, if any.
        """
        # A None sentinel avoids sharing one dict between all calls.
        if headers is None:
            headers = {}
        urllib_request.Request.__init__(
            self, url, data, headers,
            origin_req_host, unverifiable)
        self.method = method
        self.connection = connection
        # To handle redirections
        self.parent = parent
        self.redirected_to = None
        # Unless told otherwise, redirections are not followed
        self.follow_redirections = False
        # auth and proxy_auth are dicts containing, at least
        # (scheme, host, port, realm, user, password, protocol, path).
        # The dict entries are mostly handled by the AuthHandler.
        # Some authentication schemes may add more entries.
        self.auth = {}
        self.proxy_auth = {}
        self.proxied_host = None

    def get_method(self):
        """Return the method given at construction time."""
        return self.method

    def set_proxy(self, proxy, type):
        """Set the proxy and remember the proxied host.

        :param proxy: proxy ``host[:port]`` handed to urllib_request.
        :param type: scheme handed to urllib_request.
        """
        host, port = splitport(self.host)
        if port is None:
            # We need to set the default port ourselves way before it gets set
            # in the HTTP[S]Connection object at build time.
            if self.type == 'https':
                conn_class = HTTPSConnection
            else:
                conn_class = HTTPConnection
            port = conn_class.default_port
        self.proxied_host = '%s:%s' % (host, port)
        urllib_request.Request.set_proxy(self, proxy, type)
        # When urllib_request makes a https request with our wrapper code and
        # a proxy, it sets Host to the https proxy, not the host we want to
        # talk to. I'm fairly sure this is our fault, but what is the cause is
        # an open question. -- Robert Collins May 8 2010.
        self.add_unredirected_header('Host', self.proxied_host)
3690+
3691+
class _ConnectRequest(Request):
    """The CONNECT request sent to a proxy to open a tunnel to a host."""

    def __init__(self, request):
        """Constructor

        :param request: the first request sent to the proxied host, already
            processed by the opener (i.e. proxied_host is already set).
        :raises AssertionError: if ``request.proxied_host`` is None.
        """
        # We give a fake url and redefine selector or urllib_request will be
        # confused
        Request.__init__(self, 'CONNECT', request.get_full_url(),
                         connection=request.connection)
        if request.proxied_host is None:
            raise AssertionError()
        self.proxied_host = request.proxied_host

    @property
    def selector(self):
        """The selector of a CONNECT request is the proxied ``host:port``."""
        return self.proxied_host

    @selector.setter
    def selector(self, value):
        # urllib.request.Request assigns ``selector`` while parsing the URL
        # (and in set_proxy); without a setter that assignment raises
        # AttributeError. The value is ignored on purpose: the selector is
        # always the proxied host.
        pass

    def get_selector(self):
        """Return the selector (kept for callers using the accessor form)."""
        return self.selector

    def set_proxy(self, proxy, type):
        """Set the proxy without remembering the proxied host.

        We already know the proxied host by definition, the CONNECT request
        occurs only when the connection goes through a proxy. The usual
        processing (masquerade the request so that the connection is done to
        the proxy while the request is targeted at another host) does not apply
        here. In fact, the connection is already established with proxy and we
        just want to enable the SSL tunneling.
        """
        urllib_request.Request.set_proxy(self, proxy, type)
3726+
3727+
class ConnectionHandler(urllib_request.BaseHandler):
    """Pre-processes requests so that connections can be shared.

    urllib_request gives no access to the HTTPConnection object it uses
    internally, yet connection sharing needs it. The connection is therefore
    attached to the request just before the request is processed, and
    do_open is overridden for http[s] requests in AbstractHTTPHandler.
    """

    handler_order = 1000  # after all pre-processings

    def __init__(self, report_activity=None, ca_certs=None):
        """Keep the values handed to every connection created later."""
        self._report_activity = report_activity
        self.ca_certs = ca_certs

    def create_connection(self, request, http_connection_class):
        """Instantiate ``http_connection_class`` for the host of ``request``.

        :raises urlutils.InvalidURL: when the request has no host, or when
            http_client rejects the host with InvalidURL.
        """
        target_host = request.host
        if not target_host:
            # Just a bit of paranoia here, this should have been
            # handled in the higher levels
            raise urlutils.InvalidURL(request.get_full_url(), 'no host given.')

        # The connection object is only created here; it will not connect
        # until the first request is made.
        try:
            return http_connection_class(
                target_host,
                proxied_host=request.proxied_host,
                report_activity=self._report_activity,
                ca_certs=self.ca_certs)
        except http_client.InvalidURL:
            # There is only one occurrence of InvalidURL in http_client
            raise urlutils.InvalidURL(request.get_full_url(),
                                      extra='nonnumeric port')

    def capture_connection(self, request, http_connection_class):
        """Capture or inject the request connection.

        Two cases:
        - the request has no connection: a new one is created and stored on
          the request,

        - the request already has a connection: it is left in place so that
          the Transport object can pick it up from the request and propagate
          it to later requests or to cloned transports.
        """
        if request.connection is None:
            request.connection = self.create_connection(
                request, http_connection_class)

        # All connections will pass here, propagate debug level
        request.connection.set_debuglevel(DEBUG)
        return request

    def http_request(self, request):
        """Attach an HTTPConnection to ``request`` if it has none."""
        return self.capture_connection(request, HTTPConnection)

    def https_request(self, request):
        """Attach an HTTPSConnection to ``request`` if it has none."""
        return self.capture_connection(request, HTTPSConnection)
3793+
3794+
class AbstractHTTPHandler(urllib_request.AbstractHTTPHandler):
    """A custom handler for HTTP(S) requests.

    We override urllib_request.AbstractHTTPHandler to get a better
    control of the connection, the ability to implement new
    request types and return a response able to cope with
    persistent connections.
    """

    # We change our order to be before urllib_request HTTP[S]Handlers
    # and be chosen instead of them (the first http_open called
    # wins).
    handler_order = 400

    _default_headers = {'Pragma': 'no-cache',
                        'Cache-control': 'max-age=0',
                        'Connection': 'Keep-Alive',
                        'User-agent': default_user_agent(),
                        'Accept': '*/*',
                        }

    def __init__(self):
        urllib_request.AbstractHTTPHandler.__init__(self, debuglevel=DEBUG)

    def http_request(self, request):
        """Add each default header that the request does not already carry.

        :return: the same request object.
        """
        for name, value in self._default_headers.items():
            if name not in request.headers:
                request.headers[name] = value
        # FIXME: We may have to add the Content-Length header if
        # we have data to send.
        return request

    def retry_or_raise(self, http_class, request, first_try):
        """Retry the request (once) or raise the exception.

        Must be called from an ``except`` block: the exception being handled
        is read from ``sys.exc_info()``.

        urllib_request raises exception of application level kind, we
        just have to translate them.

        http_client can raise exceptions of transport level (badly
        formatted dialog, loss of connection or socket level
        problems). In that case we should issue the request again
        (http_client will close and reopen a new connection if
        needed).

        :param http_class: connection class passed on to do_open.
        :param request: the request that failed.
        :param first_try: True if the request has not been retried yet.
        :return: the response of the retried request.
        """
        # When an exception occurs, we give back the original
        # Traceback or the bugs are hard to diagnose.
        exc_type, exc_val, exc_tb = sys.exc_info()
        if exc_type == socket.gaierror:
            # No need to retry, that will not help
            origin_req_host = request.origin_req_host
            raise errors.ConnectionError("Couldn't resolve host '%s'"
                                         % origin_req_host,
                                         orig_error=exc_val)
        elif isinstance(exc_val, http_client.ImproperConnectionState):
            # The http_client pipeline is in incorrect state, it's a bug in our
            # implementation.
            raise exc_val.with_traceback(exc_tb)
        else:
            if first_try:
                if self._debuglevel >= 2:
                    print('Received exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Will retry, %s %r' % (method, url))
                request.connection.close()
                response = self.do_open(http_class, request, False)
            else:
                if self._debuglevel >= 2:
                    print('Received second exception: [%r]' % exc_val)
                    print(' On connection: [%r]' % request.connection)
                if exc_type in (http_client.BadStatusLine,
                                http_client.UnknownProtocol):
                    # http_client.BadStatusLine and
                    # http_client.UnknownProtocol indicates that a
                    # bogus server was encountered or a bad
                    # connection (i.e. transient errors) is
                    # experimented, we have already retried once
                    # for that request so we raise the exception.
                    my_exception = errors.InvalidHttpResponse(
                        request.get_full_url(),
                        'Bad status line received',
                        orig_error=exc_val)
                elif (isinstance(exc_val, socket.error) and len(exc_val.args)
                      and exc_val.args[0] in (errno.ECONNRESET, 10053, 10054)):
                    # 10053 == WSAECONNABORTED
                    # 10054 == WSAECONNRESET
                    raise errors.ConnectionReset(
                        "Connection lost while sending request.")
                else:
                    # All other exception are considered connection related.

                    # socket errors generally occurs for reasons
                    # far outside our scope, so closing the
                    # connection and retrying is the best we can
                    # do.
                    selector = request.selector
                    my_exception = errors.ConnectionError(
                        msg='while sending %s %s:' % (request.get_method(),
                                                      selector),
                        orig_error=exc_val)

                if self._debuglevel >= 2:
                    print('On connection: [%r]' % request.connection)
                    method = request.get_method()
                    url = request.get_full_url()
                    print(' Failed again, %s %r' % (method, url))
                    print(' Will raise: [%r]' % my_exception)
                raise my_exception.with_traceback(exc_tb)
        return response

    def do_open(self, http_class, request, first_try=True):
        """See urllib_request.AbstractHTTPHandler.do_open for the general idea.

        The request will be retried once if it fails.

        :param http_class: connection class, forwarded to retry_or_raise.
        :param request: the request to send; it must carry a connection.
        :param first_try: False when this call is itself the retry.
        :return: the response, with ``msg`` set to its reason phrase.
        :raises AssertionError: if the request has no connection.
        """
        connection = request.connection
        if connection is None:
            raise AssertionError(
                'Cannot process a request without a connection')

        # Get all the headers
        headers = {}
        headers.update(request.header_items())
        headers.update(request.unredirected_hdrs)
        # Some servers or proxies will choke on headers not properly
        # cased. Since we replace urllib_request.AbstractHTTPHandler.do_open
        # we title-case the names ourselves before sending.
        headers = {name.title(): val for name, val in headers.items()}

        try:
            method = request.get_method()
            url = request.selector
            # FIXME: implements 100-continue; for now the body is sent
            # together with the headers.
            if sys.version_info[:2] >= (3, 6):
                connection._send_request(method, url,
                                         request.data,
                                         headers, encode_chunked=False)
            else:
                connection._send_request(method, url,
                                         request.data,
                                         headers)
            if 'http' in debug.debug_flags:
                trace.mutter('> %s %s' % (method, url))
                hdrs = []
                for k, v in headers.items():
                    # People are often told to paste -Dhttp output to help
                    # debug. Don't compromise credentials.
                    if k in ('Authorization', 'Proxy-Authorization'):
                        v = '<masked>'
                    hdrs.append('%s: %s' % (k, v))
                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
            if self._debuglevel >= 1:
                print('Request sent: [%r] from (%s)'
                      % (request, request.connection.sock.getsockname()))
            response = connection.getresponse()
        except (ssl.SSLError, ssl.CertificateError):
            # Something is wrong with either the certificate or the hostname,
            # re-trying won't help
            raise
        except (socket.gaierror, http_client.BadStatusLine,
                http_client.UnknownProtocol,
                socket.error, http_client.HTTPException):
            response = self.retry_or_raise(http_class, request, first_try)

        # FIXME: HTTPConnection does not fully support 100-continue (the
        # server responses are just ignored).
        response.msg = response.reason
        return response
4018+
4019+
class HTTPHandler(AbstractHTTPHandler):
    """Handler for http requests; simply delegates to HTTPConnection."""

    def http_open(self, request):
        """Send ``request`` through do_open with the HTTPConnection class."""
        connection_class = HTTPConnection
        return self.do_open(connection_class, request)
4025+
4026+
class HTTPSHandler(AbstractHTTPHandler):
    """A custom handler that just thunks into HTTPSConnection"""

    https_request = AbstractHTTPHandler.http_request

    def https_open(self, request):
        """Open an https request, setting up the proxy tunnel first if needed.

        When the connection has no socket yet, goes through a proxy, and the
        request is not itself a CONNECT, a CONNECT request is issued first
        and the TLS layer is then established on the same connection.

        :raises errors.ConnectionError: if the proxy does not answer the
            CONNECT request with a 200.
        """
        connection = request.connection
        if connection.sock is None and \
                connection.proxied_host is not None and \
                request.get_method() != 'CONNECT':  # Don't loop
            # FIXME: We need a gazillion connection tests here, but we still
            # miss a https server :-( :
            # - with and without proxy
            # - with and without certificate
            # - with self-signed certificate
            # - with and without authentication
            # - with good and bad credentials (especially the proxy auth around
            #   CONNECT)
            # - with basic and digest schemes
            # - reconnection on errors
            # - connection persistence behaviour (including reconnection)

            # We are about to connect for the first time via a proxy, we must
            # issue a CONNECT request first to establish the encrypted link
            connect = _ConnectRequest(request)
            response = self.parent.open(connect)
            if response.code != 200:
                # The handler itself has no ``host`` attribute; the proxy
                # address lives on the connection.
                raise errors.ConnectionError(
                    "Can't connect to %s via proxy %s" % (
                        connect.proxied_host, connection.host))
            # Housekeeping
            connection.cleanup_pipe()
            # Establish the connection encryption
            connection.connect_to_origin()
            # Propagate the connection to the original request
            request.connection = connection
        return self.do_open(HTTPSConnection, request)
4063+
4064+
class HTTPRedirectHandler(urllib_request.HTTPRedirectHandler):
    """Redirect handling tailored to our Request class and our policy.

    A dedicated implementation is needed because we use a specific Request
    object and want a specific redirection policy.
    """
    _debuglevel = DEBUG
    # RFC2616 says that only read requests should be redirected
    # without interacting with the user. But Breezy uses some
    # shortcuts to optimize against roundtrips which can lead to
    # write requests being issued before read requests of
    # containing dirs can be redirected. So we redirect write
    # requests in the same way, which seems to respect the spirit
    # of the RFC if not its letter.

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """See urllib_request.HTTPRedirectHandler.redirect_request"""
        # Updating the request in place would have been preferable, but a
        # urllib_request.Request is too complicated to build for a simple
        # equivalent update to exist. A new request is created instead and
        # linked to the one it comes from through ``parent``.
        #
        # Codes other than 301, 302, 303, 307 and 308 make no sense in our
        # context and are treated as errors:
        #
        # 300: Multiple choices for different representations of the URI.
        #      Using that mechanism with Breezy would violate the protocol
        #      neutrality of Transport.
        # 304: Not modified (SHOULD only occur with conditional GETs, which
        #      our implementation does not use).
        # 305: Use proxy. I can't imagine this one occurring in our
        #      context -- vila/20060909
        # 306: Unused (if the RFC says so...)
        #
        # A 302 answer to a HEAD request may look like a sufficient hint
        # that the file exists, but to be sure the redirection MUST be
        # followed.
        origin_req_host = req.origin_req_host

        if code not in (301, 302, 303, 307, 308):
            raise urllib_request.HTTPError(
                req.get_full_url(), code, msg, headers, fp)

        # TODO: It will be nice to be able to detect virtual hosts sharing
        # the same IP address, that will allow us to share the same
        # connection...
        return Request(req.get_method(), newurl,
                       headers=req.headers,
                       origin_req_host=origin_req_host,
                       unverifiable=True,
                       connection=None,
                       parent=req,
                       )

    def http_error_302(self, req, fp, code, msg, headers):
        """Requests the redirected to URI.

        Copied from urllib_request to be able to clean the pipe of the
        associated connection, *before* issuing the redirected request but
        *after* having eventually raised an error.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.

        # TODO: Once we get rid of addinfourl objects, the
        # following will need to be updated to use correct case
        # for headers.
        for header_name in ('location', 'uri'):
            if header_name in headers:
                target = headers.get(header_name)
                break
        else:
            return

        target = urljoin(req.get_full_url(), target)

        if self._debuglevel >= 1:
            print('Redirected to: %s (followed: %r)' % (target,
                                                        req.follow_redirections))
        if req.follow_redirections is False:
            # Only record where the redirection points to.
            req.redirected_to = target
            return fp

        # This call succeeds or raises an error. urllib_request returns
        # if redirect_request returns None, but our
        # redirect_request never returns None.
        redirected_req = self.redirect_request(req, fp, code, msg, headers,
                                               target)

        # Loop detection: redirect_dict counts the visits of each url and is
        # shared along the whole redirect chain.
        if not hasattr(req, 'redirect_dict'):
            visited = redirected_req.redirect_dict = req.redirect_dict = {}
        else:
            visited = redirected_req.redirect_dict = req.redirect_dict
            too_many_repeats = visited.get(target, 0) >= self.max_repeats
            if too_many_repeats or len(visited) >= self.max_redirections:
                raise urllib_request.HTTPError(req.get_full_url(), code,
                                               self.inf_msg + msg, headers, fp)
        visited[target] = visited.get(target, 0) + 1

        # We can close the fp now that we are sure that we won't
        # use it with HTTPError.
        fp.close()
        # We have all we need already in the response
        req.connection.cleanup_pipe()

        return self.parent.open(redirected_req)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302
4185+
4186+
4187+class ProxyHandler(urllib_request.ProxyHandler):
4188+ """Handles proxy setting.
4189+
4190+ Copied and modified from urllib_request to be able to modify the request during
4191+ the request pre-processing instead of modifying it at _open time. As we
4192+ capture (or create) the connection object during request processing, _open
4193+ time was too late.
4194+
4195+ The main task is to modify the request so that the connection is done to
4196+ the proxy while the request still refers to the destination host.
4197+
4198+ Note: the proxy handling *may* modify the protocol used; the request may be
4199+ against an https server proxied through an http proxy. So, https_request
4200+ will be called, but later it's really http_open that will be called. This
4201+ explains why we don't have to call self.parent.open as the urllib_request did.
4202+ """
4203+
4204+ # Proxies must be in front
4205+ handler_order = 100
4206+ _debuglevel = DEBUG
4207+
4208+ def __init__(self, proxies=None):
4209+ urllib_request.ProxyHandler.__init__(self, proxies)
4210+ # First, let's get rid of urllib_request implementation
4211+ for type, proxy in self.proxies.items():
4212+ if self._debuglevel >= 3:
4213+ print('Will unbind %s_open for %r' % (type, proxy))
4214+ delattr(self, '%s_open' % type)
4215+
4216+ def bind_scheme_request(proxy, scheme):
4217+ if proxy is None:
4218+ return
4219+ scheme_request = scheme + '_request'
4220+ if self._debuglevel >= 3:
4221+ print('Will bind %s for %r' % (scheme_request, proxy))
4222+ setattr(self, scheme_request,
4223+ lambda request: self.set_proxy(request, scheme))
4224+ # We are interested only by the http[s] proxies
4225+ http_proxy = self.get_proxy_env_var('http')
4226+ bind_scheme_request(http_proxy, 'http')
4227+ https_proxy = self.get_proxy_env_var('https')
4228+ bind_scheme_request(https_proxy, 'https')
4229+
4230+ def get_proxy_env_var(self, name, default_to='all'):
4231+ """Get a proxy env var.
4232+
4233+ Note that we indirectly rely on
4234+ urllib.getproxies_environment taking into account the
4235+ uppercased values for proxy variables.
4236+ """
4237+ try:
4238+ return self.proxies[name.lower()]
4239+ except KeyError:
4240+ if default_to is not None:
4241+ # Try to get the alternate environment variable
4242+ try:
4243+ return self.proxies[default_to]
4244+ except KeyError:
4245+ pass
4246+ return None
4247+
4248+ def proxy_bypass(self, host):
4249+ """Check if host should be proxied or not.
4250+
4251+ :returns: True to skip the proxy, False otherwise.
4252+ """
4253+ no_proxy = self.get_proxy_env_var('no', default_to=None)
4254+ bypass = self.evaluate_proxy_bypass(host, no_proxy)
4255+ if bypass is None:
4256+ # Nevertheless, there are platform-specific ways to
4257+ # ignore proxies...
4258+ return urllib_request.proxy_bypass(host)
4259+ else:
4260+ return bypass
4261+
def evaluate_proxy_bypass(self, host, no_proxy):
    """Check the host against a comma-separated no_proxy list as a string.

    Each entry may be ``host`` or ``host:port`` and may use the glob
    wildcards ``*`` (any run of characters) and ``?`` (any single
    character). Every other character is matched literally, so entries
    containing regular expression metacharacters (an IPv6 literal such
    as ``[::1]``, a stray parenthesis, ...) are neither misinterpreted
    nor able to raise ``re.error``. The comparison ignores case and is
    anchored at the start of the host name only.

    :param host: ``host:port`` being requested

    :param no_proxy: comma-separated list of hosts to access directly.

    :returns: True to skip the proxy, False not to, or None to
        leave it to urllib.
    """
    if no_proxy is None:
        # All hosts are proxied
        return False
    hhost, hport = splitport(host)
    # Does host match any of the domains mentioned in no_proxy? The rules
    # about what is authorized in no_proxy are fuzzy (to say the least).
    # We try to allow most commonly seen values.
    for domain in no_proxy.split(','):
        domain = domain.strip()
        if not domain:
            continue
        dhost, dport = splitport(domain)
        # An entry that names a port only applies to that very port.
        if dport is not None and dport != hport:
            continue
        # Neutralize every regex metacharacter first, then re-enable the
        # two supported glob wildcards.
        pattern = re.escape(dhost)
        pattern = pattern.replace(r'\*', '.*').replace(r'\?', '.')
        if re.match(pattern, hhost, re.IGNORECASE):
            return True
    # Nothing explicitly excludes the host
    return None
4294+
def set_proxy(self, request, type):
    """Route ``request`` through the proxy configured for ``type``.

    Requests whose host must bypass the proxy are returned untouched.
    Otherwise the proxy url is parsed, the request's ``proxy_auth`` dict
    is initialized if it is still empty, and ``request.set_proxy`` is
    called with the proxy ``host[:port]``.

    :param request: The request about to be sent.
    :param type: The url scheme the proxy is looked up for.
    :return: The (possibly updated) request.
    :raises urlutils.InvalidURL: If the proxy url has no host component.
    """
    if self.proxy_bypass(request.host):
        return request

    proxy = self.get_proxy_env_var(type)
    if self._debuglevel >= 3:
        print('set_proxy %s_request for %r' % (type, proxy))
    # FIXME: python 2.5 urlparse provides a better _parse_proxy which can
    # grok user:password@host:port as well as
    # http://user:password@host:port

    proxy_url = transport.ConnectedTransport._split_url(proxy)
    if not proxy_url.host:
        raise urlutils.InvalidURL(proxy, 'No host component')

    if request.proxy_auth == {}:
        # No proxy auth parameters are available yet: this is the first
        # proxied request, so initialize them. The scheme (the
        # authentication scheme) and realm will be set by the AuthHandler.
        proxy_auth = {}
        proxy_auth['host'] = proxy_url.host
        proxy_auth['port'] = proxy_url.port
        proxy_auth['user'] = proxy_url.user
        proxy_auth['password'] = proxy_url.password
        proxy_auth['protocol'] = proxy_url.scheme
        # We ignore path since we connect to a proxy
        proxy_auth['path'] = None
        request.proxy_auth = proxy_auth

    phost = proxy_url.host
    if proxy_url.port is not None:
        phost = phost + ':%d' % proxy_url.port
    request.set_proxy(phost, type)
    if self._debuglevel >= 3:
        print('set_proxy: proxy set to %s://%s' % (type, phost))
    return request
4331+
4332+
class AbstractAuthHandler(urllib_request.BaseHandler):
    """Shared machinery for all the http authentication handlers.

    Handles authentication errors and preventively sets authentication
    headers once a first authentication succeeded.

    This can be used for http and proxy, as well as for basic, negotiate
    and digest authentications, giving a unified interface for all
    authentication handlers (urllib_request provides far too many with
    different policies).

    The interaction between this handler and the urllib_request framework
    is not obvious; it works as follows.

    opener.open(request) is called:

    - that may trigger http_request which will add an authentication
      header (self.build_auth_header) if enough info is available,

    - the request is sent to the server,

    - if an authentication error is received, self.auth_required is
      called: the authentication info is read from the error headers and
      self.auth_match is called to check that we are able to try the
      authentication and to complete the authentication parameters,

    - parent.open(request) is called; that may trigger http_request and
      add a header (self.build_auth_header), but this time all the
      required info is there (keep in mind that the request and
      authentication used in the recursive calls are really (and must
      be) the *same* objects),

    - if the call returns a response, the authentication has been
      successful and the request authentication parameters have been
      updated.
    """

    # The scheme as it appears in the server header (lower cased)
    scheme = None

    # We don't want to retry authenticating endlessly
    _max_retry = 3

    # Whether the auth mechanism requires a username.
    requires_username = True

    # The following attributes should be defined by daughter classes:
    # - auth_required_header: the header received from the server
    # - auth_header: the header sent in the request

    def __init__(self):
        # None means "not inside a try/fail cycle of authentications";
        # auth_required() starts counting from there.
        self._retry_count = None

    def _parse_auth_header(self, server_header):
        """Split an authentication header into scheme and parameters.

        :param server_header: The value of the header sent by the server
            describing the authentication request.

        :return: A tuple (scheme, remainder), scheme being the first word
            in the given header (lower cased); remainder may be None.
        """
        pieces = server_header.split(None, 1)
        if len(pieces) == 2:
            scheme, remainder = pieces
        else:
            # No parameters follow the scheme
            scheme, remainder = server_header, None
        return (scheme.lower(), remainder)

    def update_auth(self, auth, key, value):
        """Update a value in auth marking the auth as modified if needed"""
        if auth.get(key, None) != value:
            auth[key] = value
            auth['modified'] = True

    def auth_required(self, request, headers):
        """Retry the request if the auth scheme is ours.

        :param request: The request needing authentication.
        :param headers: The headers for the authentication error response.
        :return: None or the response for the authenticated request.
        """
        # Don't try to authenticate endlessly. The retries being recursive
        # calls, None identifies the first one.
        previous_attempts = self._retry_count
        if previous_attempts is None:
            self._retry_count = 1
        else:
            self._retry_count = previous_attempts + 1
        if self._retry_count > self._max_retry:
            # Let's be ready for next round
            self._retry_count = None
            return None

        challenges = headers.get_all(self.auth_required_header)
        if not challenges:
            # The http error MUST have the associated header. This must
            # never happen in production code.
            trace.mutter('%s not found', self.auth_required_header)
            return None

        auth = self.get_auth(request)
        auth['modified'] = False
        # Put some common info in auth if the caller didn't
        if auth.get('path', None) is None:
            url = urlutils.URL.from_string(request.get_full_url())
            common_info = (('protocol', url.scheme),
                           ('host', url.host),
                           ('port', url.port),
                           ('path', url.path))
            for key, value in common_info:
                self.update_auth(auth, key, value)

        # FIXME: the auth handler should be selected at a single place
        # instead of letting all handlers try to match all headers, but
        # the current design doesn't allow a simple implementation.
        for challenge in challenges:
            # Several schemes can be proposed by the server, try to match
            # each one in turn
            if not self.auth_match(challenge, auth):
                continue

            # auth_match may have modified auth (by adding the password
            # or changing the realm, for example)
            header_sent = request.get_header(self.auth_header, None)
            if header_sent is not None and not auth['modified']:
                # We already tried that, give up
                return None

            # Only the most secure scheme proposed by the server should
            # be used. Since the handlers use 'handler_order' to describe
            # that property, the first handler tried takes precedence and
            # the others should not attempt to authenticate if the best
            # one failed.
            best_scheme = auth.get('best_scheme', None)
            if best_scheme is None:
                # At that point, if the current handler doesn't succeed
                # the credentials are wrong (or incomplete), but we know
                # that the associated scheme should be used.
                best_scheme = self.scheme
                auth['best_scheme'] = best_scheme
            if best_scheme != self.scheme:
                continue

            if self.requires_username and auth.get('user', None) is None:
                # Without a known user, we can't authenticate
                return None

            # Housekeeping
            request.connection.cleanup_pipe()
            # Retry the request with an authentication header added
            response = self.parent.open(request)
            if response:
                self.auth_successful(request, response)
            return response

        # We are not qualified to handle the authentication.
        # Note: the authentication error handling will try all available
        # handlers. If one of them authenticates successfully, a response
        # will be returned. If none of them succeeds, None will be
        # returned and the error handler will raise the 401 'Unauthorized'
        # or the 407 'Proxy Authentication Required' error.
        return None

    def add_auth_header(self, request, header):
        """Add the authentication header to the request"""
        request.add_unredirected_header(self.auth_header, header)

    def auth_match(self, header, auth):
        """Check that we are able to handle that authentication scheme.

        The request authentication parameters may need to be updated with
        info from the server. Some of these parameters, when combined,
        are considered to be the authentication key: if one of them
        changes, the authentication result may change. 'user' and
        'password' are examples, but some auth schemes may have others
        (digest's nonce is an example, digest's nonce_count is a
        *counter-example*). Such parameters must be updated by using the
        update_auth() method.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known. They may be
             updated.
        :returns: True if we can try to handle the authentication.
        """
        raise NotImplementedError(self.auth_match)

    def build_auth_header(self, auth, request):
        """Build the value of the header used to authenticate.

        :param auth: The auth parameters needed to build the header.
        :param request: The request needing authentication.

        :return: None or header.
        """
        raise NotImplementedError(self.build_auth_header)

    def auth_successful(self, request, response):
        """The authentication was successful for the request.

        Additional info may be available in the response.

        :param request: The successfully authenticated request.
        :param response: The server response (may contain auth info).
        """
        # It may happen that we need to reconnect later, let's be ready
        self._retry_count = None

    def get_user_password(self, auth):
        """Ask user for a password if none is already available.

        :param auth: authentication info gathered so far (from the
            initial url and then during dialog with the server).
        :return: A (user, password) tuple.
        """
        auth_conf = config.AuthenticationConfig()
        user = auth.get('user', None)
        password = auth.get('password', None)
        realm = auth['realm']
        port = auth.get('port', None)

        if user is None:
            user = auth_conf.get_user(
                auth['protocol'], auth['host'],
                port=port, path=auth['path'],
                realm=realm, ask=True,
                prompt=self.build_username_prompt(auth))
        # A password is only looked up once a user is known.
        if user is not None and password is None:
            password = auth_conf.get_password(
                auth['protocol'], auth['host'], user,
                port=port,
                path=auth['path'], realm=realm,
                prompt=self.build_password_prompt(auth))

        return user, password

    def _build_password_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that
        information in the prompt, so we build the prompt from the
        authentication dict which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to
        the user. The daughter classes should implement a public
        build_password_prompt using this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(user)s@%(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' password')
        return u''.join(pieces)

    def _build_username_prompt(self, auth):
        """Build a prompt taking the protocol used into account.

        The AuthHandler is used by http and https, we want that
        information in the prompt, so we build the prompt from the
        authentication dict which contains all the needed parts.

        Also, http and proxy AuthHandlers present different prompts to
        the user. The daughter classes should implement a public
        build_username_prompt using this method.
        """
        pieces = [u'%s' % auth['protocol'].upper(), u' %(host)s']
        realm = auth['realm']
        if realm is not None:
            pieces.append(u", Realm: '%s'" % realm)
        pieces.append(u' username')
        return u''.join(pieces)

    def http_request(self, request):
        """Insert an authentication header if information is available"""
        auth = self.get_auth(request)
        if self.auth_params_reusable(auth):
            header = self.build_auth_header(auth, request)
            self.add_auth_header(request, header)
        return request

    https_request = http_request  # FIXME: Need test
4612+
4613+
class NegotiateAuthHandler(AbstractAuthHandler):
    """An authentication handler that handles WWW-Authenticate: Negotiate.

    At the moment this handler supports just Kerberos. In the future,
    NTLM support may also be added.
    """

    scheme = 'negotiate'
    handler_order = 480
    requires_username = False

    def auth_match(self, header, auth):
        """Accept 'negotiate' challenges for which a GSSAPI response exists.

        On success the scheme and the response to send are recorded in
        ``auth``.
        """
        scheme, _ = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        self.update_auth(auth, 'scheme', scheme)
        gss_response = self._auth_match_kerberos(auth)
        if gss_response is None:
            return False
        # Optionally should try to authenticate using NTLM here
        self.update_auth(auth, 'negotiate_response', gss_response)
        return True

    def _auth_match_kerberos(self, auth):
        """Try to create a GSSAPI response for authenticating against a host.

        :return: The response, or None when the kerberos module is not
            available or one of the GSSAPI steps fails.
        """
        global kerberos, checked_kerberos
        # Attempt the import only once per process.
        if kerberos is None and not checked_kerberos:
            try:
                import kerberos
            except ImportError:
                kerberos = None
            checked_kerberos = True
        if kerberos is None:
            return None

        status, context = kerberos.authGSSClientInit("HTTP@%(host)s" % auth)
        if status < 1:
            trace.warning('Unable to create GSSAPI context for %s: %d',
                          auth['host'], status)
            return None

        status = kerberos.authGSSClientStep(context, "")
        if status < 0:
            trace.mutter('authGSSClientStep failed: %d', status)
            return None
        return kerberos.authGSSClientResponse(context)

    def build_auth_header(self, auth, request):
        """Return the header value carrying the recorded response."""
        return "Negotiate %s" % auth['negotiate_response']

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` already holds a usable negotiate response."""
        # If the auth scheme is known, it means a previous authentication
        # was successful, all information is available, no further checks
        # are needed.
        if auth.get('scheme', None) != 'negotiate':
            return False
        return auth.get('negotiate_response', None) is not None
4668+
4669+
class BasicAuthHandler(AbstractAuthHandler):
    """A custom basic authentication handler."""

    scheme = 'basic'
    handler_order = 500
    auth_regexp = re.compile('realm="([^"]*)"', re.I)

    def build_auth_header(self, auth, request):
        """Build the 'Basic' header value from the user and password.

        :param auth: The auth parameters; 'user' and 'password' are used.
        :param request: The request needing authentication (unused).
        :return: 'Basic ' followed by the base64 of ``user:password``
            (utf-8 encoded).
        """
        raw = '%s:%s' % (auth['user'], auth['password'])
        auth_header = 'Basic ' + \
            base64.b64encode(raw.encode('utf-8')).decode('ascii')
        return auth_header

    def extract_realm(self, header_value):
        """Find the realm in the parameters of a 'Basic' challenge.

        :param header_value: What follows the scheme in the server
            header, or None when the server sent the bare scheme.
        :return: A (match, realm) tuple; both are None when no realm
            parameter is present.
        """
        if header_value is None:
            # A bare "Basic" challenge carries no parameters at all;
            # searching None would raise TypeError.
            return None, None
        match = self.auth_regexp.search(header_value)
        realm = None
        if match:
            realm = match.group(1)
        return match, realm

    def auth_match(self, header, auth):
        """Accept 'basic' challenges that carry a realm.

        The scheme and realm are recorded in ``auth`` and the user and
        password are acquired if either of them is still unknown.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known; may be updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False

        match, realm = self.extract_realm(raw_auth)
        if match:
            # Put useful info into auth
            self.update_auth(auth, 'scheme', scheme)
            self.update_auth(auth, 'realm', realm)
            if (auth.get('user', None) is None
                    or auth.get('password', None) is None):
                user, password = self.get_user_password(auth)
                self.update_auth(auth, 'user', user)
                self.update_auth(auth, 'password', password)
        return match is not None

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` was filled by a previous basic exchange."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'basic'
4712+
4713+
def get_digest_algorithm_impls(algorithm):
    """Return the hash helpers implementing a digest algorithm.

    The algorithm name is compared case-insensitively, so ``md5`` is
    accepted as well as ``MD5``.

    :param algorithm: The algorithm name announced by the server
        ('MD5' and 'SHA' are supported).
    :return: A (H, KD) tuple of callables, or (None, None) when the
        algorithm is not supported. H takes bytes and delegates to
        ``osutils.md5(...).hexdigest()`` or ``osutils.sha_string``;
        KD(secret, data) applies H to the utf-8 encoding of
        ``secret:data``.
    """
    H = None
    KD = None
    name = algorithm.upper() if isinstance(algorithm, str) else algorithm
    if name == 'MD5':
        def H(x):
            return osutils.md5(x).hexdigest()
    elif name == 'SHA':
        H = osutils.sha_string
    if H is not None:
        def KD(secret, data):
            return H(("%s:%s" % (secret, data)).encode('utf-8'))
    return H, KD
4725+
4726+
def get_new_cnonce(nonce, nonce_count):
    """Return a fresh client nonce for a digest exchange.

    The value is the first 16 characters of ``osutils.sha_string``
    applied to a string combining the server nonce, the nonce count,
    the current time and 8 characters from ``osutils.rand_chars``.

    :param nonce: The nonce sent by the server.
    :param nonce_count: The number of times the nonce has been used.
    """
    seed = ':'.join(['%s' % nonce,
                     '%d' % nonce_count,
                     '%s' % time.ctime(),
                     '%s' % osutils.rand_chars(8)])
    digest = osutils.sha_string(seed.encode('utf-8'))
    return digest[:16]
4731+
4732+
class DigestAuthHandler(AbstractAuthHandler):
    """A custom digest authentication handler."""

    scheme = 'digest'
    # Before basic as digest is a bit more secure and should be preferred
    handler_order = 490

    def auth_params_reusable(self, auth):
        """Tell whether ``auth`` was filled by a previous digest exchange."""
        # If the auth scheme is known, it means a previous
        # authentication was successful, all information is
        # available, no further checks are needed.
        return auth.get('scheme', None) == 'digest'

    def auth_match(self, header, auth):
        """Accept 'digest' challenges we are able to answer.

        The challenge must offer the 'auth' quality of protection
        (possibly among others, as in ``qop="auth,auth-int"``) and a
        supported algorithm. On success the parameters needed to build
        the response are recorded in ``auth``.

        :param header: The authentication header sent by the server.
        :param auth: The auth parameters already known; may be updated.
        :returns: True if we can try to handle the authentication.
        """
        scheme, raw_auth = self._parse_auth_header(header)
        if scheme != self.scheme:
            return False
        if raw_auth is None:
            # A bare "Digest" challenge has no parameters to parse.
            return False

        # Put the requested authentication info into a dict
        req_auth = urllib_request.parse_keqv_list(
            urllib_request.parse_http_list(raw_auth))

        # Check that we can handle that authentication. The server
        # offers a comma-separated list of options; only 'auth' is
        # supported (no auth-int so far).
        qop_offered = req_auth.get('qop', None)
        if qop_offered is None:
            return False
        if 'auth' not in [opt.strip() for opt in qop_offered.split(',')]:
            return False
        qop = 'auth'

        H, KD = get_digest_algorithm_impls(req_auth.get('algorithm', 'MD5'))
        if H is None:
            return False

        realm = req_auth.get('realm', None)
        # Put useful info into auth
        self.update_auth(auth, 'scheme', scheme)
        self.update_auth(auth, 'realm', realm)
        if auth.get('user', None) is None or auth.get('password', None) is None:
            user, password = self.get_user_password(auth)
            self.update_auth(auth, 'user', user)
            self.update_auth(auth, 'password', password)

        try:
            if req_auth.get('algorithm', None) is not None:
                self.update_auth(auth, 'algorithm', req_auth.get('algorithm'))
            nonce = req_auth['nonce']
            if auth.get('nonce', None) != nonce:
                # A new nonce, never used
                self.update_auth(auth, 'nonce_count', 0)
            self.update_auth(auth, 'nonce', nonce)
            self.update_auth(auth, 'qop', qop)
            # opaque is assigned directly, so a change of opaque alone
            # does not mark auth as modified.
            auth['opaque'] = req_auth.get('opaque', None)
        except KeyError:
            # Some required field is not there
            return False

        return True

    def build_auth_header(self, auth, request):
        """Build the 'Digest' header value for ``request``.

        The nonce count stored in ``auth`` is incremented as a side
        effect.

        :param auth: The auth parameters recorded by auth_match.
        :param request: The request needing authentication.
        :return: The header value.
        """
        selector = request.selector
        url_scheme, url_selector = splittype(selector)
        sel_host, uri = splithost(url_selector)

        A1 = ('%s:%s:%s' %
              (auth['user'], auth['realm'], auth['password'])).encode('utf-8')
        A2 = ('%s:%s' % (request.get_method(), uri)).encode('utf-8')

        nonce = auth['nonce']
        qop = auth['qop']

        nonce_count = auth['nonce_count'] + 1
        ncvalue = '%08x' % nonce_count
        cnonce = get_new_cnonce(nonce, nonce_count)

        H, KD = get_digest_algorithm_impls(auth.get('algorithm', 'MD5'))
        nonce_data = '%s:%s:%s:%s:%s' % (nonce, ncvalue, cnonce, qop, H(A2))
        request_digest = KD(H(A1), nonce_data)

        header = 'Digest '
        header += 'username="%s", realm="%s", nonce="%s"' % (auth['user'],
                                                             auth['realm'],
                                                             nonce)
        header += ', uri="%s"' % uri
        header += ', cnonce="%s", nc=%s' % (cnonce, ncvalue)
        header += ', qop="%s"' % qop
        header += ', response="%s"' % request_digest
        # Append the optional fields
        opaque = auth.get('opaque', None)
        if opaque:
            header += ', opaque="%s"' % opaque
        if auth.get('algorithm', None):
            header += ', algorithm="%s"' % auth.get('algorithm')

        # We have used the nonce once more, update the count
        auth['nonce_count'] = nonce_count

        return header
4828+
4829+
class HTTPAuthHandler(AbstractAuthHandler):
    """Custom http authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 401 error and keep the relevant info in
    the auth request attribute.
    """

    # Header sent by the server when authentication is required
    auth_required_header = 'www-authenticate'
    # Header sent back in the authenticated request
    auth_header = 'Authorization'

    def get_auth(self, request):
        """Return the auth params stored on the request"""
        return request.auth

    def set_auth(self, request, auth):
        """Store the auth params on the request"""
        request.auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for ``auth``, unchanged."""
        prompt = self._build_password_prompt(auth)
        return prompt

    def build_username_prompt(self, auth):
        """Return the username prompt for ``auth``, unchanged."""
        prompt = self._build_username_prompt(auth)
        return prompt

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 error by trying to authenticate the request."""
        return self.auth_required(req, headers)
4857+
4858+
class ProxyAuthHandler(AbstractAuthHandler):
    """Custom proxy authentication handler.

    Send the authentication preventively to avoid the roundtrip
    associated with the 407 error and keep the relevant info in
    the proxy_auth request attribute.
    """

    # Header sent by the proxy when authentication is required
    auth_required_header = 'proxy-authenticate'
    # FIXME: the correct capitalization is Proxy-Authorization,
    # but python-2.4 urllib_request.Request insist on using capitalize()
    # instead of title().
    auth_header = 'Proxy-authorization'

    def get_auth(self, request):
        """Return the proxy auth params stored on the request"""
        return request.proxy_auth

    def set_auth(self, request, auth):
        """Store the proxy auth params on the request"""
        request.proxy_auth = auth

    def build_password_prompt(self, auth):
        """Return the password prompt for ``auth``, prefixed by 'Proxy '."""
        return u'Proxy ' + self._build_password_prompt(auth)

    def build_username_prompt(self, auth):
        """Return the username prompt for ``auth``, prefixed by 'Proxy '."""
        return u'Proxy ' + self._build_username_prompt(auth)

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 error by trying to authenticate the request."""
        return self.auth_required(req, headers)
4893+
4894+
class HTTPBasicAuthHandler(BasicAuthHandler, HTTPAuthHandler):
    """Custom http basic authentication handler.

    Combines the basic scheme (BasicAuthHandler) with the http headers
    and 401 handling (HTTPAuthHandler).
    """
4897+
4898+
class ProxyBasicAuthHandler(BasicAuthHandler, ProxyAuthHandler):
    """Custom proxy basic authentication handler.

    Combines the basic scheme (BasicAuthHandler) with the proxy headers
    and 407 handling (ProxyAuthHandler).
    """
4901+
4902+
class HTTPDigestAuthHandler(DigestAuthHandler, HTTPAuthHandler):
    """Custom http digest authentication handler.

    Combines the digest scheme (DigestAuthHandler) with the http headers
    and 401 handling (HTTPAuthHandler).
    """
4905+
4906+
class ProxyDigestAuthHandler(DigestAuthHandler, ProxyAuthHandler):
    """Custom proxy digest authentication handler.

    Combines the digest scheme (DigestAuthHandler) with the proxy headers
    and 407 handling (ProxyAuthHandler).
    """
4909+
4910+
class HTTPNegotiateAuthHandler(NegotiateAuthHandler, HTTPAuthHandler):
    """Custom http negotiate authentication handler.

    Combines the negotiate scheme (NegotiateAuthHandler) with the http
    headers and 401 handling (HTTPAuthHandler).
    """
4913+
4914+
class ProxyNegotiateAuthHandler(NegotiateAuthHandler, ProxyAuthHandler):
    """Custom proxy negotiate authentication handler.

    Combines the negotiate scheme (NegotiateAuthHandler) with the proxy
    headers and 407 handling (ProxyAuthHandler).
    """
4917+
4918+
class HTTPErrorProcessor(urllib_request.HTTPErrorProcessor):
    """Process HTTP error responses.

    We don't really process the errors, quite the contrary
    instead, we leave our Transport handle them.
    """

    # The error codes the caller will handle.
    #
    # This can be specialized in the request on a case-by case basis, but
    # the common cases are covered here.
    accepted_errors = [
        200,  # Ok
        201,
        202,
        204,
        206,  # Partial content
        400,
        403,
        404,  # Not found
        405,  # Method not allowed
        406,  # Not Acceptable
        409,  # Conflict
        416,  # Range not satisfiable
        422,  # Unprocessable entity
        501,  # Not implemented
    ]

    def http_response(self, request, response):
        """Pass accepted responses through, hand the others to the parent.

        :param request: The request that was sent.
        :param response: The response received.
        :return: ``response`` itself when its code is one of
            ``accepted_errors``, otherwise whatever ``self.parent.error``
            returns.
        """
        code = response.code
        msg = response.msg
        hdrs = response.info()

        if code in self.accepted_errors:
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
4956+
4957+
class HTTPDefaultErrorHandler(urllib_request.HTTPDefaultErrorHandler):
    """Translate common errors into Breezy Exceptions"""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise the Breezy exception matching an unhandled http error.

        :raises errors.TransportError: For a 403 response.
        :raises errors.UnexpectedHttpStatus: For any other code.
        """
        url = req.get_full_url()
        if code != 403:
            raise errors.UnexpectedHttpStatus(
                url, code, 'Unable to handle http code: %s' % msg)
        raise errors.TransportError(
            'Server refuses to fulfill the request (403 Forbidden)'
            ' for %s' % url)
4970+
4971+
4972+class Opener(object):
4973+ """A wrapper around urllib_request.build_opener
4974+
4975+ Daughter classes can override to build their own specific opener
4976+ """
4977+ # TODO: Provides hooks for daughter classes.
4978+
4979+ def __init__(self,
4980+ connection=ConnectionHandler,
4981+ redirect=HTTPRedirectHandler,
4982+ error=HTTPErrorProcessor,
4983+ report_activity=None,
4984+ ca_certs=None):
4985+ self._opener = urllib_request.build_opener(
4986+ connection(report_activity=report_activity, ca_certs=ca_certs),
4987+ redirect, error,
4988+ ProxyHandler(),
4989+ HTTPBasicAuthHandler(),
4990+ HTTPDigestAuthHandler(),
4991+ HTTPNegotiateAuthHandler(),
4992+ ProxyBasicAuthHandler(),
4993+ ProxyDigestAuthHandler(),
4994+ ProxyNegotiateAuthHandler(),
4995+ HTTPHandler,
4996+ HTTPSHandler,
4997+ HTTPDefaultErrorHandler,
4998+ )
4999+
5000+ self.open = self._opener.open
The diff has been truncated for viewing.

Subscribers

People subscribed via source and target branches