Merge ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master

Proposed by Thiago F. Pappacena
Status: Merged
Approved by: Thiago F. Pappacena
Approved revision: 931d52c0bc2e333a611bb9c513fd61b950dba9ad
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~pappacena/launchpad:https-mirror-prober-proxy-fix
Merge into: launchpad:master
Diff against target: 269 lines (+130/-41)
4 files modified
lib/lp/registry/scripts/distributionmirror_prober.py (+16/-8)
lib/lp/registry/tests/test_distributionmirror_prober.py (+10/-33)
lib/lp/services/httpproxy/__init__.py (+0/-0)
lib/lp/services/httpproxy/connect_tunneling.py (+104/-0)
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Review via email: mp+380650@code.launchpad.net

Commit message

Adding a HTTPS proxy-CONNECT for twisted, and using it for HTTPS mirrors prober.

Description of the change

- Added a new type of HTTP Agent at lp.services.httpproxy.connect_tunneling, so we can reuse it for future implementations that needs to make HTTPS requests through proxy
- Changed the prober to use this new type of agent

To post a comment you must log in.
cc2cf58... by Thiago F. Pappacena

removing unused imports

Revision history for this message
Colin Watson (cjwatson) wrote :

The lack of tests of the actual tunnelling agent is unfortunate, but from what you said yesterday I gather that getting tests of this to work is difficult. I'd suggest adding an XXX comment or filing a bug about this so that the untestedness isn't entirely forgotten.

Other than that, this seems worth a go. Thanks.

review: Approve
Revision history for this message
Thiago F. Pappacena (pappacena) wrote :

The lack of tests is bothering me too, but I would like to see if it's working properly at least on our QA envs before spending time writing tests. I've added a bug for this: https://bugs.launchpad.net/launchpad/+bug/1867348.

I'll make the requested change and land this branch.

931d52c... by Thiago F. Pappacena

Improving coding style

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/registry/scripts/distributionmirror_prober.py b/lib/lp/registry/scripts/distributionmirror_prober.py
2index c633fb8..fd40f8d 100644
3--- a/lib/lp/registry/scripts/distributionmirror_prober.py
4+++ b/lib/lp/registry/scripts/distributionmirror_prober.py
5@@ -13,6 +13,10 @@ import os.path
6 from StringIO import StringIO
7
8 import OpenSSL
9+from OpenSSL.SSL import (
10+ Context,
11+ TLSv1_1_METHOD,
12+ )
13 import requests
14 from six.moves import http_client
15 from six.moves.urllib.parse import (
16@@ -31,13 +35,11 @@ from twisted.internet.defer import (
17 CancelledError,
18 DeferredSemaphore,
19 )
20-from twisted.internet.endpoints import HostnameEndpoint
21 from twisted.internet.ssl import VerificationError
22 from twisted.python.failure import Failure
23 from twisted.web.client import (
24 Agent,
25 BrowserLikePolicyForHTTPS,
26- ProxyAgent,
27 ResponseNeverReceived,
28 )
29 from twisted.web.http import HTTPClient
30@@ -53,6 +55,7 @@ from lp.registry.interfaces.distributionmirror import (
31 )
32 from lp.registry.interfaces.distroseries import IDistroSeries
33 from lp.services.config import config
34+from lp.services.httpproxy.connect_tunneling import TunnelingAgent
35 from lp.services.librarian.interfaces import ILibraryFileAliasSet
36 from lp.services.timeout import urlfetch
37 from lp.services.webapp import canonical_url
38@@ -205,9 +208,8 @@ class HTTPSProbeFailureHandler:
39 if self.isInvalidCertificateError(error):
40 invalid_certificate_hosts.add(
41 (self.factory.request_host, self.factory.request_port))
42- reason = InvalidHTTPSCertificate(
43+ raise InvalidHTTPSCertificate(
44 self.factory.request_host, self.factory.request_port)
45- raise reason
46 if self.isTimeout(error):
47 raise ProberTimeout(self.factory.url, self.factory.timeout)
48 raise error
49@@ -304,6 +306,7 @@ class ProberFactory(protocol.ClientFactory):
50 self.timeoutCall = None
51 self.setURL(url.encode('ascii'))
52 self.logger = logging.getLogger('distributionmirror-prober')
53+ self._https_client = None
54
55 @property
56 def is_https(self):
57@@ -339,15 +342,20 @@ class ProberFactory(protocol.ClientFactory):
58 return self._deferred
59
60 def getHttpsClient(self):
61+ if self._https_client is not None:
62+ return self._https_client
63 # Should we use a proxy?
64 if not config.launchpad.http_proxy:
65 agent = Agent(
66 reactor=reactor, contextFactory=self.https_agent_policy())
67 else:
68- endpoint = HostnameEndpoint(
69- reactor, self.connect_host, self.connect_port)
70- agent = ProxyAgent(endpoint)
71- return TreqHTTPClient(agent)
72+ contextFactory = self.https_agent_policy()
73+ contextFactory.getContext = lambda: Context(TLSv1_1_METHOD)
74+ agent = TunnelingAgent(
75+ reactor, (self.connect_host, self.connect_port, None),
76+ contextFactory=contextFactory)
77+ self._https_client = TreqHTTPClient(agent)
78+ return self._https_client
79
80 def connect(self):
81 """Starts the connection and sets the self._deferred to the proper
82diff --git a/lib/lp/registry/tests/test_distributionmirror_prober.py b/lib/lp/registry/tests/test_distributionmirror_prober.py
83index c3e363c..46a8cae 100644
84--- a/lib/lp/registry/tests/test_distributionmirror_prober.py
85+++ b/lib/lp/registry/tests/test_distributionmirror_prober.py
86@@ -33,10 +33,7 @@ from twisted.internet import (
87 )
88 from twisted.python.failure import Failure
89 from twisted.web import server
90-from twisted.web.client import (
91- BrowserLikePolicyForHTTPS,
92- ProxyAgent,
93- )
94+from twisted.web.client import BrowserLikePolicyForHTTPS
95 from zope.component import getUtility
96 from zope.security.proxy import removeSecurityProxy
97
98@@ -77,6 +74,7 @@ from lp.registry.tests.distributionmirror_http_server import (
99 )
100 from lp.services.config import config
101 from lp.services.daemons.tachandler import TacTestSetup
102+from lp.services.httpproxy.connect_tunneling import TunnelingAgent
103 from lp.services.timeout import default_timeout
104 from lp.testing import (
105 clean_up_reactor,
106@@ -219,40 +217,19 @@ class TestProberHTTPSProtocolAndFactory(TestCase):
107 return deferred.addCallback(got_result)
108
109 def test_https_prober_uses_proxy(self):
110- root = DistributionMirrorTestSecureHTTPServer()
111- site = server.Site(root)
112- proxy_listen_port = reactor.listenTCP(0, site)
113- proxy_port = proxy_listen_port.getHost().port
114+ proxy_port = 6654
115 self.pushConfig(
116- 'launchpad', http_proxy='http://localhost:%s/valid-mirror/file'
117- % proxy_port)
118+ 'launchpad', http_proxy='http://localhost:%s'% proxy_port)
119
120 url = 'https://localhost:%s/valid-mirror/file' % self.port
121- prober = RedirectAwareProberFactory(url)
122+ prober = RedirectAwareProberFactory(url, timeout=0.5)
123 self.assertEqual(prober.url, url)
124- deferred = prober.probe()
125-
126- def got_result(result):
127- # We basically don't care about the result here. We just want to
128- # check that it did the request to the correct URI,
129- # and ProxyAgent was used pointing to the correct proxy.
130- agent = prober.getHttpsClient()._agent
131- self.assertIsInstance(agent, ProxyAgent)
132- self.assertEqual('localhost', agent._proxyEndpoint._hostText)
133- self.assertEqual(proxy_port, agent._proxyEndpoint._port)
134-
135- self.assertEqual(
136- 'https://localhost:%s/valid-mirror/file' % self.port,
137- result.value.response.request.absoluteURI)
138-
139- def cleanup(*args, **kwargs):
140- proxy_listen_port.stopListening()
141
142- # Doing the proxy checks on the error callback because the
143- # proxy is dummy and always returns 404.
144- deferred.addErrback(got_result)
145- deferred.addBoth(cleanup)
146- return deferred
147+ # We just want to check that it did the request using the correct
148+ # Agent, pointing to the correct proxy config.
149+ agent = prober.getHttpsClient()._agent
150+ self.assertIsInstance(agent, TunnelingAgent)
151+ self.assertEqual(('localhost', proxy_port, None), agent._proxyConf)
152
153 def test_https_fails_on_invalid_certificates(self):
154 """Changes set back the default browser-like policy for HTTPS
155diff --git a/lib/lp/services/httpproxy/__init__.py b/lib/lp/services/httpproxy/__init__.py
156new file mode 100644
157index 0000000..e69de29
158--- /dev/null
159+++ b/lib/lp/services/httpproxy/__init__.py
160diff --git a/lib/lp/services/httpproxy/connect_tunneling.py b/lib/lp/services/httpproxy/connect_tunneling.py
161new file mode 100644
162index 0000000..96d49e6
163--- /dev/null
164+++ b/lib/lp/services/httpproxy/connect_tunneling.py
165@@ -0,0 +1,104 @@
166+# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
167+# GNU Affero General Public License version 3 (see the file LICENSE).
168+
169+"""CONNECT proxy to help on HTTPS connections when using twisted.
170+
171+See https://twistedmatrix.com/trac/ticket/8806 (and reference
172+implementation at https://github.com/scrapy/scrapy/pull/397/files)."""
173+
174+__metaclass__ = type
175+
176+__all__ = [
177+ 'TunnelingAgent',
178+ ]
179+
180+import re
181+
182+from twisted.internet import defer
183+from twisted.internet.endpoints import TCP4ClientEndpoint
184+from twisted.web.client import Agent
185+
186+
187+class TunnelError(Exception):
188+ """An HTTP CONNECT tunnel could not be established by the proxy."""
189+
190+
191+class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
192+ """An endpoint that tunnels through proxies to allow HTTPS requests.
193+
194+ To accomplish that, this endpoint sends an HTTP CONNECT to the proxy.
195+ """
196+
197+ _responseMatcher = re.compile('HTTP/1\.. 200')
198+
199+ def __init__(self, reactor, host, port, proxyConf, contextFactory,
200+ timeout=30, bindAddress=None):
201+ proxyHost, proxyPort, self._proxyAuthHeader = proxyConf
202+ super(TunnelingTCP4ClientEndpoint, self).__init__(reactor, proxyHost,
203+ proxyPort, timeout, bindAddress)
204+ self._tunneledHost = host
205+ self._tunneledPort = port
206+ self._contextFactory = contextFactory
207+ self._tunnelReadyDeferred = defer.Deferred()
208+ self._connectDeferred = None
209+ self._protocol = None
210+
211+ def requestTunnel(self, protocol):
212+ """Asks the proxy to open a tunnel."""
213+ tunnelReq = 'CONNECT %s:%s HTTP/1.1\n' % (self._tunneledHost,
214+ self._tunneledPort)
215+ if self._proxyAuthHeader:
216+ tunnelReq += 'Proxy-Authorization: %s\n' % self._proxyAuthHeader
217+ tunnelReq += '\n'
218+ protocol.transport.write(tunnelReq)
219+ self._protocolDataReceived = protocol.dataReceived
220+ protocol.dataReceived = self.processProxyResponse
221+ self._protocol = protocol
222+ return protocol
223+
224+ def processProxyResponse(self, bytes):
225+ """Processes the response from the proxy. If the tunnel is successfully
226+ created, notifies the client that we are ready to send requests. If not
227+ raises a TunnelError.
228+ """
229+ self._protocol.dataReceived = self._protocolDataReceived
230+ if TunnelingTCP4ClientEndpoint._responseMatcher.match(bytes):
231+ self._protocol.transport.startTLS(
232+ self._contextFactory, self._protocolFactory)
233+ self._tunnelReadyDeferred.callback(self._protocol)
234+ else:
235+ self._tunnelReadyDeferred.errback(
236+ TunnelError('Could not open CONNECT tunnel.'))
237+
238+ def connectFailed(self, reason):
239+ """Propagates the errback to the appropriate deferred."""
240+ self._tunnelReadyDeferred.errback(reason)
241+
242+ def connect(self, protocolFactory):
243+ self._protocolFactory = protocolFactory
244+ self._connectDeferred = super(
245+ TunnelingTCP4ClientEndpoint, self).connect(protocolFactory)
246+ self._connectDeferred.addCallback(self.requestTunnel)
247+ self._connectDeferred.addErrback(self.connectFailed)
248+ return self._tunnelReadyDeferred
249+
250+
251+class TunnelingAgent(Agent):
252+ """An agent that uses a L{TunnelingTCP4ClientEndpoint} to make HTTPS
253+ requests.
254+ """
255+
256+ def __init__(self, reactor, proxyConf, contextFactory=None,
257+ connectTimeout=None, bindAddress=None, pool=None):
258+ super(TunnelingAgent, self).__init__(reactor, contextFactory,
259+ connectTimeout, bindAddress, pool)
260+ self._contextFactory = contextFactory
261+ self._connectTimeout = connectTimeout
262+ self._bindAddress = bindAddress
263+ self._proxyConf = proxyConf
264+
265+ def _getEndpoint(self, url):
266+ return TunnelingTCP4ClientEndpoint(
267+ self._reactor, url.host, url.port,
268+ self._proxyConf, self._contextFactory, self._connectTimeout,
269+ self._bindAddress)