Merge lp:~cjwatson/launchpad/geoip-ip-address-handling into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 19025
Proposed branch: lp:~cjwatson/launchpad/geoip-ip-address-handling
Merge into: lp:launchpad
Diff against target: 507 lines (+88/-189)
8 files modified
lib/lp/answers/browser/tests/views.txt (+6/-6)
lib/lp/answers/stories/question-search-multiple-languages.txt (+17/-4)
lib/lp/services/geoip/configure.zcml (+1/-6)
lib/lp/services/geoip/doc/geoip.txt (+26/-70)
lib/lp/services/geoip/helpers.py (+25/-18)
lib/lp/services/geoip/interfaces.py (+3/-19)
lib/lp/services/geoip/model.py (+9/-66)
setup.py (+1/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/geoip-ip-address-handling
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+369729@code.launchpad.net

Commit message

Revamp GeoIP IP address handling.

Description of the change

This removes the weird special case mapping private addresses to South Africa, and switches to the ipaddress module rather than ad-hoc private IP address detection, theoretically allowing us to handle IPv6 addresses too (although real IPv6 support requires switching to GeoIP2 data).

The ipaddress module was already in our virtualenv as an indirect dependency.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/answers/browser/tests/views.txt'
2--- lib/lp/answers/browser/tests/views.txt 2018-12-07 13:24:51 +0000
3+++ lib/lp/answers/browser/tests/views.txt 2019-07-04 18:55:19 +0000
4@@ -573,14 +573,14 @@
5 will contain languages from the HTTP request, or the most likely
6 interesting languages based on GeoIP information.
7
8-For example, if the user doesn't log in and their browser is configured to
9-accept Brazilian Portuguese, the vocabulary will contain the languages
10-spoken in South Africa (because the 127.0.0.1 IP address is mapped to
11-South Africa in the tests).
12+For example, if the user doesn't log in, their browser is configured to
13+accept Brazilian Portuguese, and their request appears to come from a South
14+African IP address, the vocabulary will contain the languages spoken in
15+South Africa.
16
17 >>> login(ANONYMOUS)
18 >>> request = LaunchpadTestRequest(
19- ... HTTP_ACCEPT_LANGUAGE='pt_BR')
20+ ... HTTP_ACCEPT_LANGUAGE='pt_BR', REMOTE_ADDR='196.36.161.227')
21 >>> from lp.answers.browser.question import (
22 ... QuestionLanguageVocabularyFactory)
23 >>> view = getMultiAdapter((firefox, request), name='+addticket')
24@@ -676,7 +676,7 @@
25 database.
26
27 >>> request = LaunchpadTestRequest(
28- ... HTTP_ACCEPT_LANGUAGE='fr, en_CA')
29+ ... HTTP_ACCEPT_LANGUAGE='fr, en_CA', REMOTE_ADDR='196.36.161.227')
30
31 >>> login(ANONYMOUS)
32 >>> view = UserSupportLanguagesView(None, request)
33
34=== modified file 'lib/lp/answers/stories/question-search-multiple-languages.txt'
35--- lib/lp/answers/stories/question-search-multiple-languages.txt 2019-05-22 14:57:45 +0000
36+++ lib/lp/answers/stories/question-search-multiple-languages.txt 2019-07-04 18:55:19 +0000
37@@ -3,6 +3,12 @@
38 By default, only questions written in English or one of the user
39 preferred languages are listed and searched.
40
41+Make the test browsers look like they're coming from an arbitrary South
42+African IP address, since we'll use that later.
43+
44+ >>> anon_browser.addHeader('X_FORWARDED_FOR', '196.36.161.227')
45+ >>> user_browser.addHeader('X_FORWARDED_FOR', '196.36.161.227')
46+
47
48 == Anonymous searching ==
49
50@@ -29,10 +35,9 @@
51 ... print(question.find('a').renderContents())
52 Installation failed
53
54-The questions match the languages inferred from by GeoIP
55-127.0.0.1 is mapped to South Africa in the test suite. The language
56-control shows the intersection of the user's languages and the
57-languages of the target's questions. In this example:
58+The questions match the languages inferred from GeoIP (South Africa in
59+this case). The language control shows the intersection of the user's
60+languages and the languages of the target's questions. In this example:
61 set(['af', 'en', 'st', 'xh', 'zu']) & set(['en', 'es']) == set(en).
62
63 >>> sorted(anon_browser.getControl(name='field.language').options)
64@@ -113,6 +118,10 @@
65 anonymous user makes a request from a GeoIP that has no languages
66 mapped, we assume they speak the default language of English.
67
68+ >>> for pos, (key, _) in enumerate(anon_browser.mech_browser.addheaders):
69+ ... if key == 'X_FORWARDED_FOR':
70+ ... del anon_browser.mech_browser.addheaders[pos]
71+ ... break
72 >>> anon_browser.addHeader('X_FORWARDED_FOR', '172.16.1.1')
73 >>> anon_browser.open(
74 ... 'http://launchpad.test/ubuntu/+source/mozilla-firefox/+questions')
75@@ -166,6 +175,10 @@
76 When the project languages are just English, and the user speaks
77 that language, we do not show the language controls.
78
79+ >>> for pos, (key, _) in enumerate(user_browser.mech_browser.addheaders):
80+ ... if key == 'X_FORWARDED_FOR':
81+ ... del user_browser.mech_browser.addheaders[pos]
82+ ... break
83 >>> user_browser.addHeader('X_FORWARDED_FOR', '172.16.1.1')
84 >>> user_browser.open(
85 ... 'http://launchpad.test/ubuntu/+source/mozilla-firefox/+questions')
86
87=== modified file 'lib/lp/services/geoip/configure.zcml'
88--- lib/lp/services/geoip/configure.zcml 2010-10-25 05:33:20 +0000
89+++ lib/lp/services/geoip/configure.zcml 2019-07-04 18:55:19 +0000
90@@ -1,4 +1,4 @@
91-<!-- Copyright 2009-2010 Canonical Ltd. This software is licensed under the
92+<!-- Copyright 2009-2019 Canonical Ltd. This software is licensed under the
93 GNU Affero General Public License version 3 (see the file LICENSE).
94 -->
95
96@@ -34,9 +34,4 @@
97 factory="lp.services.geoip.helpers.request_country"
98 provides="lp.services.worlddata.interfaces.country.ICountry" />
99
100- <adapter
101- for="zope.publisher.interfaces.browser.IBrowserRequest"
102- factory="lp.services.geoip.model.GeoIPRequest"
103- provides="lp.services.geoip.interfaces.IGeoIPRecord" />
104-
105 </configure>
106
107=== modified file 'lib/lp/services/geoip/doc/geoip.txt'
108--- lib/lp/services/geoip/doc/geoip.txt 2016-01-26 15:47:37 +0000
109+++ lib/lp/services/geoip/doc/geoip.txt 2019-07-04 18:55:19 +0000
110@@ -15,26 +15,30 @@
111 u'Brazil'
112
113 When running tests the IP address will start with '127.', and GeoIP
114-would, obviously, fail to find the country for, so we use a South
115-African IP address in that case.
116+would, obviously, fail to find the country for that, so we return None.
117
118- >>> geoip.getCountryByAddr('127.0.0.88').name
119- u'South Africa'
120+ >>> print(geoip.getCountryByAddr('127.0.0.88'))
121+ None
122
123 We do the same trick for any IP addresses on private networks.
124
125- >>> geoip.getCountryByAddr('10.0.0.88').name
126- u'South Africa'
127-
128- >>> geoip.getCountryByAddr('192.168.0.7').name
129- u'South Africa'
130-
131- >>> geoip.getCountryByAddr('172.16.0.1').name
132- u'South Africa'
133-
134-IGeoIP also provides a getRecordByAddress() method, which returns a
135-GeoIPRecord object, containing a bunch more information about that IP's
136-location.
137+ >>> print(geoip.getCountryByAddr('10.0.0.88'))
138+ None
139+
140+ >>> print(geoip.getCountryByAddr('192.168.0.7'))
141+ None
142+
143+ >>> print(geoip.getCountryByAddr('172.16.0.1'))
144+ None
145+
146+ >>> print(geoip.getCountryByAddr('::1'))
147+ None
148+
149+ >>> print(geoip.getCountryByAddr('fc00::1'))
150+ None
151+
152+IGeoIP also provides a getRecordByAddress() method, which returns a dict
153+containing a bunch more information about that IP's location.
154
155 >>> record = geoip.getRecordByAddress('201.13.165.145')
156 >>> for key, value in sorted(record.items()):
157@@ -52,61 +56,13 @@
158 region_name: ...
159 time_zone: ...
160
161-And again we'll use a South African IP if the address starts with '127.'.
162-
163- >>> record = geoip.getRecordByAddress('127.0.0.1')
164- >>> for key, value in sorted(record.items()):
165- ... print "%s: %s" % (key, value)
166- area_code: ...
167- city: ...
168- country_code: ZA
169- country_code3: ZAF
170- country_name: South Africa
171- dma_code: ...
172- latitude: ...
173- longitude: ...
174- postal_code: ...
175- region: ...
176- region_name: ...
177- time_zone: ...
178-
179-If it can't find a GeoIPRecord for the given IP address, it will return
180+And again we'll return None if the address is private.
181+
182+ >>> print(geoip.getRecordByAddress('127.0.0.1'))
183+ None
184+
185+If it can't find a GeoIP record for the given IP address, it will return
186 None.
187
188 >>> print geoip.getRecordByAddress('255.255.255.255')
189 None
190-
191-We also have GeoIPRequest, which adapts an IBrowserRequest into an
192-IGeoIPRecord, providing the latitude, longitude and time zone of the
193-request's originating IP address.
194-
195- >>> from lp.services.webapp.servers import LaunchpadTestRequest
196- >>> from lp.services.geoip.interfaces import IGeoIPRecord
197- >>> request = LaunchpadTestRequest()
198-
199-Since our request won't have an originating IP address, we'll use that
200-same South African IP address.
201-
202- >>> from lp.services.geoip.helpers import ipaddress_from_request
203- >>> print ipaddress_from_request(request)
204- None
205- >>> geoip_request = IGeoIPRecord(request)
206- >>> geoip_request.latitude
207- -33...
208- >>> geoip_request.longitude
209- 18...
210- >>> geoip_request.time_zone
211- 'Africa/Johannesburg'
212-
213-If the request had an originating IP address, though, it'd be used when
214-we adapted it into an IGeoIPRecord.
215-
216- >>> request = LaunchpadTestRequest(
217- ... environ={'REMOTE_ADDR': '201.13.165.145'})
218- >>> geoip_request = IGeoIPRecord(request)
219- >>> geoip_request.latitude
220- -23...
221- >>> geoip_request.longitude
222- -45...
223- >>> geoip_request.time_zone in ('Brazil/Acre', 'America/Sao_Paulo')
224- True
225
226=== modified file 'lib/lp/services/geoip/helpers.py'
227--- lib/lp/services/geoip/helpers.py 2012-01-06 11:08:30 +0000
228+++ lib/lp/services/geoip/helpers.py 2019-07-04 18:55:19 +0000
229@@ -1,10 +1,10 @@
230-# Copyright 2009 Canonical Ltd. This software is licensed under the
231+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
232 # GNU Affero General Public License version 3 (see the file LICENSE).
233
234 __metaclass__ = type
235
236-import re
237-
238+import ipaddress
239+import six
240 from zope.component import getUtility
241
242 from lp.services.geoip.interfaces import IGeoIP
243@@ -12,6 +12,7 @@
244
245 __all__ = [
246 'request_country',
247+ 'ipaddress_is_global',
248 'ipaddress_from_request',
249 ]
250
251@@ -25,39 +26,48 @@
252 This information is not reliable and trivially spoofable - use it only
253 for selecting sane defaults.
254 """
255- ipaddress = ipaddress_from_request(request)
256- if ipaddress is not None:
257- return getUtility(IGeoIP).getCountryByAddr(ipaddress)
258+ ip_address = ipaddress_from_request(request)
259+ if ip_address is not None:
260+ return getUtility(IGeoIP).getCountryByAddr(ip_address)
261 return None
262
263
264-_ipaddr_re = re.compile('\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?')
265+def ipaddress_is_global(addr):
266+ """Return True iff the IP address is on a global public network."""
267+ try:
268+ return ipaddress.ip_address(six.ensure_text(addr)).is_global
269+ except ValueError:
270+ return False
271
272
273 def ipaddress_from_request(request):
274 """Determine the IP address for this request.
275
276- Returns None if the IP address cannot be determined or is localhost.
277+ Returns None if the IP address cannot be determined or is not on a
278+ global public network.
279
280 The remote IP address is determined by the X-Forwarded-For: header,
281 or failing that, the REMOTE_ADDR CGI environment variable.
282
283 Because this information is unreliable and trivially spoofable, we
284- don't bother to do much error checking to ensure the IP address is at all
285- valid.
286+ don't bother to do much error checking to ensure the IP address is at
287+ all valid, beyond what the ipaddress module gives us.
288
289- >>> google = '66.102.7.104'
290 >>> ipaddress_from_request({'REMOTE_ADDR': '1.1.1.1'})
291 '1.1.1.1'
292 >>> ipaddress_from_request({
293- ... 'HTTP_X_FORWARDED_FOR': '666.666.666.666',
294+ ... 'HTTP_X_FORWARDED_FOR': '66.66.66.66',
295 ... 'REMOTE_ADDR': '1.1.1.1'
296 ... })
297- '666.666.666.666'
298+ '66.66.66.66'
299 >>> ipaddress_from_request({'HTTP_X_FORWARDED_FOR':
300 ... 'localhost, 127.0.0.1, 255.255.255.255,1.1.1.1'
301 ... })
302- '255.255.255.255'
303+ '1.1.1.1'
304+ >>> ipaddress_from_request({
305+ ... 'HTTP_X_FORWARDED_FOR': 'nonsense',
306+ ... 'REMOTE_ADDR': '1.1.1.1'
307+ ... })
308 """
309 ipaddresses = request.get('HTTP_X_FORWARDED_FOR')
310
311@@ -70,10 +80,7 @@
312 # We actually get a comma separated list of addresses. We need to throw
313 # away the obvious duds, such as loopback addresses
314 ipaddresses = [addr.strip() for addr in ipaddresses.split(',')]
315- ipaddresses = [
316- addr for addr in ipaddresses
317- if not (addr.startswith('127.')
318- or _ipaddr_re.search(addr) is None)]
319+ ipaddresses = [addr for addr in ipaddresses if ipaddress_is_global(addr)]
320
321 if ipaddresses:
322 # If we have more than one, have a guess.
323
324=== modified file 'lib/lp/services/geoip/interfaces.py'
325--- lib/lp/services/geoip/interfaces.py 2013-01-07 02:40:55 +0000
326+++ lib/lp/services/geoip/interfaces.py 2019-07-04 18:55:19 +0000
327@@ -1,15 +1,11 @@
328-# Copyright 2009 Canonical Ltd. This software is licensed under the
329+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
330 # GNU Affero General Public License version 3 (see the file LICENSE).
331
332-from zope.interface import (
333- Attribute,
334- Interface,
335- )
336+from zope.interface import Interface
337
338
339 __all__ = [
340 'IGeoIP',
341- 'IGeoIPRecord',
342 'IRequestLocalLanguages',
343 'IRequestPreferredLanguages',
344 ]
345@@ -19,7 +15,7 @@
346 """The GeoIP utility, which represents the GeoIP database."""
347
348 def getRecordByAddress(ip_address):
349- """Return the IGeoIPRecord for the given IP address, or None."""
350+ """Return the GeoIP record for the given IP address, or None."""
351
352 def getCountryByAddr(ip_address):
353 """Find and return an ICountry based on the given IP address.
354@@ -29,18 +25,6 @@
355 """
356
357
358-class IGeoIPRecord(Interface):
359- """A single record in the GeoIP database.
360-
361- A GeoIP record gathers together all of the relevant information for a
362- single IP address or machine name in the DNS.
363- """
364-
365- latitude = Attribute("The geographic latitude, in real degrees.")
366- latitude = Attribute("The geographic longitude, in real degrees.")
367- time_zone = Attribute("The time zone.")
368-
369-
370 class IRequestLocalLanguages(Interface):
371
372 def getLocalLanguages():
373
374=== modified file 'lib/lp/services/geoip/model.py'
375--- lib/lp/services/geoip/model.py 2015-07-08 16:05:11 +0000
376+++ lib/lp/services/geoip/model.py 2019-07-04 18:55:19 +0000
377@@ -1,9 +1,8 @@
378-# Copyright 2009 Canonical Ltd. This software is licensed under the
379+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
380 # GNU Affero General Public License version 3 (see the file LICENSE).
381
382 __all__ = [
383 'GeoIP',
384- 'GeoIPRequest',
385 'RequestLocalLanguages',
386 'RequestPreferredLanguages',
387 ]
388@@ -16,10 +15,12 @@
389 from zope.interface import implementer
390
391 from lp.services.config import config
392-from lp.services.geoip.helpers import ipaddress_from_request
393+from lp.services.geoip.helpers import (
394+ ipaddress_from_request,
395+ ipaddress_is_global,
396+ )
397 from lp.services.geoip.interfaces import (
398 IGeoIP,
399- IGeoIPRecord,
400 IRequestLocalLanguages,
401 IRequestPreferredLanguages,
402 )
403@@ -42,7 +43,8 @@
404
405 def getRecordByAddress(self, ip_address):
406 """See `IGeoIP`."""
407- ip_address = ensure_address_is_not_private(ip_address)
408+ if not ipaddress_is_global(ip_address):
409+ return None
410 try:
411 return self._gi.record_by_addr(ip_address)
412 except SystemError:
413@@ -53,7 +55,8 @@
414
415 def getCountryByAddr(self, ip_address):
416 """See `IGeoIP`."""
417- ip_address = ensure_address_is_not_private(ip_address)
418+ if not ipaddress_is_global(ip_address):
419+ return None
420 geoip_record = self.getRecordByAddress(ip_address)
421 if geoip_record is None:
422 return None
423@@ -68,44 +71,6 @@
424 return country
425
426
427-@implementer(IGeoIPRecord)
428-class GeoIPRequest:
429- """An adapter for a BrowserRequest into an IGeoIPRecord."""
430-
431- def __init__(self, request):
432- self.request = request
433- ip_address = ipaddress_from_request(self.request)
434- if ip_address is None:
435- # This happens during page testing, when the REMOTE_ADDR is not
436- # set by Zope.
437- ip_address = '127.0.0.1'
438- ip_address = ensure_address_is_not_private(ip_address)
439- self.ip_address = ip_address
440- self.geoip_record = getUtility(IGeoIP).getRecordByAddress(
441- self.ip_address)
442-
443- @property
444- def latitude(self):
445- """See `IGeoIPRecord`."""
446- if self.geoip_record is None:
447- return None
448- return self.geoip_record['latitude']
449-
450- @property
451- def longitude(self):
452- """See `IGeoIPRecord`."""
453- if self.geoip_record is None:
454- return None
455- return self.geoip_record['longitude']
456-
457- @property
458- def time_zone(self):
459- """See `IGeoIPRecord`."""
460- if self.geoip_record is None:
461- return None
462- return self.geoip_record['time_zone']
463-
464-
465 @implementer(IRequestLocalLanguages)
466 class RequestLocalLanguages(object):
467
468@@ -168,27 +133,5 @@
469 return sorted(languages, key=lambda x: x.englishname)
470
471
472-def ensure_address_is_not_private(ip_address):
473- """Return the given IP address if it doesn't start with '127.'.
474-
475- If it does start with '127.' then we return a South African IP address.
476- Notice that we have no specific reason for using a South African IP
477- address here -- we could have used any other non-private IP address.
478- """
479- private_prefixes = (
480- '127.',
481- '192.168.',
482- '172.16.',
483- '10.',
484- )
485-
486- for prefix in private_prefixes:
487- if ip_address.startswith(prefix):
488- # This is an arbitrary South African IP which was handy at the
489- # time of writing; it's not special in any way.
490- return '196.36.161.227'
491- return ip_address
492-
493-
494 class NoGeoIPDatabaseFound(Exception):
495 """No GeoIP database was found."""
496
497=== modified file 'setup.py'
498--- setup.py 2019-06-18 16:52:56 +0000
499+++ setup.py 2019-07-04 18:55:19 +0000
500@@ -164,6 +164,7 @@
501 'gunicorn[gthread]',
502 'html5browser',
503 'importlib-resources',
504+ 'ipaddress',
505 'ipython',
506 'jsautobuild',
507 'launchpad-buildd',