Merge lp:~stefanor/ibid/url-idn-specialchar-502380 into lp:~ibid-core/ibid/old-trunk-1.6

Proposed by Stefano Rivera on 2010-01-02
Status: Merged
Approved by: Michael Gorven on 2010-01-05
Approved revision: 830
Merged at revision: 828
Proposed branch: lp:~stefanor/ibid/url-idn-specialchar-502380
Merge into: lp:~ibid-core/ibid/old-trunk-1.6
Diff against target: 118 lines (+23/-12)
4 files modified
ibid/plugins/url.py (+9/-9)
ibid/test/plugins/test_url.py (+1/-0)
ibid/utils/__init__.py (+11/-2)
ibid/utils/html.py (+2/-1)
To merge this branch: bzr merge lp:~stefanor/ibid/url-idn-specialchar-502380
Reviewer | Review Type | Date Requested | Status
Michael Gorven | | | Approve on 2010-01-05
Jonathan Hitchcock | | 2010-01-02 | Approve on 2010-01-04
Review via email: mp+16740@code.launchpad.net
To post a comment you must log in.
Stefano Rivera (stefanor) wrote :

This will be useful for full IDN support in the isitup merge

830. By Stefano Rivera on 2010-01-04

Spelling in comments

Jonathan Hitchcock (vhata) wrote :

Looks solid.

review: Approve
Michael Gorven (mgorven) wrote :

 review approve
 status approved

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/url.py'
2--- ibid/plugins/url.py 2010-01-02 08:06:31 +0000
3+++ ibid/plugins/url.py 2010-01-04 21:42:14 +0000
4@@ -76,16 +76,16 @@
5 tags = u' '.join((event.sender['nick'], obfusc_conn, obfusc_chan, event.source))
6
7 data = {
8- 'url' : url,
9- 'description' : title,
10- 'tags' : tags,
11- 'replace' : u'yes',
12+ 'url' : url.encode('utf-8'),
13+ 'description' : title.encode('utf-8'),
14+ 'tags' : tags.encode('utf-8'),
15+ 'replace' : 'yes',
16 'dt' : date.strftime('%Y-%m-%dT%H:%M:%SZ'),
17- 'extended' : event.message['raw'],
18+ 'extended' : event.message['raw'].encode('utf-8'),
19 }
20
21 self._set_auth(username, password)
22- posturl = 'https://api.del.icio.us/v1/posts/add?' + urlencode(data, 'utf-8')
23+ posturl = 'https://api.del.icio.us/v1/posts/add?' + urlencode(data)
24
25 try:
26 resp = urlopen(posturl).read()
27@@ -138,10 +138,10 @@
28 tlds = 'com.org.net.za'.split('.')
29
30 self.grab.im_func.pattern = re.compile((
31- r'(?:[^@.]\b(?!\.)|\A)(' # Match a boundry, but not on an e-mail address
32+ r'(?:[^@./]\b(?!\.)|\A)(' # Match a boundary, but not on an e-mail address
33 r'(?:\w+://|(?:www|ftp)\.)\S+?' # Match an explicit URL or guess by www.
34- r'|[^@\s:]+\.(?:%s)(?:/\S*?)?' # Guess at the URL based on TLD
35- r')[\[>)\]"\'.,;:]*(?:\s|\Z)' # End Boundry
36+ r'|[^@\s:/]+\.(?:%s)(?:/\S*?)?' # Guess at the URL based on TLD
37+ r')[\[>)\]"\'.,;:]*(?:\s|\Z)' # End boundary
38 ) % '|'.join(tlds), re.I | re.DOTALL)
39
40 @handler
41
42=== modified file 'ibid/test/plugins/test_url.py'
43--- ibid/test/plugins/test_url.py 2009-08-20 15:31:15 +0000
44+++ ibid/test/plugins/test_url.py 2010-01-04 21:42:14 +0000
45@@ -50,6 +50,7 @@
46 u'x joe@google.com',
47 u'<joe@bar.com>',
48 u'joe@bar.za.net',
49+ u'File "/usr/lib/python2.5/httplib.py", line 866, in request',
50 ]
51
52 def test_bad_grabs(self):
53
54=== modified file 'ibid/utils/__init__.py'
55--- ibid/utils/__init__.py 2010-01-01 15:28:25 +0000
56+++ ibid/utils/__init__.py 2010-01-04 21:42:14 +0000
57@@ -8,6 +8,7 @@
58 import time
59 from urllib import urlencode
60 import urllib2
61+from urlparse import urlparse, urlunparse
62 import zlib
63
64 from dateutil.tz import tzlocal, tzutc
65@@ -72,7 +73,7 @@
66
67 exists = os.path.isfile(cachefile)
68
69- req = urllib2.Request(url)
70+ req = urllib2.Request(url_to_bytestring(url))
71 for name, value in headers:
72 req.add_header(name, value)
73 if not req.has_header('user-agent'):
74@@ -158,6 +159,14 @@
75 class JSONException(Exception):
76 pass
77
78+def url_to_bytestring(url):
79+ "Expand an IDN hostname and UTF-8 encode the path of a unicode URL"
80+ parts = list(urlparse(url))
81+ host = parts[1].split(':')
82+ host[0] = host[0].encode('idna')
83+ parts[1] = ':'.join(host)
84+ return urlunparse(parts).encode('utf-8')
85+
86 def json_webservice(url, params={}, headers={}):
87 "Request data from a JSON webservice, and deserialise"
88
89@@ -166,7 +175,7 @@
90 params[key] = params[key].encode('utf-8')
91
92 if params:
93- url += '?' + urlencode(params)
94+ url = url_to_bytestring(url) + '?' + urlencode(params)
95
96 req = urllib2.Request(url, headers=headers)
97 if not req.has_header('user-agent'):
98
99=== modified file 'ibid/utils/html.py'
100--- ibid/utils/html.py 2009-12-29 09:39:33 +0000
101+++ ibid/utils/html.py 2010-01-04 21:42:14 +0000
102@@ -8,6 +8,7 @@
103 from BeautifulSoup import BeautifulSoup
104
105 from ibid.compat import ElementTree
106+from ibid.utils import url_to_bytestring
107
108 class ContentTypeException(Exception):
109 pass
110@@ -15,7 +16,7 @@
111 def get_html_parse_tree(url, data=None, headers={}, treetype='beautifulsoup'):
112 "Request a URL, parse with html5lib, and return a parse tree from it"
113
114- req = urllib2.Request(url, data, headers)
115+ req = urllib2.Request(url_to_bytestring(url), data, headers)
116 f = urllib2.urlopen(req)
117
118 if f.info().gettype() not in ('text/html', 'application/xhtml+xml'):

Subscribers

People subscribed via source and target branches