Merge lp:~stefanor/ibid/url-terminology-528779 into lp:~ibid-core/ibid/old-trunk-1.6

Proposed by Stefano Rivera
Status: Merged
Approved by: Stefano Rivera
Approved revision: 913
Merged at revision: 919
Proposed branch: lp:~stefanor/ibid/url-terminology-528779
Merge into: lp:~ibid-core/ibid/old-trunk-1.6
Diff against target: 157 lines (+30/-17)
6 files modified
docs/api/ibid.utils.rst (+3/-3)
ibid/plugins/ascii.py (+2/-2)
ibid/plugins/languages.py (+2/-2)
ibid/plugins/network.py (+2/-2)
ibid/utils/__init__.py (+19/-6)
ibid/utils/html.py (+2/-2)
To merge this branch: bzr merge lp:~stefanor/ibid/url-terminology-528779
Reviewer Review Type Date Requested Status
Michael Gorven Approve
Jonathan Hitchcock Approve
Max Rabkin Approve
Review via email: mp+21402@code.launchpad.net

Commit message

Rename ibid.utils.url_to_bytestring to iri_to_uri, and improve the accuracy of the IRI-to-URI conversion

To post a comment you must log in.
Revision history for this message
Max Rabkin (max-rabkin) :
review: Approve
Revision history for this message
Jonathan Hitchcock (vhata) :
review: Approve
Revision history for this message
Michael Gorven (mgorven) wrote :

 review approve

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'docs/api/ibid.utils.rst'
2--- docs/api/ibid.utils.rst 2010-03-02 20:29:27 +0000
3+++ docs/api/ibid.utils.rst 2010-03-15 21:08:14 +0000
4@@ -122,10 +122,10 @@
5
6 Is *url* a valid URL? (according to :func:`url_regex`)
7
8-.. function:: url_to_bytestring(url)
9+.. function:: iri_to_uri(iri)
10
11- Convert a unicode *url* to punycode host and UTF-8 path.
12- This allows IDN URLs to be opened with :mod:`urllib`.
13+ Convert a unicode *iri* to punycode host and UTF-8 path.
14+ This allows IRIs to be opened with :mod:`urllib`.
15
16 Web Service Functions
17 ---------------------
18
19=== modified file 'ibid/plugins/ascii.py'
20--- ibid/plugins/ascii.py 2010-02-20 21:58:38 +0000
21+++ ibid/plugins/ascii.py 2010-03-15 21:08:14 +0000
22@@ -17,7 +17,7 @@
23
24 from ibid.config import Option, IntOption
25 from ibid.plugins import Processor, match
26-from ibid.utils import file_in_path, url_to_bytestring
27+from ibid.utils import file_in_path, iri_to_uri
28
29 """
30 Dependencies:
31@@ -60,7 +60,7 @@
32 url += '/'
33
34 try:
35- f = urlopen(url_to_bytestring(url))
36+ f = urlopen(iri_to_uri(url))
37 except HTTPError, e:
38 event.addresponse(u'Sorry, error fetching URL: %s', BaseHTTPRequestHandler.responses[e.code][0])
39 return
40
41=== modified file 'ibid/plugins/languages.py'
42--- ibid/plugins/languages.py 2010-03-02 19:41:09 +0000
43+++ ibid/plugins/languages.py 2010-03-15 21:08:14 +0000
44@@ -11,7 +11,7 @@
45 from ibid.plugins import Processor, match
46 from ibid.config import Option, IntOption
47 from ibid.utils import decode_htmlentities, json_webservice, human_join, \
48- is_url, url_to_bytestring
49+ is_url, iri_to_uri
50
51 features = {}
52
53@@ -192,7 +192,7 @@
54
55 if is_url(text):
56 if urlparse(text).scheme in ('', 'http'):
57- url = url_to_bytestring(text)
58+ url = iri_to_uri(text)
59 query = {'sl': src_lang, 'tl': dest_lang, 'u': url}
60 event.addresponse(u'http://translate.google.com/translate?' +
61 urlencode(query))
62
63=== modified file 'ibid/plugins/network.py'
64--- ibid/plugins/network.py 2010-03-07 10:08:03 +0000
65+++ ibid/plugins/network.py 2010-03-15 21:08:14 +0000
66@@ -22,7 +22,7 @@
67 from ibid.plugins import Processor, match, authorise
68 from ibid.config import Option, IntOption, FloatOption, DictOption
69 from ibid.utils import file_in_path, get_country_codes, get_process_output, \
70- human_join, unicode_output, url_to_bytestring
71+ human_join, unicode_output, iri_to_uri
72
73 features = {}
74
75@@ -391,7 +391,7 @@
76
77 try:
78 try:
79- conn.request(method.upper(), url_to_bytestring(url),
80+ conn.request(method.upper(), iri_to_uri(url),
81 headers=headers)
82 response = conn.getresponse()
83 data = response.read(self.max_size)
84
85=== modified file 'ibid/utils/__init__.py'
86--- ibid/utils/__init__.py 2010-03-07 10:06:56 +0000
87+++ ibid/utils/__init__.py 2010-03-15 21:08:14 +0000
88@@ -84,7 +84,7 @@
89
90 exists = os.path.isfile(cachefile)
91
92- req = urllib2.Request(url_to_bytestring(url))
93+ req = urllib2.Request(iri_to_uri(url))
94 for name, value in headers.iteritems():
95 req.add_header(name, value)
96 if not req.has_header('user-agent'):
97@@ -190,12 +190,25 @@
98 class JSONException(Exception):
99 pass
100
101-def url_to_bytestring(url):
102+def iri_to_uri(url):
103 "Expand an IDN hostname and UTF-8 encode the path of a unicode URL"
104 parts = list(urlparse(url))
105- host = parts[1].split(':')
106- host[0] = host[0].encode('idna')
107- parts[1] = ':'.join(host)
108+ username, passwd, host, port = re.match(
109+ r'^(?:(.*)(?::(.*))?@)?(.*)(?::(.*))?$', parts[1]).groups()
110+ parts[1] = ''
111+ if username:
112+ parts[1] = quote(username.encode('utf-8'))
113+ if passwd:
114+ parts[1] += ':' + quote(passwd.encode('utf-8'))
115+ parts[1] += '@'
116+ if host:
117+ if parts[0].lower() in ('http', 'https', 'ftp'):
118+ parts[1] += host.encode('idna')
119+ else:
120+ parts[1] += quote(host.encode('utf-8'))
121+ if port:
122+ parts[1] += ':' + quote(port.encode('utf-8'))
123+
124 parts[2] = quote(parts[2].encode('utf-8'), '/%')
125 return urlunparse(parts).encode('utf-8')
126
127@@ -234,7 +247,7 @@
128 params[key] = params[key].encode('utf-8')
129
130 if params:
131- url = url_to_bytestring(url) + '?' + urlencode(params)
132+ url = iri_to_uri(url) + '?' + urlencode(params)
133
134 req = urllib2.Request(url, headers=headers)
135 if not req.has_header('user-agent'):
136
137=== modified file 'ibid/utils/html.py'
138--- ibid/utils/html.py 2010-03-07 09:42:37 +0000
139+++ ibid/utils/html.py 2010-03-15 21:08:14 +0000
140@@ -12,7 +12,7 @@
141 from BeautifulSoup import BeautifulSoup
142
143 from ibid.compat import ElementTree
144-from ibid.utils import url_to_bytestring
145+from ibid.utils import iri_to_uri
146
147 class ContentTypeException(Exception):
148 pass
149@@ -20,7 +20,7 @@
150 def get_html_parse_tree(url, data=None, headers={}, treetype='beautifulsoup'):
151 "Request a URL, parse with html5lib, and return a parse tree from it"
152
153- req = urllib2.Request(url_to_bytestring(url), data, headers)
154+ req = urllib2.Request(iri_to_uri(url), data, headers)
155 f = urllib2.urlopen(req)
156
157 if f.info().gettype() not in ('text/html', 'application/xhtml+xml'):

Subscribers

People subscribed via source and target branches