Merge lp:~fwereade/pyjuju/webdav-unicode-paths into lp:pyjuju

Proposed by William Reade
Status: Merged
Approved by: Gustavo Niemeyer
Approved revision: 329
Merged at revision: 334
Proposed branch: lp:~fwereade/pyjuju/webdav-unicode-paths
Merge into: lp:pyjuju
Diff against target: 181 lines (+55/-18)
4 files modified
ensemble/providers/ec2/files.py (+2/-1)
ensemble/providers/ec2/tests/test_files.py (+26/-4)
ensemble/providers/orchestra/files.py (+12/-6)
ensemble/providers/orchestra/tests/test_files.py (+15/-7)
To merge this branch: bzr merge lp:~fwereade/pyjuju/webdav-unicode-paths
Reviewer Review Type Date Requested Status
Gustavo Niemeyer Approve
Kapil Thangavelu (community) Approve
Review via email: mp+72579@code.launchpad.net

Description of the change

Should be a trivial; just changed it to match the EC2 implementation.

To post a comment you must log in.
Revision history for this message
Kapil Thangavelu (hazmat) wrote :

lgtm, +1

review: Approve
Revision history for this message
Gustavo Niemeyer (niemeyer) wrote :

I'm not entirely sure this is the right thing to do.

[1]

A few questions:

- Shouldn't this be URL-encoding the string, rather than simply encoding into UTF-8?

- What happens if the URL actually contains non-ASCII characters? Will EC2/Orchestra explode?

review: Needs Information
Revision history for this message
William Reade (fwereade) wrote :

Hmm, I vaguely assumed I was following an agreed-upon restriction.

[1]

- Yes.

- No idea.

I'll look into it.

Revision history for this message
William Reade (fwereade) wrote :

EC2 bucket names are restricted to a subset of ascii; from [1], "The name for a key is a sequence of Unicode characters whose UTF-8 encoding is at most 1024 bytes long."

From [2], for orchestra, the WebDAV paths should be utf-8 encoded, then urlencoded; DNS names should be PunyCoded.

I suspect that any code I write to do this properly is unlikely to be executed outside a test context, but it shouldn't take too long to do the Right Thing, so I may as well ;).

[1] http://docs.amazonwebservices.com/AmazonS3/latest/dev/index.html?UsingMetadata.html
[2] http://www.w3.org/International/articles/idn-and-iri/

Revision history for this message
Gustavo Niemeyer (niemeyer) wrote :

> I suspect that any code I write to do this properly is unlikely to be
> executed outside a test context

You'd be surprised with what people do. ;-)

[3]

9 + name = _safe_string(name)
10 + path = "%s/%s" % (self._bucket, urllib.quote(name))
11 signed = hmac.new(self._s3.creds.secret_key, digestmod=sha)
12 signed.update("GET\n\n\n%s\n/%s" % (expires, path))

I'm fine with just going ahead with this and fixing it later if necessary because
we've already spent too much time on this detail, but I won't be surprised if we
find issues with this in practice. The quoting algorithm used by urllib.quote
and the one expected by Amazon must match _precisely_ for this to work, and must
continue to be so in the future.

review: Approve
329. By William Reade

merge trunk

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ensemble/providers/ec2/files.py'
2--- ensemble/providers/ec2/files.py 2011-08-24 20:58:41 +0000
3+++ ensemble/providers/ec2/files.py 2011-08-29 16:11:24 +0000
4@@ -40,7 +40,8 @@
5 """
6 # URLs are good for 10 years.
7 expires = int(time.time()) + 365 * 24 * 3600 * 10
8- path = urllib.quote("%s/%s" % (self._bucket, name))
9+ name = _safe_string(name)
10+ path = "%s/%s" % (self._bucket, urllib.quote(name))
11 signed = hmac.new(self._s3.creds.secret_key, digestmod=sha)
12 signed.update("GET\n\n\n%s\n/%s" % (expires, path))
13 signature = urllib.quote_plus(b64encode(signed.digest()).strip())
14
15=== modified file 'ensemble/providers/ec2/tests/test_files.py'
16--- ensemble/providers/ec2/tests/test_files.py 2011-08-25 15:59:32 +0000
17+++ ensemble/providers/ec2/tests/test_files.py 2011-08-29 16:11:24 +0000
18@@ -47,12 +47,14 @@
19 content = "blah blah"
20 control_bucket = self.get_config()["control-bucket"]
21 self.s3.put_object(
22- control_bucket, "pirates/content.txt", content)
23+ control_bucket,
24+ "\xe2\x99\xa3\xe2\x99\xa6\xe2\x99\xa5\xe2\x99\xa0.txt",
25+ content)
26 self.mocker.result(succeed(""))
27 self.mocker.replay()
28
29 storage = self.get_storage()
30- d = storage.put(u"pirates/content.txt", StringIO(content))
31+ d = storage.put(u"\u2663\u2666\u2665\u2660.txt", StringIO(content))
32
33 def validate_result(result):
34 self.assertIdentical(result, "")
35@@ -125,6 +127,25 @@
36 "Expires=1628829969",
37 "Signature=8A%2BF4sk48OmJ8xfPoOY7U0%2FacvM%3D"])
38
39+ def test_get_url_unicode(self):
40+ """A url can be generated for *any* stored file."""
41+ self.mocker.reset()
42+
43+ # Freeze time for the hmac comparison
44+ self.patch(time, "time", lambda: 1315469969.311376)
45+
46+ storage = self.get_storage()
47+ url = storage.get_url(u"\u2663\u2666\u2665\u2660.txt")
48+ self.assertTrue(url.startswith(
49+ "https://s3.amazonaws.com/moon/"
50+ "%E2%99%A3%E2%99%A6%E2%99%A5%E2%99%A0.txt"))
51+ params = url[url.index("?") + 1:].split("&")
52+ self.assertEqual(
53+ sorted(params),
54+ ["AWSAccessKeyId=0f62e973d5f8",
55+ "Expires=1630829969",
56+ "Signature=bbmdpkLqmrY4ebc2eoCJgt95ojg%3D"])
57+
58 def test_get_file(self):
59 """Retrieving a file from storage returns a temporary file."""
60 content = "blah blah"
61@@ -150,7 +171,8 @@
62 control_bucket = self.get_config()["control-bucket"]
63
64 def match_string(s):
65- self.assertEqual(s, "pirates/content.txt")
66+ self.assertEqual(
67+ s, "\xe2\x99\xa3\xe2\x99\xa6\xe2\x99\xa5\xe2\x99\xa0.txt")
68 self.assertFalse(isinstance(s, unicode))
69 return True
70
71@@ -159,7 +181,7 @@
72 self.mocker.replay()
73
74 storage = self.get_storage()
75- d = storage.get(u"pirates/content.txt")
76+ d = storage.get(u"\u2663\u2666\u2665\u2660.txt")
77
78 def validate_result(result):
79 self.assertEqual(result.read(), content)
80
81=== modified file 'ensemble/providers/orchestra/files.py'
82--- ensemble/providers/orchestra/files.py 2011-08-10 22:01:56 +0000
83+++ ensemble/providers/orchestra/files.py 2011-08-29 16:11:24 +0000
84@@ -1,4 +1,6 @@
85 from cStringIO import StringIO
86+import urllib
87+import urlparse
88
89 from twisted.web.client import getPage
90 from twisted.web.error import Error
91@@ -11,14 +13,18 @@
92 def __init__(self, base_url):
93 self._base_url = base_url
94
95- def _key_to_url(self, name):
96- return "%s/%s" % (self._base_url, name)
97-
98 def get_url(self, name):
99- return self._key_to_url(name)
100+ url = u"/".join((self._base_url, name))
101+ # query and fragment are irrelevant to our purposes
102+ scheme, netloc, path = urlparse.urlsplit(url)[:3]
103+ return urlparse.urlunsplit((
104+ str(scheme),
105+ netloc.encode("idna"),
106+ urllib.quote(path.encode("utf-8")),
107+ "", ""))
108
109 def get(self, name):
110- url = self._key_to_url(name)
111+ url = self.get_url(name)
112 d = getPage(url)
113 d.addCallback(StringIO)
114
115@@ -31,7 +37,7 @@
116 return d
117
118 def put(self, remote_path, file_object):
119- url = self._key_to_url(remote_path)
120+ url = self.get_url(remote_path)
121 data = file_object.read()
122 d = getPage(url, method="PUT", postdata=data)
123 d.addCallback(lambda _: True)
124
125=== modified file 'ensemble/providers/orchestra/tests/test_files.py'
126--- ensemble/providers/orchestra/tests/test_files.py 2011-08-22 23:03:03 +0000
127+++ ensemble/providers/orchestra/tests/test_files.py 2011-08-29 16:11:24 +0000
128@@ -8,15 +8,16 @@
129 from ensemble.providers.orchestra import MachineProvider
130
131
132-def get_file_storage(with_storage_url=True):
133- config = {"storage-url": "http://somewhe.re/webdav",
134- "orchestra-server": "somewhereel.se",
135+def get_file_storage(custom_config=None):
136+ config = {"orchestra-server": "somewhereel.se",
137 "orchestra-user": "user",
138 "orchestra-pass": "pass",
139 "acquired-mgmt-class": "acquired",
140 "available-mgmt-class": "available"}
141- if not with_storage_url:
142- del config["storage-url"]
143+ if custom_config is None:
144+ config["storage-url"] = "http://somewhe.re/webdav"
145+ else:
146+ config.update(custom_config)
147 provider = MachineProvider("blah", config)
148 return provider.get_file_storage()
149
150@@ -29,7 +30,7 @@
151 self.mocker.result(succeed("pulley"))
152 self.mocker.replay()
153
154- fs = get_file_storage(with_storage_url=False)
155+ fs = get_file_storage({})
156 d = fs.get("rubber/chicken")
157
158 def verify(result):
159@@ -78,6 +79,13 @@
160 url = fs.get_url("rubber/chicken")
161 self.assertEqual(url, "http://somewhe.re/webdav/rubber/chicken")
162
163+ def test_get_url_unicode(self):
164+ fs = get_file_storage({"storage-url": u"http://\u2666.co.\u2660"})
165+ url = fs.get_url(u"rubber/\u2665/chicken")
166+ self.assertEqual(
167+ url, "http://xn--h6h.co.xn--b6h/rubber/%E2%99%A5/chicken")
168+ self.assertInstance(url, str)
169+
170 def test_put_works(self):
171 getPage = self.mocker.replace("twisted.web.client.getPage")
172 getPage("http://somewhe.re/webdav/rubber/chicken",
173@@ -100,7 +108,7 @@
174 self.mocker.result(succeed(None))
175 self.mocker.replay()
176
177- fs = get_file_storage(with_storage_url=False)
178+ fs = get_file_storage({})
179 d = fs.put("rubber/chicken", StringIO("pulley"))
180
181 def verify(result):

Subscribers

People subscribed via source and target branches

to status/vote changes: