Merge lp:~fwereade/pyjuju/webdav-unicode-paths into lp:pyjuju

Proposed by William Reade
Status: Merged
Approved by: Gustavo Niemeyer
Approved revision: 329
Merged at revision: 334
Proposed branch: lp:~fwereade/pyjuju/webdav-unicode-paths
Merge into: lp:pyjuju
Diff against target: 181 lines (+55/-18)
4 files modified
ensemble/providers/ec2/files.py (+2/-1)
ensemble/providers/ec2/tests/test_files.py (+26/-4)
ensemble/providers/orchestra/files.py (+12/-6)
ensemble/providers/orchestra/tests/test_files.py (+15/-7)
To merge this branch: bzr merge lp:~fwereade/pyjuju/webdav-unicode-paths
Reviewer Review Type Date Requested Status
Gustavo Niemeyer Approve
Kapil Thangavelu (community) Approve
Review via email: mp+72579@code.launchpad.net

Description of the change

Should be a trivial; just changed it to match the EC2 implementation.

To post a comment you must log in.
Revision history for this message
Kapil Thangavelu (hazmat) wrote :

lgtm, +1

review: Approve
Revision history for this message
Gustavo Niemeyer (niemeyer) wrote :

I'm not entirely sure this is the right thing to do.

[1]

A few questions:

- Shouldn't this be URL-encoding the string, rather than simply encoding into UTF-8?

- What happens if the URL actually contains non-ASCII characters? Will EC2/Orchestra explode?

review: Needs Information
Revision history for this message
William Reade (fwereade) wrote :

Hmm, I vaguely assumed I was following an agreed-upon restriction.

[1]

- Yes.

- No idea.

I'll look into it.

Revision history for this message
William Reade (fwereade) wrote :

EC2 bucket names are restricted to a subset of ascii; from [1], "The name for a key is a sequence of Unicode characters whose UTF-8 encoding is at most 1024 bytes long."

From [2], for orchestra, the WebDAV paths should be utf-8 encoded, then urlencoded; DNS names should be PunyCoded.

I suspect that any code I write to do this properly is unlikely to be executed outside a test context, but it shouldn't take too long to do the Right Thing, so I may as well ;).

[1] http://docs.amazonwebservices.com/AmazonS3/latest/dev/index.html?UsingMetadata.html
[2] http://www.w3.org/International/articles/idn-and-iri/

Revision history for this message
Gustavo Niemeyer (niemeyer) wrote :

> I suspect that any code I write to do this properly is unlikely to be
> executed outside a test context

You'd be surprised with what people do. ;-)

[3]

9 + name = _safe_string(name)
10 + path = "%s/%s" % (self._bucket, urllib.quote(name))
11 signed = hmac.new(self._s3.creds.secret_key, digestmod=sha)
12 signed.update("GET\n\n\n%s\n/%s" % (expires, path))

I'm fine with just going ahead with this and fixing it later if necessary because
we've already spent too much time on this detail, but I won't be surprised if we
find issues with this in practice. The quoting algorithm used by urllib.quote
and the one expected by Amazon must match _precisely_ for this to work, and must
continue to be so in the future.

review: Approve
329. By William Reade

merge trunk

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'ensemble/providers/ec2/files.py'
--- ensemble/providers/ec2/files.py 2011-08-24 20:58:41 +0000
+++ ensemble/providers/ec2/files.py 2011-08-29 16:11:24 +0000
@@ -40,7 +40,8 @@
40 """40 """
41 # URLs are good for 10 years.41 # URLs are good for 10 years.
42 expires = int(time.time()) + 365 * 24 * 3600 * 1042 expires = int(time.time()) + 365 * 24 * 3600 * 10
43 path = urllib.quote("%s/%s" % (self._bucket, name))43 name = _safe_string(name)
44 path = "%s/%s" % (self._bucket, urllib.quote(name))
44 signed = hmac.new(self._s3.creds.secret_key, digestmod=sha)45 signed = hmac.new(self._s3.creds.secret_key, digestmod=sha)
45 signed.update("GET\n\n\n%s\n/%s" % (expires, path))46 signed.update("GET\n\n\n%s\n/%s" % (expires, path))
46 signature = urllib.quote_plus(b64encode(signed.digest()).strip())47 signature = urllib.quote_plus(b64encode(signed.digest()).strip())
4748
=== modified file 'ensemble/providers/ec2/tests/test_files.py'
--- ensemble/providers/ec2/tests/test_files.py 2011-08-25 15:59:32 +0000
+++ ensemble/providers/ec2/tests/test_files.py 2011-08-29 16:11:24 +0000
@@ -47,12 +47,14 @@
47 content = "blah blah"47 content = "blah blah"
48 control_bucket = self.get_config()["control-bucket"]48 control_bucket = self.get_config()["control-bucket"]
49 self.s3.put_object(49 self.s3.put_object(
50 control_bucket, "pirates/content.txt", content)50 control_bucket,
51 "\xe2\x99\xa3\xe2\x99\xa6\xe2\x99\xa5\xe2\x99\xa0.txt",
52 content)
51 self.mocker.result(succeed(""))53 self.mocker.result(succeed(""))
52 self.mocker.replay()54 self.mocker.replay()
5355
54 storage = self.get_storage()56 storage = self.get_storage()
55 d = storage.put(u"pirates/content.txt", StringIO(content))57 d = storage.put(u"\u2663\u2666\u2665\u2660.txt", StringIO(content))
5658
57 def validate_result(result):59 def validate_result(result):
58 self.assertIdentical(result, "")60 self.assertIdentical(result, "")
@@ -125,6 +127,25 @@
125 "Expires=1628829969",127 "Expires=1628829969",
126 "Signature=8A%2BF4sk48OmJ8xfPoOY7U0%2FacvM%3D"])128 "Signature=8A%2BF4sk48OmJ8xfPoOY7U0%2FacvM%3D"])
127129
130 def test_get_url_unicode(self):
131 """A url can be generated for *any* stored file."""
132 self.mocker.reset()
133
134 # Freeze time for the hmac comparison
135 self.patch(time, "time", lambda: 1315469969.311376)
136
137 storage = self.get_storage()
138 url = storage.get_url(u"\u2663\u2666\u2665\u2660.txt")
139 self.assertTrue(url.startswith(
140 "https://s3.amazonaws.com/moon/"
141 "%E2%99%A3%E2%99%A6%E2%99%A5%E2%99%A0.txt"))
142 params = url[url.index("?") + 1:].split("&")
143 self.assertEqual(
144 sorted(params),
145 ["AWSAccessKeyId=0f62e973d5f8",
146 "Expires=1630829969",
147 "Signature=bbmdpkLqmrY4ebc2eoCJgt95ojg%3D"])
148
128 def test_get_file(self):149 def test_get_file(self):
129 """Retrieving a file from storage returns a temporary file."""150 """Retrieving a file from storage returns a temporary file."""
130 content = "blah blah"151 content = "blah blah"
@@ -150,7 +171,8 @@
150 control_bucket = self.get_config()["control-bucket"]171 control_bucket = self.get_config()["control-bucket"]
151172
152 def match_string(s):173 def match_string(s):
153 self.assertEqual(s, "pirates/content.txt")174 self.assertEqual(
175 s, "\xe2\x99\xa3\xe2\x99\xa6\xe2\x99\xa5\xe2\x99\xa0.txt")
154 self.assertFalse(isinstance(s, unicode))176 self.assertFalse(isinstance(s, unicode))
155 return True177 return True
156178
@@ -159,7 +181,7 @@
159 self.mocker.replay()181 self.mocker.replay()
160182
161 storage = self.get_storage()183 storage = self.get_storage()
162 d = storage.get(u"pirates/content.txt")184 d = storage.get(u"\u2663\u2666\u2665\u2660.txt")
163185
164 def validate_result(result):186 def validate_result(result):
165 self.assertEqual(result.read(), content)187 self.assertEqual(result.read(), content)
166188
=== modified file 'ensemble/providers/orchestra/files.py'
--- ensemble/providers/orchestra/files.py 2011-08-10 22:01:56 +0000
+++ ensemble/providers/orchestra/files.py 2011-08-29 16:11:24 +0000
@@ -1,4 +1,6 @@
1from cStringIO import StringIO1from cStringIO import StringIO
2import urllib
3import urlparse
24
3from twisted.web.client import getPage5from twisted.web.client import getPage
4from twisted.web.error import Error6from twisted.web.error import Error
@@ -11,14 +13,18 @@
11 def __init__(self, base_url):13 def __init__(self, base_url):
12 self._base_url = base_url14 self._base_url = base_url
1315
14 def _key_to_url(self, name):
15 return "%s/%s" % (self._base_url, name)
16
17 def get_url(self, name):16 def get_url(self, name):
18 return self._key_to_url(name)17 url = u"/".join((self._base_url, name))
18 # query and fragment are irrelevant to our purposes
19 scheme, netloc, path = urlparse.urlsplit(url)[:3]
20 return urlparse.urlunsplit((
21 str(scheme),
22 netloc.encode("idna"),
23 urllib.quote(path.encode("utf-8")),
24 "", ""))
1925
20 def get(self, name):26 def get(self, name):
21 url = self._key_to_url(name)27 url = self.get_url(name)
22 d = getPage(url)28 d = getPage(url)
23 d.addCallback(StringIO)29 d.addCallback(StringIO)
2430
@@ -31,7 +37,7 @@
31 return d37 return d
3238
33 def put(self, remote_path, file_object):39 def put(self, remote_path, file_object):
34 url = self._key_to_url(remote_path)40 url = self.get_url(remote_path)
35 data = file_object.read()41 data = file_object.read()
36 d = getPage(url, method="PUT", postdata=data)42 d = getPage(url, method="PUT", postdata=data)
37 d.addCallback(lambda _: True)43 d.addCallback(lambda _: True)
3844
=== modified file 'ensemble/providers/orchestra/tests/test_files.py'
--- ensemble/providers/orchestra/tests/test_files.py 2011-08-22 23:03:03 +0000
+++ ensemble/providers/orchestra/tests/test_files.py 2011-08-29 16:11:24 +0000
@@ -8,15 +8,16 @@
8from ensemble.providers.orchestra import MachineProvider8from ensemble.providers.orchestra import MachineProvider
99
1010
11def get_file_storage(with_storage_url=True):11def get_file_storage(custom_config=None):
12 config = {"storage-url": "http://somewhe.re/webdav",12 config = {"orchestra-server": "somewhereel.se",
13 "orchestra-server": "somewhereel.se",
14 "orchestra-user": "user",13 "orchestra-user": "user",
15 "orchestra-pass": "pass",14 "orchestra-pass": "pass",
16 "acquired-mgmt-class": "acquired",15 "acquired-mgmt-class": "acquired",
17 "available-mgmt-class": "available"}16 "available-mgmt-class": "available"}
18 if not with_storage_url:17 if custom_config is None:
19 del config["storage-url"]18 config["storage-url"] = "http://somewhe.re/webdav"
19 else:
20 config.update(custom_config)
20 provider = MachineProvider("blah", config)21 provider = MachineProvider("blah", config)
21 return provider.get_file_storage()22 return provider.get_file_storage()
2223
@@ -29,7 +30,7 @@
29 self.mocker.result(succeed("pulley"))30 self.mocker.result(succeed("pulley"))
30 self.mocker.replay()31 self.mocker.replay()
3132
32 fs = get_file_storage(with_storage_url=False)33 fs = get_file_storage({})
33 d = fs.get("rubber/chicken")34 d = fs.get("rubber/chicken")
3435
35 def verify(result):36 def verify(result):
@@ -78,6 +79,13 @@
78 url = fs.get_url("rubber/chicken")79 url = fs.get_url("rubber/chicken")
79 self.assertEqual(url, "http://somewhe.re/webdav/rubber/chicken")80 self.assertEqual(url, "http://somewhe.re/webdav/rubber/chicken")
8081
82 def test_get_url_unicode(self):
83 fs = get_file_storage({"storage-url": u"http://\u2666.co.\u2660"})
84 url = fs.get_url(u"rubber/\u2665/chicken")
85 self.assertEqual(
86 url, "http://xn--h6h.co.xn--b6h/rubber/%E2%99%A5/chicken")
87 self.assertInstance(url, str)
88
81 def test_put_works(self):89 def test_put_works(self):
82 getPage = self.mocker.replace("twisted.web.client.getPage")90 getPage = self.mocker.replace("twisted.web.client.getPage")
83 getPage("http://somewhe.re/webdav/rubber/chicken",91 getPage("http://somewhe.re/webdav/rubber/chicken",
@@ -100,7 +108,7 @@
100 self.mocker.result(succeed(None))108 self.mocker.result(succeed(None))
101 self.mocker.replay()109 self.mocker.replay()
102110
103 fs = get_file_storage(with_storage_url=False)111 fs = get_file_storage({})
104 d = fs.put("rubber/chicken", StringIO("pulley"))112 d = fs.put("rubber/chicken", StringIO("pulley"))
105113
106 def verify(result):114 def verify(result):

Subscribers

People subscribed via source and target branches

to status/vote changes: