Merge lp:~jtv/gwacl/only-batch-list-blobs into lp:gwacl

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: 105
Merged at revision: 99
Proposed branch: lp:~jtv/gwacl/only-batch-list-blobs
Merge into: lp:gwacl
Diff against target: 362 lines (+208/-27)
4 files modified
storage_base.go (+79/-23)
storage_base_test.go (+127/-4)
xmlobjects.go (+1/-0)
xmlobjects_test.go (+1/-0)
To merge this branch: bzr merge lp:~jtv/gwacl/only-batch-list-blobs
Reviewer Review Type Date Requested Status
Julian Edwards (community) Approve
Review via email: mp+161375@code.launchpad.net

Commit message

Support batched responses from List Blobs.

Description of the change

This duplicates the logic from ListContainers(). It's basically my previous branch, but without the refactoring. The next step is to factor out the awful commonality.

Repeated jobs that I'd like to factor out include:
 * Interpolation of the URLs for storage accounts, containers, and files.
 * Adding of query parameters to a storage URL.
 * Stripping markers and adding them to URL queries.
 * Generation, signing, and send() of HTTP requests.

A bit harder to do without complicating things is the commonality between the batching loops in ListContainers() and ListBlobs().

I'd also like to remove the useless (and in this case, sort of misleading) Marker field from BlobsEnumerationResult.

Jeroen

To post a comment you must log in.
Revision history for this message
Julian Edwards (julian-edwards) wrote :

s/for/For/ on line 102 of the diff, otherwise nothing controversial here, thanks.

review: Approve
lp:~jtv/gwacl/only-batch-list-blobs updated
105. By Jeroen T. Vermeulen

Review change. Failed to upper-case a letter in a comment. Cheap keyboard.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'storage_base.go'
2--- storage_base.go 2013-04-26 03:53:37 +0000
3+++ storage_base.go 2013-04-30 05:28:25 +0000
4@@ -265,48 +265,53 @@
5 return resp, nil
6 }
7
8+// addURLQueryParam adds a query parameter to a URL (and escapes as needed).
9+func addURLQueryParam(originalURL, key, value string) (string, error) {
10+ parsedURL, err := url.Parse(originalURL)
11+ if err != nil {
12+ // Not much we can add beyond that the URL doesn't parse. Leave it
13+ // to the caller to provide context.
14+ return "", err
15+ }
16+ query := parsedURL.Query()
17+ query.Add(key, value)
18+ parsedURL.RawQuery = query.Encode()
19+ return parsedURL.String(), nil
20+}
21+
22 // getListContainersBatch calls the "List Containers" operation on the storage
23-// API, and returns a single batch of results; its "next marker" for batching,
24-// and an error code.
25+// API, and returns a single batch of results.
26 // The marker argument should be empty for a new List Containers request. for
27 // subsequent calls to get additional batches of the same result, pass the
28-// "next marker" returned by the previous call.
29-// The "next marker" will be empty on the last batch.
30-func (context *StorageContext) getListContainersBatch(marker string) (*ContainerEnumerationResults, string, error) {
31+// NextMarker from the previous call's result.
32+func (context *StorageContext) getListContainersBatch(marker string) (*ContainerEnumerationResults, error) {
33+ var err error
34 uri := interpolateURL("http://___.blob.core.windows.net/?comp=list", context.Account)
35 if marker != "" {
36 // Add the marker argument. Do it after interpolation, in case the
37 // marker string might contain the interpolation's placeholder
38 // sequence.
39- parsedURL, err := url.Parse(uri)
40+ uri, err = addURLQueryParam(uri, "marker", marker)
41 if err != nil {
42- return nil, "", err
43+ return nil, fmt.Errorf("malformed storage account URL '%s': %v", uri, err)
44 }
45- query := parsedURL.Query()
46- query.Set("marker", marker)
47- parsedURL.RawQuery = query.Encode()
48- uri = parsedURL.String()
49 }
50 req, err := http.NewRequest("GET", uri, nil)
51 if err != nil {
52- return nil, "", err
53+ return nil, err
54 }
55 addStandardHeaders(req, context.Account, context.Key, "2012-02-12")
56 containers := ContainerEnumerationResults{}
57 _, err = context.send(req, &containers, http.StatusOK)
58 if err != nil {
59- return nil, "", err
60+ return nil, err
61 }
62
63- // The response may contain a NextMarker field, to let us request a
64- // subsequent batch of results. The XML parser won't trim whitespace out
65- // of the marker tag, so we do that here.
66- nextMarker := strings.TrimSpace(containers.NextMarker)
67- return &containers, nextMarker, nil
68+ return &containers, nil
69 }
70
71-// ListContainers sends a request to the storage service to list the containers
72-// in the storage account. error is non-nil if an error occurred.
73+// ListContainers requests from the storage service a list of containers
74+// in the storage account.
75 func (context *StorageContext) ListContainers() (*ContainerEnumerationResults, error) {
76 containers := make([]Container, 0)
77 var batch *ContainerEnumerationResults
78@@ -318,10 +323,14 @@
79 var err error
80 // Don't use := here or you'll shadow variables from the function's
81 // outer scopes.
82- batch, nextMarker, err = context.getListContainersBatch(marker)
83+ batch, err = context.getListContainersBatch(marker)
84 if err != nil {
85 return nil, err
86 }
87+ // The response may contain a NextMarker field, to let us request a
88+ // subsequent batch of results. The XML parser won't trim whitespace out
89+ // of the marker tag, so we do that here.
90+ nextMarker = strings.TrimSpace(batch.NextMarker)
91 containers = append(containers, batch.Containers...)
92 }
93
94@@ -334,11 +343,25 @@
95 return batch, nil
96 }
97
98-// Send a request to the storage service to list the blobs in a container.
99-func (context *StorageContext) ListBlobs(container string) (*BlobEnumerationResults, error) {
100+// getListBlobsBatch calls the "List Blobs" operation on the storage API, and
101+// returns a single batch of results.
102+// The marker argument should be empty for a new List Blobs request. For
103+// subsequent calls to get additional batches of the same result, pass the
104+// NextMarker from the previous call's result.
105+func (context *StorageContext) getListBlobsBatch(container, marker string) (*BlobEnumerationResults, error) {
106+ var err error
107 uri := interpolateURL(
108 "http://___.blob.core.windows.net/___?restype=container&comp=list",
109 context.Account, container)
110+ if marker != "" {
111+ // Add the marker argument. Do it after interpolation, in case the
112+ // marker string might contain the interpolation's placeholder
113+ // sequence.
114+ uri, err = addURLQueryParam(uri, "marker", marker)
115+ if err != nil {
116+ return nil, fmt.Errorf("malformed storage account URL '%s': %v", uri, err)
117+ }
118+ }
119 req, err := http.NewRequest("GET", uri, nil)
120 if err != nil {
121 return nil, err
122@@ -349,6 +372,39 @@
123 return blob, err
124 }
125
126+// ListBlobs requests from the API a list of blobs in a container.
127+func (context *StorageContext) ListBlobs(container string) (*BlobEnumerationResults, error) {
128+ blobs := make([]Blob, 0)
129+ var batch *BlobEnumerationResults
130+
131+ // Request the initial result, using the empty marker. Then, for as long
132+ // as the result has a nonempty NextMarker, request the next batch using
133+ // that marker.
134+ // This loop is very similar to the one in ListContainers().
135+ for marker, nextMarker := "", "x"; nextMarker != ""; marker = nextMarker {
136+ var err error
137+ // Don't use := here or you'll shadow variables from the function's
138+ // outer scopes.
139+ batch, err = context.getListBlobsBatch(container, marker)
140+ if err != nil {
141+ return nil, err
142+ }
143+ // The response may contain a NextMarker field, to let us request a
144+ // subsequent batch of results. The XML parser won't trim whitespace out
145+ // of the marker tag, so we do that here.
146+ nextMarker = strings.TrimSpace(batch.NextMarker)
147+ blobs = append(blobs, batch.Blobs...)
148+ }
149+
150+ // There's more in a BlobsEnumerationResults than just the blobs.
151+ // Return the latest batch, but give it the full cumulative blobs list
152+ // instead of just the last batch.
153+ // To the caller, this will look like they made one call to Azure's
154+ // List Blobs method, but batch size was unlimited.
155+ batch.Blobs = blobs
156+ return batch, nil
157+}
158+
159 // Send a request to the storage service to create a new container. If the
160 // request fails, error is non-nil.
161 func (context *StorageContext) CreateContainer(container string) error {
162
163=== modified file 'storage_base_test.go'
164--- storage_base_test.go 2013-04-26 03:58:24 +0000
165+++ storage_base_test.go 2013-04-30 05:28:25 +0000
166@@ -266,6 +266,48 @@
167 }
168 }
169
170+type TestAddURLQueryParam struct{}
171+
172+var _ = Suite(&TestAddURLQueryParam{})
173+
174+func (*TestAddURLQueryParam) TestAddURLQueryParamUsesBaseURL(c *C) {
175+ baseURL := "http://example.com"
176+
177+ extendedURL, err := addURLQueryParam(baseURL, "key", "value")
178+ c.Assert(err, IsNil)
179+
180+ parsedURL, err := url.Parse(extendedURL)
181+ c.Assert(err, IsNil)
182+ c.Check(parsedURL.Scheme, Equals, "http")
183+ c.Check(parsedURL.Host, Equals, "example.com")
184+}
185+
186+func (suite *TestAddURLQueryParam) TestEscapesParam(c *C) {
187+ key := "key&key"
188+ value := "value%value"
189+
190+ uri, err := addURLQueryParam("http://example.com", key, value)
191+ c.Assert(err, IsNil)
192+
193+ parsedURL, err := url.Parse(uri)
194+ c.Assert(err, IsNil)
195+ c.Check(parsedURL.Query()[key], DeepEquals, []string{value})
196+}
197+
198+// The parameter may safely contain the placeholder that we happen to use
199+// for URL interpolation.
200+func (suite *TestAddURLQueryParam) TestDoesNotInterpolateParam(c *C) {
201+ key := "key" + interpolationPlaceholder + "key"
202+ value := "value" + interpolationPlaceholder + "value"
203+
204+ uri, err := addURLQueryParam("http://example.com", key, value)
205+ c.Assert(err, IsNil)
206+
207+ parsedURL, err := url.Parse(uri)
208+ c.Assert(err, IsNil)
209+ c.Check(parsedURL.Query()[key], DeepEquals, []string{value})
210+}
211+
212 type TestListContainers struct{}
213
214 var _ = Suite(&TestListContainers{})
215@@ -391,7 +433,7 @@
216
217 // Call getListContainersBatch. This will fail because of the empty
218 // response, but no matter. We only care about the request.
219- _, _, err := context.getListContainersBatch("thismarkerhere")
220+ _, err := context.getListContainersBatch("thismarkerhere")
221 c.Assert(err, ErrorMatches, ".*Failed to deserialize data.*")
222 c.Assert(transport.ExchangeCount, Equals, 1)
223
224@@ -408,7 +450,7 @@
225 context.client = &http.Client{Transport: &transport}
226
227 // The error is OK. We only care about the request.
228- _, _, err := context.getListContainersBatch("")
229+ _, err := context.getListContainersBatch("")
230 c.Assert(err, ErrorMatches, ".*Failed to deserialize data.*")
231 c.Assert(transport.ExchangeCount, Equals, 1)
232
233@@ -427,7 +469,7 @@
234 context.client = &http.Client{Transport: &transport}
235
236 // The error is OK. We only care about the request.
237- _, _, err := context.getListContainersBatch("x&y")
238+ _, err := context.getListContainersBatch("x&y")
239 c.Assert(err, ErrorMatches, ".*Failed to deserialize data.*")
240 c.Assert(transport.ExchangeCount, Equals, 1)
241
242@@ -448,7 +490,7 @@
243
244 // An error about the http response is fine. What matters is that (1) we
245 // don't fail while putting together the URL, and (2) we get the right URL.
246- _, _, err := context.getListContainersBatch(marker)
247+ _, err := context.getListContainersBatch(marker)
248 c.Assert(err, ErrorMatches, ".*Failed to deserialize data.*")
249 c.Assert(transport.ExchangeCount, Equals, 1)
250
251@@ -549,6 +591,87 @@
252 c.Assert(err, NotNil)
253 }
254
255+// ListBlobs combines multiple batches of output.
256+func (suite *TestListBlobs) TestBatchedResult(c *C) {
257+ firstBlob := "blob1"
258+ lastBlob := "blob2"
259+ marker := "moreplease"
260+ firstBatch := http.Response{
261+ StatusCode: http.StatusOK,
262+ Body: makeResponseBody(fmt.Sprintf(`
263+ <EnumerationResults>
264+ <Blobs>
265+ <Blob>
266+ <Name>%s</Name>
267+ </Blob>
268+ </Blobs>
269+ <NextMarker>%s</NextMarker>
270+ </EnumerationResults>
271+ `, firstBlob, marker)),
272+ }
273+ lastBatch := http.Response{
274+ StatusCode: http.StatusOK,
275+ Body: makeResponseBody(fmt.Sprintf(`
276+ <EnumerationResults>
277+ <Blobs>
278+ <Blob>
279+ <Name>%s</Name>
280+ </Blob>
281+ </Blobs>
282+ </EnumerationResults>
283+ `, lastBlob)),
284+ }
285+ transport := TestTransport2{}
286+ transport.AddExchange(&firstBatch, nil)
287+ transport.AddExchange(&lastBatch, nil)
288+ context := makeStorageContext()
289+ context.client = &http.Client{Transport: &transport}
290+
291+ blobs, err := context.ListBlobs("mycontainer")
292+ c.Assert(err, IsNil)
293+
294+ c.Check(len(blobs.Blobs), Equals, 2)
295+ c.Check(blobs.Blobs[0].Name, Equals, firstBlob)
296+ c.Check(blobs.Blobs[1].Name, Equals, lastBlob)
297+}
298+
299+func (suite *TestListBlobs) TestGetListBlobsBatchPassesMarker(c *C) {
300+ transport := TestTransport2{}
301+ transport.AddExchange(&http.Response{StatusCode: http.StatusOK, Body: Empty}, nil)
302+ context := makeStorageContext()
303+ context.client = &http.Client{Transport: &transport}
304+
305+ // Call getListBlobsBatch. This will fail because of the empty
306+ // response, but no matter. We only care about the request.
307+ _, err := context.getListBlobsBatch("mycontainer", "thismarkerhere")
308+ c.Assert(err, ErrorMatches, ".*Failed to deserialize data.*")
309+ c.Assert(transport.ExchangeCount, Equals, 1)
310+
311+ query := transport.Exchanges[0].Request.URL.RawQuery
312+ values, err := url.ParseQuery(query)
313+ c.Assert(err, IsNil)
314+ c.Check(values["marker"], DeepEquals, []string{"thismarkerhere"})
315+}
316+
317+func (suite *TestListBlobs) TestGetListBlobsBatchDoesNotPassEmptyMarker(c *C) {
318+ transport := TestTransport2{}
319+ transport.AddExchange(&http.Response{StatusCode: http.StatusOK, Body: Empty}, nil)
320+ context := makeStorageContext()
321+ context.client = &http.Client{Transport: &transport}
322+
323+ // The error is OK. We only care about the request.
324+ _, err := context.getListBlobsBatch("mycontainer", "")
325+ c.Assert(err, ErrorMatches, ".*Failed to deserialize data.*")
326+ c.Assert(transport.ExchangeCount, Equals, 1)
327+
328+ query := transport.Exchanges[0].Request.URL.RawQuery
329+ values, err := url.ParseQuery(query)
330+ c.Assert(err, IsNil)
331+ marker, present := values["marker"]
332+ c.Check(present, Equals, false)
333+ c.Check(marker, DeepEquals, []string(nil))
334+}
335+
336 type TestCreateContainer struct{}
337
338 var _ = Suite(&TestCreateContainer{})
339
340=== modified file 'xmlobjects.go'
341--- xmlobjects.go 2013-04-02 08:51:18 +0000
342+++ xmlobjects.go 2013-04-30 05:28:25 +0000
343@@ -447,6 +447,7 @@
344 Delimiter string `xml:"Delimiter"`
345 Blobs []Blob `xml:"Blobs>Blob"`
346 BlobPrefixes []string `xml:"Blobs>BlobPrefix>Name"`
347+ NextMarker string `xml:"NextMarker"`
348 }
349
350 func (b *BlobEnumerationResults) Deserialize(data []byte) error {
351
352=== modified file 'xmlobjects_test.go'
353--- xmlobjects_test.go 2013-04-01 07:27:12 +0000
354+++ xmlobjects_test.go 2013-04-30 05:28:25 +0000
355@@ -383,6 +383,7 @@
356 c.Check(r.Prefix, Equals, "prefix")
357 c.Check(r.Marker, Equals, "marker")
358 c.Check(r.Delimiter, Equals, "delimiter")
359+ c.Check(r.NextMarker, Equals, "")
360 b := r.Blobs[0]
361 c.Check(b.Name, Equals, "blob-name")
362 c.Check(b.Snapshot, Equals, "snapshot-date-time")

Subscribers

People subscribed via source and target branches