Merge lp:~duplicity-team/duplicity/check-volumes into lp:duplicity/0.6
- check-volumes
- Merge into 0.6-series
Status: | Merged |
---|---|
Merged at revision: | 782 |
Proposed branch: | lp:~duplicity-team/duplicity/check-volumes |
Merge into: | lp:duplicity/0.6 |
Diff against target: |
423 lines (+244/-10) 12 files modified
duplicity-bin (+18/-4) duplicity/backend.py (+29/-0) duplicity/backends/botobackend.py (+19/-0) duplicity/backends/cloudfilesbackend.py (+19/-0) duplicity/backends/giobackend.py (+17/-0) duplicity/backends/localbackend.py (+13/-1) duplicity/backends/u1backend.py (+37/-5) duplicity/commandline.py (+4/-0) duplicity/globals.py (+3/-0) duplicity/log.py (+1/-0) testing/alltests (+1/-0) testing/badupload.py (+83/-0) |
To merge this branch: | bzr merge lp:~duplicity-team/duplicity/check-volumes |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Michael Terry | Pending | ||
Review via email: mp+72826@code.launchpad.net |
Commit message
Description of the change
This is the first pass of checking each volume's size as it is uploaded, as discussed in https:/
edso (ed.so) wrote : | # |
Michael Terry (mterry) wrote : | # |
Having added and tested query support to the Ubuntu One, Rackspace, Amazon S3, GIO, and local backends, I'm marking this branch 'ready for review'.
Edso said he'd look into sftp/ftp later as well.
edso (ed.so) wrote : | # |
On 29.08.2011 05:42, Michael Terry wrote:
> Michael Terry has proposed merging lp:~duplicity-team/duplicity/check-volumes into lp:duplicity.
>
> Requested reviews:
> Michael Terry (mterry)
>
> For more details, see:
> https:/
>
> This is the first pass of checking each volume's size as it is uploaded, as discussed in https:/
>
mt,
did you see my comment on
https:/
?
ede
Michael Terry (mterry) wrote : | # |
> did you see my comment on
> https:/
> ?
Yes. The code gracefully handles backends that don't support querying metadata. It only declares a volume corrupt if the backend successfully determined size (or lack of a file altogether).
edso (ed.so) wrote : | # |
On 29.08.2011 14:42, Michael Terry wrote:
>> did you see my comment on
>> https:/
>> ?
>
> Yes. The code gracefully handles backends that don't support querying metadata. It only declares a volume corrupt if the backend successfully determined size (or lack of a file altogether).
ah, i see it now.. sorry for the noise.. ede
Preview Diff
1 | === modified file 'duplicity-bin' |
2 | --- duplicity-bin 2011-08-23 13:37:06 +0000 |
3 | +++ duplicity-bin 2011-08-29 03:36:23 +0000 |
4 | @@ -269,14 +269,28 @@ |
5 | end_block -= 1 |
6 | return start_index, start_block, end_index, end_block |
7 | |
8 | - def put(tdp, dest_filename): |
9 | + def validate_block(tdp, dest_filename): |
10 | + info = backend.query_info([dest_filename])[dest_filename] |
11 | + if 'size' not in info: |
12 | + return # backend didn't know how to query size |
13 | + size = info['size'] |
14 | + if size is None: |
15 | + return # error querying file |
16 | + if size != tdp.getsize(): |
17 | + code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size) |
18 | + log.FatalError(_("File %s was corrupted during upload.") % dest_filename, |
19 | + log.ErrorCode.volume_wrong_size, code_extra) |
20 | + |
21 | + def put(tdp, dest_filename, vol_num): |
22 | """ |
23 | Retrieve file size *before* calling backend.put(), which may (at least |
24 | in case of the localbackend) rename the temporary file to the target |
25 | instead of copying. |
26 | """ |
27 | putsize = tdp.getsize() |
28 | - backend.put(tdp, dest_filename) |
29 | + if globals.skip_volume != vol_num: # for testing purposes only |
30 | + backend.put(tdp, dest_filename) |
31 | + validate_block(tdp, dest_filename) |
32 | if tdp.stat: |
33 | tdp.delete() |
34 | return putsize |
35 | @@ -350,8 +364,8 @@ |
36 | sig_outfp.flush() |
37 | man_outfp.flush() |
38 | |
39 | - async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename: put(tdp, dest_filename), |
40 | - (tdp, dest_filename))) |
41 | + async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename, vol_num: put(tdp, dest_filename, vol_num), |
42 | + (tdp, dest_filename, vol_num))) |
43 | |
44 | # Log human-readable version as well as raw numbers for machine consumers |
45 | log.Progress('Processed volume %d' % vol_num, diffdir.stats.SourceFileSize) |
46 | |
47 | === modified file 'duplicity/backend.py' |
48 | --- duplicity/backend.py 2011-08-06 15:57:54 +0000 |
49 | +++ duplicity/backend.py 2011-08-29 03:36:23 +0000 |
50 | @@ -361,6 +361,35 @@ |
51 | """ |
52 | raise NotImplementedError() |
53 | |
54 | + # Should never cause FatalError. |
55 | + # Returns a dictionary of dictionaries. The outer dictionary maps |
56 | + # filenames to metadata dictionaries. Supported metadata are: |
57 | + # |
58 | + # 'size': if >= 0, size of file |
59 | + # if -1, file is not found |
60 | + # if None, error querying file |
61 | + # |
62 | + # Returned dictionary is guaranteed to contain a metadata dictionary for |
63 | + # each filename, but not all metadata are guaranteed to be present. |
64 | + def query_info(self, filename_list, raise_errors=True): |
65 | + """ |
66 | + Return metadata about each filename in filename_list |
67 | + """ |
68 | + info = {} |
69 | + if hasattr(self, '_query_list_info'): |
70 | + info = self._query_list_info(filename_list) |
71 | + elif hasattr(self, '_query_file_info'): |
72 | + for filename in filename_list: |
73 | + info[filename] = self._query_file_info(filename) |
74 | + |
75 | + # Fill out any missing entries (may happen if backend has no support |
76 | + # or its query_list support is lazy) |
77 | + for filename in filename_list: |
78 | + if filename not in info: |
79 | + info[filename] = {} |
80 | + |
81 | + return info |
82 | + |
83 | """ use getpass by default, inherited backends may overwrite this behaviour """ |
84 | use_getpass = True |
85 | |
86 | |
87 | === modified file 'duplicity/backends/botobackend.py' |
88 | --- duplicity/backends/botobackend.py 2011-04-04 13:01:12 +0000 |
89 | +++ duplicity/backends/botobackend.py 2011-08-29 03:36:23 +0000 |
90 | @@ -26,6 +26,7 @@ |
91 | from duplicity import log |
92 | from duplicity.errors import * #@UnusedWildImport |
93 | from duplicity.util import exception_traceback |
94 | +from duplicity.backend import retry |
95 | |
96 | class BotoBackend(duplicity.backend.Backend): |
97 | """ |
98 | @@ -294,6 +295,24 @@ |
99 | self.bucket.delete_key(self.key_prefix + filename) |
100 | log.Debug("Deleted %s/%s" % (self.straight_url, filename)) |
101 | |
102 | + @retry |
103 | + def _query_file_info(self, filename, raise_errors=False): |
104 | + try: |
105 | + key = self.bucket.lookup(self.key_prefix + filename) |
106 | + if key is None: |
107 | + return {'size': -1} |
108 | + return {'size': key.size} |
109 | + except Exception, e: |
110 | + log.Warn("Query %s/%s failed: %s" |
111 | + "" % (self.straight_url, |
112 | + filename, |
113 | + str(e))) |
114 | + self.resetConnection() |
115 | + if raise_errors: |
116 | + raise e |
117 | + else: |
118 | + return {'size': None} |
119 | + |
120 | duplicity.backend.register_backend("s3", BotoBackend) |
121 | duplicity.backend.register_backend("s3+http", BotoBackend) |
122 | |
123 | |
124 | === modified file 'duplicity/backends/cloudfilesbackend.py' |
125 | --- duplicity/backends/cloudfilesbackend.py 2011-02-12 15:11:34 +0000 |
126 | +++ duplicity/backends/cloudfilesbackend.py 2011-08-29 03:36:23 +0000 |
127 | @@ -26,6 +26,7 @@ |
128 | from duplicity import log |
129 | from duplicity.errors import * #@UnusedWildImport |
130 | from duplicity.util import exception_traceback |
131 | +from duplicity.backend import retry |
132 | |
133 | class CloudFilesBackend(duplicity.backend.Backend): |
134 | """ |
135 | @@ -140,4 +141,22 @@ |
136 | self.container.delete_object(file) |
137 | log.Debug("Deleted '%s/%s'" % (self.container, file)) |
138 | |
139 | + @retry |
140 | + def _query_file_info(self, filename, raise_errors=False): |
141 | + from cloudfiles.errors import NoSuchObject |
142 | + try: |
143 | + sobject = self.container.get_object(filename) |
144 | + return {'size': sobject.size} |
145 | + except NoSuchObject: |
146 | + return {'size': -1} |
147 | + except Exception, e: |
148 | + log.Warn("Error querying '%s/%s': %s" |
149 | + "" % (self.container, |
150 | + filename, |
151 | + str(e))) |
152 | + if raise_errors: |
153 | + raise e |
154 | + else: |
155 | + return {'size': None} |
156 | + |
157 | duplicity.backend.register_backend("cf+http", CloudFilesBackend) |
158 | |
159 | === modified file 'duplicity/backends/giobackend.py' |
160 | --- duplicity/backends/giobackend.py 2011-06-12 22:25:39 +0000 |
161 | +++ duplicity/backends/giobackend.py 2011-08-29 03:36:23 +0000 |
162 | @@ -164,3 +164,20 @@ |
163 | self.handle_error(raise_errors, e, 'delete', |
164 | target_file.get_parse_name()) |
165 | return |
166 | + |
167 | + @retry |
168 | + def _query_file_info(self, filename, raise_errors=False): |
169 | + """Query attributes on filename""" |
170 | + target_file = self.remote_file.get_child(filename) |
171 | + attrs = gio.FILE_ATTRIBUTE_STANDARD_SIZE |
172 | + try: |
173 | + info = target_file.query_info(attrs, gio.FILE_QUERY_INFO_NONE) |
174 | + return {'size': info.get_size()} |
175 | + except Exception, e: |
176 | + if isinstance(e, gio.Error): |
177 | + if e.code == gio.ERROR_NOT_FOUND: |
178 | + return {'size': -1} # early exit, no need to retry |
179 | + if raise_errors: |
180 | + raise e |
181 | + else: |
182 | + return {'size': None} |
183 | |
184 | === modified file 'duplicity/backends/localbackend.py' |
185 | --- duplicity/backends/localbackend.py 2011-06-17 18:22:28 +0000 |
186 | +++ duplicity/backends/localbackend.py 2011-08-29 03:36:23 +0000 |
187 | @@ -57,7 +57,7 @@ |
188 | code = log.ErrorCode.backend_no_space |
189 | extra = ' '.join([util.escape(x) for x in [file1, file2] if x]) |
190 | extra = ' '.join([op, extra]) |
191 | - if op != 'delete': |
192 | + if op != 'delete' and op != 'query': |
193 | log.FatalError(str(e), code, extra) |
194 | else: |
195 | log.Warn(str(e), code, extra) |
196 | @@ -110,5 +110,17 @@ |
197 | except Exception, e: |
198 | self.handle_error(e, 'delete', self.remote_pathdir.append(filename).name) |
199 | |
200 | + def _query_file_info(self, filename): |
201 | + """Query attributes on filename""" |
202 | + try: |
203 | + target_file = self.remote_pathdir.append(filename) |
204 | + if not os.path.exists(target_file.name): |
205 | + return {'size': -1} |
206 | + target_file.setdata() |
207 | + size = target_file.getsize() |
208 | + return {'size': size} |
209 | + except Exception, e: |
210 | + self.handle_error(e, 'query', target_file.name) |
211 | + return {'size': None} |
212 | |
213 | duplicity.backend.register_backend("file", LocalBackend) |
214 | |
215 | === modified file 'duplicity/backends/u1backend.py' |
216 | --- duplicity/backends/u1backend.py 2011-08-17 14:25:52 +0000 |
217 | +++ duplicity/backends/u1backend.py 2011-08-29 03:36:23 +0000 |
218 | @@ -98,17 +98,15 @@ |
219 | import urllib |
220 | return urllib.quote(url, safe="/~") |
221 | |
222 | - def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None): |
223 | + def parse_error(self, headers, ignore=None): |
224 | from duplicity import log |
225 | - from duplicity import util |
226 | - import json |
227 | |
228 | status = int(headers[0].get('status')) |
229 | if status >= 200 and status < 300: |
230 | - return |
231 | + return None |
232 | |
233 | if ignore and status in ignore: |
234 | - return |
235 | + return None |
236 | |
237 | if status == 400: |
238 | code = log.ErrorCode.backend_permission_denied |
239 | @@ -118,6 +116,18 @@ |
240 | code = log.ErrorCode.backend_no_space |
241 | else: |
242 | code = log.ErrorCode.backend_error |
243 | + return code |
244 | + |
245 | + def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None): |
246 | + from duplicity import log |
247 | + from duplicity import util |
248 | + import json |
249 | + |
250 | + code = self.parse_error(headers, ignore) |
251 | + if code is None: |
252 | + return |
253 | + |
254 | + status = int(headers[0].get('status')) |
255 | |
256 | if file1: |
257 | file1 = file1.encode("utf8") |
258 | @@ -222,5 +232,27 @@ |
259 | answer = auth.request(remote_full, http_method="DELETE") |
260 | self.handle_error(raise_errors, 'delete', answer, remote_full, ignore=[404]) |
261 | |
262 | + @retry |
263 | + def _query_file_info(self, filename, raise_errors=False): |
264 | + """Query attributes on filename""" |
265 | + import json |
266 | + import ubuntuone.couch.auth as auth |
267 | + from duplicity import log |
268 | + remote_full = self.meta_base + self.quote(filename) |
269 | + answer = auth.request(remote_full) |
270 | + |
271 | + code = self.parse_error(answer) |
272 | + if code is not None: |
273 | + if code == log.ErrorCode.backend_not_found: |
274 | + return {'size': -1} |
275 | + elif raise_errors: |
276 | + self.handle_error(raise_errors, 'query', answer, remote_full, filename) |
277 | + else: |
278 | + return {'size': None} |
279 | + |
280 | + node = json.loads(answer[1]) |
281 | + size = node.get('size') |
282 | + return {'size': size} |
283 | + |
284 | duplicity.backend.register_backend("u1", U1Backend) |
285 | duplicity.backend.register_backend("u1+http", U1Backend) |
286 | |
287 | === modified file 'duplicity/commandline.py' |
288 | --- duplicity/commandline.py 2011-08-18 18:09:18 +0000 |
289 | +++ duplicity/commandline.py 2011-08-29 03:36:23 +0000 |
290 | @@ -292,6 +292,10 @@ |
291 | parser.add_option("--fail-on-volume", type="int", |
292 | help=optparse.SUPPRESS_HELP) |
293 | |
294 | + # used in testing only - skips upload for a given volume |
295 | + parser.add_option("--skip-volume", type="int", |
296 | + help=optparse.SUPPRESS_HELP) |
297 | + |
298 | # If set, restore only the subdirectory or file specified, not the |
299 | # whole root. |
300 | # TRANSL: Used in usage help to represent a Unix-style path name. Example: |
301 | |
302 | === modified file 'duplicity/globals.py' |
303 | --- duplicity/globals.py 2011-08-18 18:09:18 +0000 |
304 | +++ duplicity/globals.py 2011-08-29 03:36:23 +0000 |
305 | @@ -200,6 +200,9 @@ |
306 | # used in testing only - raises exception after volume |
307 | fail_on_volume = 0 |
308 | |
309 | +# used in testing only - skips uploading a particular volume |
310 | +skip_volume = 0 |
311 | + |
312 | # ignore (some) errors during operations; supposed to make it more |
313 | # likely that you are able to restore data under problematic |
314 | # circumstances. the default should absolutely always be True unless |
315 | |
316 | === modified file 'duplicity/log.py' |
317 | --- duplicity/log.py 2011-05-31 18:07:07 +0000 |
318 | +++ duplicity/log.py 2011-08-29 03:36:23 +0000 |
319 | @@ -189,6 +189,7 @@ |
320 | gio_not_available = 40 |
321 | source_dir_mismatch = 42 # 41 is reserved for par2 |
322 | ftps_lftp_missing = 43 |
323 | + volume_wrong_size = 44 |
324 | |
325 | # 50->69 reserved for backend errors |
326 | backend_error = 50 |
327 | |
328 | === modified file 'testing/alltests' |
329 | --- testing/alltests 2009-08-12 17:43:42 +0000 |
330 | +++ testing/alltests 2011-08-29 03:36:23 +0000 |
331 | @@ -24,3 +24,4 @@ |
332 | finaltest.py |
333 | restarttest.py |
334 | cleanuptest.py |
335 | +badupload.py |
336 | |
337 | === added file 'testing/badupload.py' |
338 | --- testing/badupload.py 1970-01-01 00:00:00 +0000 |
339 | +++ testing/badupload.py 2011-08-29 03:36:23 +0000 |
340 | @@ -0,0 +1,83 @@ |
341 | +# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*- |
342 | +# |
343 | +# Copyright 2002 Ben Escoto <ben@emerose.org> |
344 | +# Copyright 2007 Kenneth Loafman <kenneth@loafman.com> |
345 | +# Copyright 2011 Canonical Ltd |
346 | +# |
347 | +# This file is part of duplicity. |
348 | +# |
349 | +# Duplicity is free software; you can redistribute it and/or modify it |
350 | +# under the terms of the GNU General Public License as published by the |
351 | +# Free Software Foundation; either version 2 of the License, or (at your |
352 | +# option) any later version. |
353 | +# |
354 | +# Duplicity is distributed in the hope that it will be useful, but |
355 | +# WITHOUT ANY WARRANTY; without even the implied warranty of |
356 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
357 | +# General Public License for more details. |
358 | +# |
359 | +# You should have received a copy of the GNU General Public License |
360 | +# along with duplicity; if not, write to the Free Software Foundation, |
361 | +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
362 | + |
363 | +import config |
364 | +import os, unittest, sys |
365 | +sys.path.insert(0, "../") |
366 | + |
367 | +config.setup() |
368 | + |
369 | +# This can be changed to select the URL to use |
370 | +backend_url = 'file://testfiles/output' |
371 | + |
372 | +class CmdError(Exception): |
373 | + """Indicates an error running an external command""" |
374 | + return_val = -1 |
375 | + def __init__(self, return_val): |
376 | + self.return_val = os.WEXITSTATUS(return_val) |
377 | + |
378 | +class BadUploadTest(unittest.TestCase): |
379 | + """ |
380 | + Test missing volume upload using duplicity binary |
381 | + """ |
382 | + def setUp(self): |
383 | + assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1") |
384 | + |
385 | + def tearDown(self): |
386 | + assert not os.system("rm -rf testfiles tempdir temp2.tar") |
387 | + |
388 | + def run_duplicity(self, arglist, options = []): |
389 | + """ |
390 | + Run duplicity binary with given arguments and options |
391 | + """ |
392 | + options.append("--archive-dir testfiles/cache") |
393 | + cmd_list = ["../duplicity-bin"] |
394 | + cmd_list.extend(options + ["--allow-source-mismatch"]) |
395 | + cmd_list.extend(arglist) |
396 | + cmdline = " ".join(cmd_list) |
397 | + if not os.environ.has_key('PASSPHRASE'): |
398 | + os.environ['PASSPHRASE'] = 'foobar' |
399 | + return_val = os.system(cmdline) |
400 | + if return_val: |
401 | + raise CmdError(return_val) |
402 | + |
403 | + def backup(self, type, input_dir, options = []): |
404 | + """Run duplicity backup to default directory""" |
405 | + options = options[:] |
406 | + if type == "full": |
407 | + options.insert(0, 'full') |
408 | + args = [input_dir, "'%s'" % backend_url] |
409 | + self.run_duplicity(args, options) |
410 | + |
411 | + def test_missing_file(self): |
412 | + """ |
413 | + Test basic lost file |
414 | + """ |
415 | + # we know we're going to fail this one, its forced |
416 | + try: |
417 | + self.backup("full", "testfiles/dir1", options = ["--skip-volume 1"]) |
418 | + assert False # shouldn't get this far |
419 | + except CmdError, e: |
420 | + assert e.return_val == 44, e.return_val |
421 | + |
422 | +if __name__ == "__main__": |
423 | + unittest.main() |
1 === modified file 'duplicity-bin' dest_filename) , tdp.getsize(), size) _("File %s was corrupted during upload.") % dest_filename, volume_ wrong_size, code_extra)
...
8 - def put(tdp, dest_filename):
9 + def validate_block(tdp, dest_filename):
...
16 + if size != tdp.getsize():
17 + code_extra = "%s %d %d" % (util.escape(
18 + log.FatalError(
19 + log.ErrorCode.
if we can't get a file size, we cannot assume that the file is corrupted, probably the backend only does not support it.
i am busy the next few days but will have a look at sftp/ftp implementations next week .. ede/duply.net