Merge lp:~duplicity-team/duplicity/check-volumes into lp:duplicity/0.6

Proposed by Michael Terry
Status: Merged
Merged at revision: 782
Proposed branch: lp:~duplicity-team/duplicity/check-volumes
Merge into: lp:duplicity/0.6
Diff against target: 423 lines (+244/-10)
12 files modified
duplicity-bin (+18/-4)
duplicity/backend.py (+29/-0)
duplicity/backends/botobackend.py (+19/-0)
duplicity/backends/cloudfilesbackend.py (+19/-0)
duplicity/backends/giobackend.py (+17/-0)
duplicity/backends/localbackend.py (+13/-1)
duplicity/backends/u1backend.py (+37/-5)
duplicity/commandline.py (+4/-0)
duplicity/globals.py (+3/-0)
duplicity/log.py (+1/-0)
testing/alltests (+1/-0)
testing/badupload.py (+83/-0)
To merge this branch: bzr merge lp:~duplicity-team/duplicity/check-volumes
Reviewer Review Type Date Requested Status
Michael Terry Pending
Review via email: mp+72826@code.launchpad.net

Description of the change

This is the first pass of checking each volume's size as it is uploaded, as discussed in https://code.launchpad.net/~mterry/duplicity/early-catch-498933/+merge/72607

To post a comment you must log in.
Revision history for this message
edso (ed.so) wrote :

1 === modified file 'duplicity-bin'
...
8 - def put(tdp, dest_filename):
9 + def validate_block(tdp, dest_filename):
...
16 + if size != tdp.getsize():
17 + code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size)
18 + log.FatalError(_("File %s was corrupted during upload.") % dest_filename,
19 + log.ErrorCode.volume_wrong_size, code_extra)

If we can't get a file size, we cannot assume that the file is corrupted; more likely the backend simply does not support querying it.

I am busy for the next few days but will have a look at the sftp/ftp implementations next week. ede/duply.net

Revision history for this message
Michael Terry (mterry) wrote :

Having added and tested query support to the Ubuntu One, Rackspace, Amazon S3, GIO, and local backends, I'm marking this branch 'ready for review'.

Edso said he'd look into sftp/ftp later as well.

Revision history for this message
edso (ed.so) wrote :

On 29.08.2011 05:42, Michael Terry wrote:
> Michael Terry has proposed merging lp:~duplicity-team/duplicity/check-volumes into lp:duplicity.
>
> Requested reviews:
> Michael Terry (mterry)
>
> For more details, see:
> https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
>
> This is the first pass of checking each volume's size as it is uploaded, as discussed in https://code.launchpad.net/~mterry/duplicity/early-catch-498933/+merge/72607
>

mt,

did you see my comment on
https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
?

ede

Revision history for this message
Michael Terry (mterry) wrote :

> did you see my comment on
> https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
> ?

Yes. The code gracefully handles backends that don't support querying metadata. It only declares a volume corrupt if the backend successfully determined size (or lack of a file altogether).

Revision history for this message
edso (ed.so) wrote :

On 29.08.2011 14:42, Michael Terry wrote:
>> did you see my comment on
>> https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
>> ?
>
> Yes. The code gracefully handles backends that don't support querying metadata. It only declares a volume corrupt if the backend successfully determined size (or lack of a file altogether).

Ah, I see it now. Sorry for the noise. ede

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'duplicity-bin'
--- duplicity-bin 2011-08-23 13:37:06 +0000
+++ duplicity-bin 2011-08-29 03:36:23 +0000
@@ -269,14 +269,28 @@
269 end_block -= 1269 end_block -= 1
270 return start_index, start_block, end_index, end_block270 return start_index, start_block, end_index, end_block
271271
272 def put(tdp, dest_filename):272 def validate_block(tdp, dest_filename):
273 info = backend.query_info([dest_filename])[dest_filename]
274 if 'size' not in info:
275 return # backend didn't know how to query size
276 size = info['size']
277 if size is None:
278 return # error querying file
279 if size != tdp.getsize():
280 code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size)
281 log.FatalError(_("File %s was corrupted during upload.") % dest_filename,
282 log.ErrorCode.volume_wrong_size, code_extra)
283
284 def put(tdp, dest_filename, vol_num):
273 """285 """
274 Retrieve file size *before* calling backend.put(), which may (at least286 Retrieve file size *before* calling backend.put(), which may (at least
275 in case of the localbackend) rename the temporary file to the target287 in case of the localbackend) rename the temporary file to the target
276 instead of copying.288 instead of copying.
277 """289 """
278 putsize = tdp.getsize()290 putsize = tdp.getsize()
279 backend.put(tdp, dest_filename)291 if globals.skip_volume != vol_num: # for testing purposes only
292 backend.put(tdp, dest_filename)
293 validate_block(tdp, dest_filename)
280 if tdp.stat:294 if tdp.stat:
281 tdp.delete()295 tdp.delete()
282 return putsize296 return putsize
@@ -350,8 +364,8 @@
350 sig_outfp.flush()364 sig_outfp.flush()
351 man_outfp.flush()365 man_outfp.flush()
352366
353 async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename: put(tdp, dest_filename),367 async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename, vol_num: put(tdp, dest_filename, vol_num),
354 (tdp, dest_filename)))368 (tdp, dest_filename, vol_num)))
355369
356 # Log human-readable version as well as raw numbers for machine consumers370 # Log human-readable version as well as raw numbers for machine consumers
357 log.Progress('Processed volume %d' % vol_num, diffdir.stats.SourceFileSize)371 log.Progress('Processed volume %d' % vol_num, diffdir.stats.SourceFileSize)
358372
=== modified file 'duplicity/backend.py'
--- duplicity/backend.py 2011-08-06 15:57:54 +0000
+++ duplicity/backend.py 2011-08-29 03:36:23 +0000
@@ -361,6 +361,35 @@
361 """361 """
362 raise NotImplementedError()362 raise NotImplementedError()
363363
364 # Should never cause FatalError.
365 # Returns a dictionary of dictionaries. The outer dictionary maps
366 # filenames to metadata dictionaries. Supported metadata are:
367 #
368 # 'size': if >= 0, size of file
369 # if -1, file is not found
370 # if None, error querying file
371 #
372 # Returned dictionary is guaranteed to contain a metadata dictionary for
373 # each filename, but not all metadata are guaranteed to be present.
374 def query_info(self, filename_list, raise_errors=True):
375 """
376 Return metadata about each filename in filename_list
377 """
378 info = {}
379 if hasattr(self, '_query_list_info'):
380 info = self._query_list_info(filename_list)
381 elif hasattr(self, '_query_file_info'):
382 for filename in filename_list:
383 info[filename] = self._query_file_info(filename)
384
385 # Fill out any missing entries (may happen if backend has no support
386 # or its query_list support is lazy)
387 for filename in filename_list:
388 if filename not in info:
389 info[filename] = {}
390
391 return info
392
364 """ use getpass by default, inherited backends may overwrite this behaviour """393 """ use getpass by default, inherited backends may overwrite this behaviour """
365 use_getpass = True394 use_getpass = True
366395
367396
=== modified file 'duplicity/backends/botobackend.py'
--- duplicity/backends/botobackend.py 2011-04-04 13:01:12 +0000
+++ duplicity/backends/botobackend.py 2011-08-29 03:36:23 +0000
@@ -26,6 +26,7 @@
26from duplicity import log26from duplicity import log
27from duplicity.errors import * #@UnusedWildImport27from duplicity.errors import * #@UnusedWildImport
28from duplicity.util import exception_traceback28from duplicity.util import exception_traceback
29from duplicity.backend import retry
2930
30class BotoBackend(duplicity.backend.Backend):31class BotoBackend(duplicity.backend.Backend):
31 """32 """
@@ -294,6 +295,24 @@
294 self.bucket.delete_key(self.key_prefix + filename)295 self.bucket.delete_key(self.key_prefix + filename)
295 log.Debug("Deleted %s/%s" % (self.straight_url, filename))296 log.Debug("Deleted %s/%s" % (self.straight_url, filename))
296297
298 @retry
299 def _query_file_info(self, filename, raise_errors=False):
300 try:
301 key = self.bucket.lookup(self.key_prefix + filename)
302 if key is None:
303 return {'size': -1}
304 return {'size': key.size}
305 except Exception, e:
306 log.Warn("Query %s/%s failed: %s"
307 "" % (self.straight_url,
308 filename,
309 str(e)))
310 self.resetConnection()
311 if raise_errors:
312 raise e
313 else:
314 return {'size': None}
315
297duplicity.backend.register_backend("s3", BotoBackend)316duplicity.backend.register_backend("s3", BotoBackend)
298duplicity.backend.register_backend("s3+http", BotoBackend)317duplicity.backend.register_backend("s3+http", BotoBackend)
299318
300319
=== modified file 'duplicity/backends/cloudfilesbackend.py'
--- duplicity/backends/cloudfilesbackend.py 2011-02-12 15:11:34 +0000
+++ duplicity/backends/cloudfilesbackend.py 2011-08-29 03:36:23 +0000
@@ -26,6 +26,7 @@
26from duplicity import log26from duplicity import log
27from duplicity.errors import * #@UnusedWildImport27from duplicity.errors import * #@UnusedWildImport
28from duplicity.util import exception_traceback28from duplicity.util import exception_traceback
29from duplicity.backend import retry
2930
30class CloudFilesBackend(duplicity.backend.Backend):31class CloudFilesBackend(duplicity.backend.Backend):
31 """32 """
@@ -140,4 +141,22 @@
140 self.container.delete_object(file)141 self.container.delete_object(file)
141 log.Debug("Deleted '%s/%s'" % (self.container, file))142 log.Debug("Deleted '%s/%s'" % (self.container, file))
142143
144 @retry
145 def _query_file_info(self, filename, raise_errors=False):
146 from cloudfiles.errors import NoSuchObject
147 try:
148 sobject = self.container.get_object(filename)
149 return {'size': sobject.size}
150 except NoSuchObject:
151 return {'size': -1}
152 except Exception, e:
153 log.Warn("Error querying '%s/%s': %s"
154 "" % (self.container,
155 filename,
156 str(e)))
157 if raise_errors:
158 raise e
159 else:
160 return {'size': None}
161
143duplicity.backend.register_backend("cf+http", CloudFilesBackend)162duplicity.backend.register_backend("cf+http", CloudFilesBackend)
144163
=== modified file 'duplicity/backends/giobackend.py'
--- duplicity/backends/giobackend.py 2011-06-12 22:25:39 +0000
+++ duplicity/backends/giobackend.py 2011-08-29 03:36:23 +0000
@@ -164,3 +164,20 @@
164 self.handle_error(raise_errors, e, 'delete',164 self.handle_error(raise_errors, e, 'delete',
165 target_file.get_parse_name())165 target_file.get_parse_name())
166 return166 return
167
168 @retry
169 def _query_file_info(self, filename, raise_errors=False):
170 """Query attributes on filename"""
171 target_file = self.remote_file.get_child(filename)
172 attrs = gio.FILE_ATTRIBUTE_STANDARD_SIZE
173 try:
174 info = target_file.query_info(attrs, gio.FILE_QUERY_INFO_NONE)
175 return {'size': info.get_size()}
176 except Exception, e:
177 if isinstance(e, gio.Error):
178 if e.code == gio.ERROR_NOT_FOUND:
179 return {'size': -1} # early exit, no need to retry
180 if raise_errors:
181 raise e
182 else:
183 return {'size': None}
167184
=== modified file 'duplicity/backends/localbackend.py'
--- duplicity/backends/localbackend.py 2011-06-17 18:22:28 +0000
+++ duplicity/backends/localbackend.py 2011-08-29 03:36:23 +0000
@@ -57,7 +57,7 @@
57 code = log.ErrorCode.backend_no_space57 code = log.ErrorCode.backend_no_space
58 extra = ' '.join([util.escape(x) for x in [file1, file2] if x])58 extra = ' '.join([util.escape(x) for x in [file1, file2] if x])
59 extra = ' '.join([op, extra])59 extra = ' '.join([op, extra])
60 if op != 'delete':60 if op != 'delete' and op != 'query':
61 log.FatalError(str(e), code, extra)61 log.FatalError(str(e), code, extra)
62 else:62 else:
63 log.Warn(str(e), code, extra)63 log.Warn(str(e), code, extra)
@@ -110,5 +110,17 @@
110 except Exception, e:110 except Exception, e:
111 self.handle_error(e, 'delete', self.remote_pathdir.append(filename).name)111 self.handle_error(e, 'delete', self.remote_pathdir.append(filename).name)
112112
113 def _query_file_info(self, filename):
114 """Query attributes on filename"""
115 try:
116 target_file = self.remote_pathdir.append(filename)
117 if not os.path.exists(target_file.name):
118 return {'size': -1}
119 target_file.setdata()
120 size = target_file.getsize()
121 return {'size': size}
122 except Exception, e:
123 self.handle_error(e, 'query', target_file.name)
124 return {'size': None}
113125
114duplicity.backend.register_backend("file", LocalBackend)126duplicity.backend.register_backend("file", LocalBackend)
115127
=== modified file 'duplicity/backends/u1backend.py'
--- duplicity/backends/u1backend.py 2011-08-17 14:25:52 +0000
+++ duplicity/backends/u1backend.py 2011-08-29 03:36:23 +0000
@@ -98,17 +98,15 @@
98 import urllib98 import urllib
99 return urllib.quote(url, safe="/~")99 return urllib.quote(url, safe="/~")
100100
101 def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):101 def parse_error(self, headers, ignore=None):
102 from duplicity import log102 from duplicity import log
103 from duplicity import util
104 import json
105103
106 status = int(headers[0].get('status'))104 status = int(headers[0].get('status'))
107 if status >= 200 and status < 300:105 if status >= 200 and status < 300:
108 return106 return None
109107
110 if ignore and status in ignore:108 if ignore and status in ignore:
111 return109 return None
112110
113 if status == 400:111 if status == 400:
114 code = log.ErrorCode.backend_permission_denied112 code = log.ErrorCode.backend_permission_denied
@@ -118,6 +116,18 @@
118 code = log.ErrorCode.backend_no_space116 code = log.ErrorCode.backend_no_space
119 else:117 else:
120 code = log.ErrorCode.backend_error118 code = log.ErrorCode.backend_error
119 return code
120
121 def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
122 from duplicity import log
123 from duplicity import util
124 import json
125
126 code = self.parse_error(headers, ignore)
127 if code is None:
128 return
129
130 status = int(headers[0].get('status'))
121131
122 if file1:132 if file1:
123 file1 = file1.encode("utf8")133 file1 = file1.encode("utf8")
@@ -222,5 +232,27 @@
222 answer = auth.request(remote_full, http_method="DELETE")232 answer = auth.request(remote_full, http_method="DELETE")
223 self.handle_error(raise_errors, 'delete', answer, remote_full, ignore=[404])233 self.handle_error(raise_errors, 'delete', answer, remote_full, ignore=[404])
224234
235 @retry
236 def _query_file_info(self, filename, raise_errors=False):
237 """Query attributes on filename"""
238 import json
239 import ubuntuone.couch.auth as auth
240 from duplicity import log
241 remote_full = self.meta_base + self.quote(filename)
242 answer = auth.request(remote_full)
243
244 code = self.parse_error(answer)
245 if code is not None:
246 if code == log.ErrorCode.backend_not_found:
247 return {'size': -1}
248 elif raise_errors:
249 self.handle_error(raise_errors, 'query', answer, remote_full, filename)
250 else:
251 return {'size': None}
252
253 node = json.loads(answer[1])
254 size = node.get('size')
255 return {'size': size}
256
225duplicity.backend.register_backend("u1", U1Backend)257duplicity.backend.register_backend("u1", U1Backend)
226duplicity.backend.register_backend("u1+http", U1Backend)258duplicity.backend.register_backend("u1+http", U1Backend)
227259
=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py 2011-08-18 18:09:18 +0000
+++ duplicity/commandline.py 2011-08-29 03:36:23 +0000
@@ -292,6 +292,10 @@
292 parser.add_option("--fail-on-volume", type="int",292 parser.add_option("--fail-on-volume", type="int",
293 help=optparse.SUPPRESS_HELP)293 help=optparse.SUPPRESS_HELP)
294294
295 # used in testing only - skips upload for a given volume
296 parser.add_option("--skip-volume", type="int",
297 help=optparse.SUPPRESS_HELP)
298
295 # If set, restore only the subdirectory or file specified, not the299 # If set, restore only the subdirectory or file specified, not the
296 # whole root.300 # whole root.
297 # TRANSL: Used in usage help to represent a Unix-style path name. Example:301 # TRANSL: Used in usage help to represent a Unix-style path name. Example:
298302
=== modified file 'duplicity/globals.py'
--- duplicity/globals.py 2011-08-18 18:09:18 +0000
+++ duplicity/globals.py 2011-08-29 03:36:23 +0000
@@ -200,6 +200,9 @@
200# used in testing only - raises exception after volume200# used in testing only - raises exception after volume
201fail_on_volume = 0201fail_on_volume = 0
202202
203# used in testing only - skips uploading a particular volume
204skip_volume = 0
205
203# ignore (some) errors during operations; supposed to make it more206# ignore (some) errors during operations; supposed to make it more
204# likely that you are able to restore data under problematic207# likely that you are able to restore data under problematic
205# circumstances. the default should absolutely always be True unless208# circumstances. the default should absolutely always be True unless
206209
=== modified file 'duplicity/log.py'
--- duplicity/log.py 2011-05-31 18:07:07 +0000
+++ duplicity/log.py 2011-08-29 03:36:23 +0000
@@ -189,6 +189,7 @@
189 gio_not_available = 40189 gio_not_available = 40
190 source_dir_mismatch = 42 # 41 is reserved for par2190 source_dir_mismatch = 42 # 41 is reserved for par2
191 ftps_lftp_missing = 43191 ftps_lftp_missing = 43
192 volume_wrong_size = 44
192193
193 # 50->69 reserved for backend errors194 # 50->69 reserved for backend errors
194 backend_error = 50195 backend_error = 50
195196
=== modified file 'testing/alltests'
--- testing/alltests 2009-08-12 17:43:42 +0000
+++ testing/alltests 2011-08-29 03:36:23 +0000
@@ -24,3 +24,4 @@
24finaltest.py24finaltest.py
25restarttest.py25restarttest.py
26cleanuptest.py26cleanuptest.py
27badupload.py
2728
=== added file 'testing/badupload.py'
--- testing/badupload.py 1970-01-01 00:00:00 +0000
+++ testing/badupload.py 2011-08-29 03:36:23 +0000
@@ -0,0 +1,83 @@
1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5# Copyright 2011 Canonical Ltd
6#
7# This file is part of duplicity.
8#
9# Duplicity is free software; you can redistribute it and/or modify it
10# under the terms of the GNU General Public License as published by the
11# Free Software Foundation; either version 2 of the License, or (at your
12# option) any later version.
13#
14# Duplicity is distributed in the hope that it will be useful, but
15# WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17# General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with duplicity; if not, write to the Free Software Foundation,
21# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23import config
24import os, unittest, sys
25sys.path.insert(0, "../")
26
27config.setup()
28
29# This can be changed to select the URL to use
30backend_url = 'file://testfiles/output'
31
32class CmdError(Exception):
33 """Indicates an error running an external command"""
34 return_val = -1
35 def __init__(self, return_val):
36 self.return_val = os.WEXITSTATUS(return_val)
37
38class BadUploadTest(unittest.TestCase):
39 """
40 Test missing volume upload using duplicity binary
41 """
42 def setUp(self):
43 assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
44
45 def tearDown(self):
46 assert not os.system("rm -rf testfiles tempdir temp2.tar")
47
48 def run_duplicity(self, arglist, options = []):
49 """
50 Run duplicity binary with given arguments and options
51 """
52 options.append("--archive-dir testfiles/cache")
53 cmd_list = ["../duplicity-bin"]
54 cmd_list.extend(options + ["--allow-source-mismatch"])
55 cmd_list.extend(arglist)
56 cmdline = " ".join(cmd_list)
57 if not os.environ.has_key('PASSPHRASE'):
58 os.environ['PASSPHRASE'] = 'foobar'
59 return_val = os.system(cmdline)
60 if return_val:
61 raise CmdError(return_val)
62
63 def backup(self, type, input_dir, options = []):
64 """Run duplicity backup to default directory"""
65 options = options[:]
66 if type == "full":
67 options.insert(0, 'full')
68 args = [input_dir, "'%s'" % backend_url]
69 self.run_duplicity(args, options)
70
71 def test_missing_file(self):
72 """
73 Test basic lost file
74 """
75 # we know we're going to fail this one, its forced
76 try:
77 self.backup("full", "testfiles/dir1", options = ["--skip-volume 1"])
78 assert False # shouldn't get this far
79 except CmdError, e:
80 assert e.return_val == 44, e.return_val
81
82if __name__ == "__main__":
83 unittest.main()

Subscribers

People subscribed via source and target branches