Merge lp:~duplicity-team/duplicity/check-volumes into lp:duplicity/0.6

Proposed by Michael Terry
Status: Merged
Merged at revision: 782
Proposed branch: lp:~duplicity-team/duplicity/check-volumes
Merge into: lp:duplicity/0.6
Diff against target: 423 lines (+244/-10)
12 files modified
duplicity-bin (+18/-4)
duplicity/backend.py (+29/-0)
duplicity/backends/botobackend.py (+19/-0)
duplicity/backends/cloudfilesbackend.py (+19/-0)
duplicity/backends/giobackend.py (+17/-0)
duplicity/backends/localbackend.py (+13/-1)
duplicity/backends/u1backend.py (+37/-5)
duplicity/commandline.py (+4/-0)
duplicity/globals.py (+3/-0)
duplicity/log.py (+1/-0)
testing/alltests (+1/-0)
testing/badupload.py (+83/-0)
To merge this branch: bzr merge lp:~duplicity-team/duplicity/check-volumes
Reviewer Review Type Date Requested Status
Michael Terry Pending
Review via email: mp+72826@code.launchpad.net

Description of the change

This is the first pass of checking each volume's size as it is uploaded, as discussed in https://code.launchpad.net/~mterry/duplicity/early-catch-498933/+merge/72607

To post a comment you must log in.
Revision history for this message
edso (ed.so) wrote :

1 === modified file 'duplicity-bin'
...
8 - def put(tdp, dest_filename):
9 + def validate_block(tdp, dest_filename):
...
16 + if size != tdp.getsize():
17 + code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size)
18 + log.FatalError(_("File %s was corrupted during upload.") % dest_filename,
19 + log.ErrorCode.volume_wrong_size, code_extra)

if we can't get a file size, we cannot assume that the file is corrupted, probably the backend only does not support it.

i am busy the next few days but will have a look at sftp/ftp implementations next week .. ede/duply.net

Revision history for this message
Michael Terry (mterry) wrote :

Having added and tested query support to the Ubuntu One, Rackspace, Amazon S3, GIO, and local backends, I'm marking this branch 'ready for review'.

Edso said he'd look into sftp/ftp later as well.

Revision history for this message
edso (ed.so) wrote :

On 29.08.2011 05:42, Michael Terry wrote:
> Michael Terry has proposed merging lp:~duplicity-team/duplicity/check-volumes into lp:duplicity.
>
> Requested reviews:
> Michael Terry (mterry)
>
> For more details, see:
> https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
>
> This is the first pass of checking each volume's size as it is uploaded, as discussed in https://code.launchpad.net/~mterry/duplicity/early-catch-498933/+merge/72607
>

mt,

did you see my comment on
https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
?

ede

Revision history for this message
Michael Terry (mterry) wrote :

> did you see my comment on
> https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
> ?

Yes. The code gracefully handles backends that don't support querying metadata. It only declares a volume corrupt if the backend successfully determined size (or lack of a file altogether).

Revision history for this message
edso (ed.so) wrote :

On 29.08.2011 14:42, Michael Terry wrote:
>> did you see my comment on
>> https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
>> ?
>
> Yes. The code gracefully handles backends that don't support querying metadata. It only declares a volume corrupt if the backend successfully determined size (or lack of a file altogether).

ah, i see it now.. sorry for the noise.. ede

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'duplicity-bin'
2--- duplicity-bin 2011-08-23 13:37:06 +0000
3+++ duplicity-bin 2011-08-29 03:36:23 +0000
4@@ -269,14 +269,28 @@
5 end_block -= 1
6 return start_index, start_block, end_index, end_block
7
8- def put(tdp, dest_filename):
9+ def validate_block(tdp, dest_filename):
10+ info = backend.query_info([dest_filename])[dest_filename]
11+ if 'size' not in info:
12+ return # backend didn't know how to query size
13+ size = info['size']
14+ if size is None:
15+ return # error querying file
16+ if size != tdp.getsize():
17+ code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size)
18+ log.FatalError(_("File %s was corrupted during upload.") % dest_filename,
19+ log.ErrorCode.volume_wrong_size, code_extra)
20+
21+ def put(tdp, dest_filename, vol_num):
22 """
23 Retrieve file size *before* calling backend.put(), which may (at least
24 in case of the localbackend) rename the temporary file to the target
25 instead of copying.
26 """
27 putsize = tdp.getsize()
28- backend.put(tdp, dest_filename)
29+ if globals.skip_volume != vol_num: # for testing purposes only
30+ backend.put(tdp, dest_filename)
31+ validate_block(tdp, dest_filename)
32 if tdp.stat:
33 tdp.delete()
34 return putsize
35@@ -350,8 +364,8 @@
36 sig_outfp.flush()
37 man_outfp.flush()
38
39- async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename: put(tdp, dest_filename),
40- (tdp, dest_filename)))
41+ async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename, vol_num: put(tdp, dest_filename, vol_num),
42+ (tdp, dest_filename, vol_num)))
43
44 # Log human-readable version as well as raw numbers for machine consumers
45 log.Progress('Processed volume %d' % vol_num, diffdir.stats.SourceFileSize)
46
47=== modified file 'duplicity/backend.py'
48--- duplicity/backend.py 2011-08-06 15:57:54 +0000
49+++ duplicity/backend.py 2011-08-29 03:36:23 +0000
50@@ -361,6 +361,35 @@
51 """
52 raise NotImplementedError()
53
54+ # Should never cause FatalError.
55+ # Returns a dictionary of dictionaries. The outer dictionary maps
56+ # filenames to metadata dictionaries. Supported metadata are:
57+ #
58+ # 'size': if >= 0, size of file
59+ # if -1, file is not found
60+ # if None, error querying file
61+ #
62+ # Returned dictionary is guaranteed to contain a metadata dictionary for
63+ # each filename, but not all metadata are guaranteed to be present.
64+ def query_info(self, filename_list, raise_errors=True):
65+ """
66+ Return metadata about each filename in filename_list
67+ """
68+ info = {}
69+ if hasattr(self, '_query_list_info'):
70+ info = self._query_list_info(filename_list)
71+ elif hasattr(self, '_query_file_info'):
72+ for filename in filename_list:
73+ info[filename] = self._query_file_info(filename)
74+
75+ # Fill out any missing entries (may happen if backend has no support
76+ # or its query_list support is lazy)
77+ for filename in filename_list:
78+ if filename not in info:
79+ info[filename] = {}
80+
81+ return info
82+
83 """ use getpass by default, inherited backends may overwrite this behaviour """
84 use_getpass = True
85
86
87=== modified file 'duplicity/backends/botobackend.py'
88--- duplicity/backends/botobackend.py 2011-04-04 13:01:12 +0000
89+++ duplicity/backends/botobackend.py 2011-08-29 03:36:23 +0000
90@@ -26,6 +26,7 @@
91 from duplicity import log
92 from duplicity.errors import * #@UnusedWildImport
93 from duplicity.util import exception_traceback
94+from duplicity.backend import retry
95
96 class BotoBackend(duplicity.backend.Backend):
97 """
98@@ -294,6 +295,24 @@
99 self.bucket.delete_key(self.key_prefix + filename)
100 log.Debug("Deleted %s/%s" % (self.straight_url, filename))
101
102+ @retry
103+ def _query_file_info(self, filename, raise_errors=False):
104+ try:
105+ key = self.bucket.lookup(self.key_prefix + filename)
106+ if key is None:
107+ return {'size': -1}
108+ return {'size': key.size}
109+ except Exception, e:
110+ log.Warn("Query %s/%s failed: %s"
111+ "" % (self.straight_url,
112+ filename,
113+ str(e)))
114+ self.resetConnection()
115+ if raise_errors:
116+ raise e
117+ else:
118+ return {'size': None}
119+
120 duplicity.backend.register_backend("s3", BotoBackend)
121 duplicity.backend.register_backend("s3+http", BotoBackend)
122
123
124=== modified file 'duplicity/backends/cloudfilesbackend.py'
125--- duplicity/backends/cloudfilesbackend.py 2011-02-12 15:11:34 +0000
126+++ duplicity/backends/cloudfilesbackend.py 2011-08-29 03:36:23 +0000
127@@ -26,6 +26,7 @@
128 from duplicity import log
129 from duplicity.errors import * #@UnusedWildImport
130 from duplicity.util import exception_traceback
131+from duplicity.backend import retry
132
133 class CloudFilesBackend(duplicity.backend.Backend):
134 """
135@@ -140,4 +141,22 @@
136 self.container.delete_object(file)
137 log.Debug("Deleted '%s/%s'" % (self.container, file))
138
139+ @retry
140+ def _query_file_info(self, filename, raise_errors=False):
141+ from cloudfiles.errors import NoSuchObject
142+ try:
143+ sobject = self.container.get_object(filename)
144+ return {'size': sobject.size}
145+ except NoSuchObject:
146+ return {'size': -1}
147+ except Exception, e:
148+ log.Warn("Error querying '%s/%s': %s"
149+ "" % (self.container,
150+ filename,
151+ str(e)))
152+ if raise_errors:
153+ raise e
154+ else:
155+ return {'size': None}
156+
157 duplicity.backend.register_backend("cf+http", CloudFilesBackend)
158
159=== modified file 'duplicity/backends/giobackend.py'
160--- duplicity/backends/giobackend.py 2011-06-12 22:25:39 +0000
161+++ duplicity/backends/giobackend.py 2011-08-29 03:36:23 +0000
162@@ -164,3 +164,20 @@
163 self.handle_error(raise_errors, e, 'delete',
164 target_file.get_parse_name())
165 return
166+
167+ @retry
168+ def _query_file_info(self, filename, raise_errors=False):
169+ """Query attributes on filename"""
170+ target_file = self.remote_file.get_child(filename)
171+ attrs = gio.FILE_ATTRIBUTE_STANDARD_SIZE
172+ try:
173+ info = target_file.query_info(attrs, gio.FILE_QUERY_INFO_NONE)
174+ return {'size': info.get_size()}
175+ except Exception, e:
176+ if isinstance(e, gio.Error):
177+ if e.code == gio.ERROR_NOT_FOUND:
178+ return {'size': -1} # early exit, no need to retry
179+ if raise_errors:
180+ raise e
181+ else:
182+ return {'size': None}
183
184=== modified file 'duplicity/backends/localbackend.py'
185--- duplicity/backends/localbackend.py 2011-06-17 18:22:28 +0000
186+++ duplicity/backends/localbackend.py 2011-08-29 03:36:23 +0000
187@@ -57,7 +57,7 @@
188 code = log.ErrorCode.backend_no_space
189 extra = ' '.join([util.escape(x) for x in [file1, file2] if x])
190 extra = ' '.join([op, extra])
191- if op != 'delete':
192+ if op != 'delete' and op != 'query':
193 log.FatalError(str(e), code, extra)
194 else:
195 log.Warn(str(e), code, extra)
196@@ -110,5 +110,17 @@
197 except Exception, e:
198 self.handle_error(e, 'delete', self.remote_pathdir.append(filename).name)
199
200+ def _query_file_info(self, filename):
201+ """Query attributes on filename"""
202+ try:
203+ target_file = self.remote_pathdir.append(filename)
204+ if not os.path.exists(target_file.name):
205+ return {'size': -1}
206+ target_file.setdata()
207+ size = target_file.getsize()
208+ return {'size': size}
209+ except Exception, e:
210+ self.handle_error(e, 'query', target_file.name)
211+ return {'size': None}
212
213 duplicity.backend.register_backend("file", LocalBackend)
214
215=== modified file 'duplicity/backends/u1backend.py'
216--- duplicity/backends/u1backend.py 2011-08-17 14:25:52 +0000
217+++ duplicity/backends/u1backend.py 2011-08-29 03:36:23 +0000
218@@ -98,17 +98,15 @@
219 import urllib
220 return urllib.quote(url, safe="/~")
221
222- def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
223+ def parse_error(self, headers, ignore=None):
224 from duplicity import log
225- from duplicity import util
226- import json
227
228 status = int(headers[0].get('status'))
229 if status >= 200 and status < 300:
230- return
231+ return None
232
233 if ignore and status in ignore:
234- return
235+ return None
236
237 if status == 400:
238 code = log.ErrorCode.backend_permission_denied
239@@ -118,6 +116,18 @@
240 code = log.ErrorCode.backend_no_space
241 else:
242 code = log.ErrorCode.backend_error
243+ return code
244+
245+ def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
246+ from duplicity import log
247+ from duplicity import util
248+ import json
249+
250+ code = self.parse_error(headers, ignore)
251+ if code is None:
252+ return
253+
254+ status = int(headers[0].get('status'))
255
256 if file1:
257 file1 = file1.encode("utf8")
258@@ -222,5 +232,27 @@
259 answer = auth.request(remote_full, http_method="DELETE")
260 self.handle_error(raise_errors, 'delete', answer, remote_full, ignore=[404])
261
262+ @retry
263+ def _query_file_info(self, filename, raise_errors=False):
264+ """Query attributes on filename"""
265+ import json
266+ import ubuntuone.couch.auth as auth
267+ from duplicity import log
268+ remote_full = self.meta_base + self.quote(filename)
269+ answer = auth.request(remote_full)
270+
271+ code = self.parse_error(answer)
272+ if code is not None:
273+ if code == log.ErrorCode.backend_not_found:
274+ return {'size': -1}
275+ elif raise_errors:
276+ self.handle_error(raise_errors, 'query', answer, remote_full, filename)
277+ else:
278+ return {'size': None}
279+
280+ node = json.loads(answer[1])
281+ size = node.get('size')
282+ return {'size': size}
283+
284 duplicity.backend.register_backend("u1", U1Backend)
285 duplicity.backend.register_backend("u1+http", U1Backend)
286
287=== modified file 'duplicity/commandline.py'
288--- duplicity/commandline.py 2011-08-18 18:09:18 +0000
289+++ duplicity/commandline.py 2011-08-29 03:36:23 +0000
290@@ -292,6 +292,10 @@
291 parser.add_option("--fail-on-volume", type="int",
292 help=optparse.SUPPRESS_HELP)
293
294+ # used in testing only - skips upload for a given volume
295+ parser.add_option("--skip-volume", type="int",
296+ help=optparse.SUPPRESS_HELP)
297+
298 # If set, restore only the subdirectory or file specified, not the
299 # whole root.
300 # TRANSL: Used in usage help to represent a Unix-style path name. Example:
301
302=== modified file 'duplicity/globals.py'
303--- duplicity/globals.py 2011-08-18 18:09:18 +0000
304+++ duplicity/globals.py 2011-08-29 03:36:23 +0000
305@@ -200,6 +200,9 @@
306 # used in testing only - raises exception after volume
307 fail_on_volume = 0
308
309+# used in testing only - skips uploading a particular volume
310+skip_volume = 0
311+
312 # ignore (some) errors during operations; supposed to make it more
313 # likely that you are able to restore data under problematic
314 # circumstances. the default should absolutely always be True unless
315
316=== modified file 'duplicity/log.py'
317--- duplicity/log.py 2011-05-31 18:07:07 +0000
318+++ duplicity/log.py 2011-08-29 03:36:23 +0000
319@@ -189,6 +189,7 @@
320 gio_not_available = 40
321 source_dir_mismatch = 42 # 41 is reserved for par2
322 ftps_lftp_missing = 43
323+ volume_wrong_size = 44
324
325 # 50->69 reserved for backend errors
326 backend_error = 50
327
328=== modified file 'testing/alltests'
329--- testing/alltests 2009-08-12 17:43:42 +0000
330+++ testing/alltests 2011-08-29 03:36:23 +0000
331@@ -24,3 +24,4 @@
332 finaltest.py
333 restarttest.py
334 cleanuptest.py
335+badupload.py
336
337=== added file 'testing/badupload.py'
338--- testing/badupload.py 1970-01-01 00:00:00 +0000
339+++ testing/badupload.py 2011-08-29 03:36:23 +0000
340@@ -0,0 +1,83 @@
341+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
342+#
343+# Copyright 2002 Ben Escoto <ben@emerose.org>
344+# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
345+# Copyright 2011 Canonical Ltd
346+#
347+# This file is part of duplicity.
348+#
349+# Duplicity is free software; you can redistribute it and/or modify it
350+# under the terms of the GNU General Public License as published by the
351+# Free Software Foundation; either version 2 of the License, or (at your
352+# option) any later version.
353+#
354+# Duplicity is distributed in the hope that it will be useful, but
355+# WITHOUT ANY WARRANTY; without even the implied warranty of
356+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
357+# General Public License for more details.
358+#
359+# You should have received a copy of the GNU General Public License
360+# along with duplicity; if not, write to the Free Software Foundation,
361+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
362+
363+import config
364+import os, unittest, sys
365+sys.path.insert(0, "../")
366+
367+config.setup()
368+
369+# This can be changed to select the URL to use
370+backend_url = 'file://testfiles/output'
371+
372+class CmdError(Exception):
373+ """Indicates an error running an external command"""
374+ return_val = -1
375+ def __init__(self, return_val):
376+ self.return_val = os.WEXITSTATUS(return_val)
377+
378+class BadUploadTest(unittest.TestCase):
379+ """
380+ Test missing volume upload using duplicity binary
381+ """
382+ def setUp(self):
383+ assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
384+
385+ def tearDown(self):
386+ assert not os.system("rm -rf testfiles tempdir temp2.tar")
387+
388+ def run_duplicity(self, arglist, options = []):
389+ """
390+ Run duplicity binary with given arguments and options
391+ """
392+ options.append("--archive-dir testfiles/cache")
393+ cmd_list = ["../duplicity-bin"]
394+ cmd_list.extend(options + ["--allow-source-mismatch"])
395+ cmd_list.extend(arglist)
396+ cmdline = " ".join(cmd_list)
397+ if not os.environ.has_key('PASSPHRASE'):
398+ os.environ['PASSPHRASE'] = 'foobar'
399+ return_val = os.system(cmdline)
400+ if return_val:
401+ raise CmdError(return_val)
402+
403+ def backup(self, type, input_dir, options = []):
404+ """Run duplicity backup to default directory"""
405+ options = options[:]
406+ if type == "full":
407+ options.insert(0, 'full')
408+ args = [input_dir, "'%s'" % backend_url]
409+ self.run_duplicity(args, options)
410+
411+ def test_missing_file(self):
412+ """
413+ Test basic lost file
414+ """
415+ # we know we're going to fail this one, its forced
416+ try:
417+ self.backup("full", "testfiles/dir1", options = ["--skip-volume 1"])
418+ assert False # shouldn't get this far
419+ except CmdError, e:
420+ assert e.return_val == 44, e.return_val
421+
422+if __name__ == "__main__":
423+ unittest.main()

Subscribers

People subscribed via source and target branches