Merge lp:~mterry/duplicity/disappearing-source into lp:duplicity/0.6

Proposed by Michael Terry
Status: Merged
Merged at revision: 935
Proposed branch: lp:~mterry/duplicity/disappearing-source
Merge into: lp:duplicity/0.6
Prerequisite: lp:~mterry/duplicity/manifest-oddities
Diff against target: 103 lines (+42/-2)
3 files modified
bin/duplicity (+5/-2)
duplicity/diffdir.py (+13/-0)
testing/tests/restarttest.py (+24/-0)
To merge this branch: bzr merge lp:~mterry/duplicity/disappearing-source
Reviewer Review Type Date Requested Status
duplicity-team Pending
Review via email: mp+195461@code.launchpad.net

Description of the change

When restarting a backup, we may accidentally skip the first chunk of one of the source files. To reproduce this,:
1) interrupt a backup
2) delete the source file it was in the middle of
3) restart the backup

When replaying the source iterator to find where to resume from, we can't notice that the file is gone until we've already iterated past where it would be!

The solution I came up with is to just let duplicity stuff the data we accidentally read back into the source iterator.

This is actually a data loss bug, because it's possible to back up corrupted files (that are missing their first chunk).

To post a comment you must log in.
933. By Michael Terry

Merge latest manifest-oddities changes

Revision history for this message
Michael Terry (mterry) wrote :

I wrote in https://code.launchpad.net/~mterry/duplicity/catch-seq-copy-error/+merge/186106:

"I'm getting the feeling that all three bugs have the same root cause: a missing sequence object. 662442 can be explained by a missing patch in the sequence, and the other two can be explained by a missing first snapshot..."

This bug might explain those problems... A missing first snapshot if the file is tiny or a missing first patch in the sequence if it's larger.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'bin/duplicity'
--- bin/duplicity 2013-09-20 01:32:31 +0000
+++ bin/duplicity 2013-11-16 02:23:59 +0000
@@ -223,7 +223,8 @@
223 last_block = globals.restart.last_block223 last_block = globals.restart.last_block
224 try:224 try:
225 # Just spin our wheels225 # Just spin our wheels
226 while tarblock_iter.next():226 iter_result = tarblock_iter.next()
227 while iter_result:
227 if (tarblock_iter.previous_index == last_index):228 if (tarblock_iter.previous_index == last_index):
228 # If both the previous index and this index are done, exit now229 # If both the previous index and this index are done, exit now
229 # before we hit the next index, to prevent skipping its first230 # before we hit the next index, to prevent skipping its first
@@ -238,13 +239,15 @@
238 "Continuing restart on file %s.") %239 "Continuing restart on file %s.") %
239 ("/".join(last_index), "/".join(tarblock_iter.previous_index)),240 ("/".join(last_index), "/".join(tarblock_iter.previous_index)),
240 log.ErrorCode.restart_file_not_found)241 log.ErrorCode.restart_file_not_found)
242 # We went too far! Stuff the data back into place before restarting
243 tarblock_iter.queue_index_data(iter_result)
241 break244 break
245 iter_result = tarblock_iter.next()
242 except StopIteration:246 except StopIteration:
243 log.Warn(_("File %s missing in backup set.\n"247 log.Warn(_("File %s missing in backup set.\n"
244 "Continuing restart on file %s.") %248 "Continuing restart on file %s.") %
245 ("/".join(last_index), "/".join(tarblock_iter.previous_index)),249 ("/".join(last_index), "/".join(tarblock_iter.previous_index)),
246 log.ErrorCode.restart_file_not_found)250 log.ErrorCode.restart_file_not_found)
247 return 0
248251
249252
250def write_multivol(backup_type, tarblock_iter, man_outfp, sig_outfp, backend):253def write_multivol(backup_type, tarblock_iter, man_outfp, sig_outfp, backend):
251254
=== modified file 'duplicity/diffdir.py'
--- duplicity/diffdir.py 2013-04-27 14:48:39 +0000
+++ duplicity/diffdir.py 2013-11-16 02:23:59 +0000
@@ -469,6 +469,7 @@
469 self.remember_next = False # see remember_next_index()469 self.remember_next = False # see remember_next_index()
470 self.remember_value = None # holds index of next block470 self.remember_value = None # holds index of next block
471 self.remember_block = None # holds block of next block471 self.remember_block = None # holds block of next block
472 self.queued_data = None # data to return in next next() call
472473
473 def tarinfo2tarblock(self, index, tarinfo, file_data = ""):474 def tarinfo2tarblock(self, index, tarinfo, file_data = ""):
474 """475 """
@@ -504,6 +505,12 @@
504 """505 """
505 Return next block and update offset506 Return next block and update offset
506 """507 """
508 if self.queued_data is not None:
509 result = self.queued_data
510 self.queued_data = None
511 # Keep rest of metadata as is (like previous_index)
512 return result
513
507 if self.process_waiting:514 if self.process_waiting:
508 result = self.process_continued()515 result = self.process_continued()
509 else:516 else:
@@ -532,6 +539,12 @@
532 """539 """
533 return self.previous_index, self.previous_block540 return self.previous_index, self.previous_block
534541
542 def queue_index_data(self, data):
543 """
544 Next time next() is called, we will return data instead of processing
545 """
546 self.queued_data = data
547
535 def remember_next_index(self):548 def remember_next_index(self):
536 """549 """
537 When called, remember the index of the next block iterated550 When called, remember the index of the next block iterated
538551
=== modified file 'testing/tests/restarttest.py'
--- testing/tests/restarttest.py 2013-11-16 02:23:59 +0000
+++ testing/tests/restarttest.py 2013-11-16 02:23:59 +0000
@@ -471,6 +471,30 @@
471 # and verify we can restore471 # and verify we can restore
472 self.restore()472 self.restore()
473473
474 def test_changed_source_file_disappears(self):
475 """
476 Make sure we correctly handle restarting a backup when a file
477 disappears when we had been in the middle of backing it up. It's
478 possible that the first chunk of the next file will be skipped unless
479 we're careful.
480 """
481 source = 'testfiles/largefiles'
482 self.make_largefiles(count=1)
483 # intentionally interrupt initial backup
484 try:
485 self.backup("full", source, options = ["--vol 1", "--fail 2"])
486 self.fail()
487 except CmdError, e:
488 self.assertEqual(30, e.exit_status)
489 # now remove starting source data and make sure we add something after
490 assert not os.system("rm %s/*" % source)
491 assert not os.system("echo hello > %s/z" % source)
492 # finish backup
493 self.backup("full", source)
494 # and verify we can restore
495 self.restore()
496 assert not os.system("diff %s/z testfiles/restore_out/z" % source)
497
474498
475# Note that this class duplicates all the tests in RestartTest499# Note that this class duplicates all the tests in RestartTest
476class RestartTestWithoutEncryption(RestartTest):500class RestartTestWithoutEncryption(RestartTest):

Subscribers

People subscribed via source and target branches

to all changes: