Merge lp:~mterry/duplicity/leftover-sigtar into lp:duplicity/0.6

Proposed by Michael Terry
Status: Merged
Merged at revision: 866
Proposed branch: lp:~mterry/duplicity/leftover-sigtar
Merge into: lp:duplicity/0.6
Diff against target: 387 lines (+123/-102)
3 files modified
bin/duplicity (+30/-19)
duplicity/collections.py (+45/-15)
testing/tests/cleanuptest.py (+48/-68)
To merge this branch: bzr merge lp:~mterry/duplicity/leftover-sigtar
Reviewer Review Type Date Requested Status
duplicity-team Pending
Review via email: mp+125285@code.launchpad.net

Description of the change

So currently, duplicity does not delete signature files when doing a remove-all-but-n operation. Seems wrong, since those signature files are now useless and take up space.

This branch does several things:
1) Make remove-all-but-n operate on chains. In practice it did before, since the sets it operated on always came from complete chains (i.e. it never used only some of the sets from a chain)
2) Add a new method to get all signature chains before a certain time.
3) Use this new method to also delete signature chains during remove-all-but operations.

And it cleans up the cleanuptest.py file:
1) Removes crufty, unused code
2) Disallows changing the destination folder for the test, which no one would ever want to do and isn't really supported anyway
3) Add some additional checks to the existing test
4) Adds two new methods to test remove-all-but-n and remove-all-inc-of-but-n-full

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'bin/duplicity'
--- bin/duplicity 2012-09-13 14:08:52 +0000
+++ bin/duplicity 2012-09-19 17:36:20 +0000
@@ -833,6 +833,10 @@
833 """Return string listing times of sets in setlist"""833 """Return string listing times of sets in setlist"""
834 return "\n".join(map(lambda s: dup_time.timetopretty(s.get_time()),834 return "\n".join(map(lambda s: dup_time.timetopretty(s.get_time()),
835 setlist))835 setlist))
836 def chain_times_str(chainlist):
837 """Return string listing times of chains in chainlist"""
838 return "\n".join(map(lambda s: dup_time.timetopretty(s.end_time),
839 chainlist))
836840
837 req_list = col_stats.get_older_than_required(globals.remove_time)841 req_list = col_stats.get_older_than_required(globals.remove_time)
838 if req_list:842 if req_list:
@@ -847,32 +851,39 @@
847 "However, it will not be deleted. To remove all your backups, "851 "However, it will not be deleted. To remove all your backups, "
848 "manually purge the repository."))852 "manually purge the repository."))
849853
850 setlist = col_stats.get_older_than(globals.remove_time)854 chainlist = col_stats.get_chains_older_than(globals.remove_time)
851 if not setlist:855 if not chainlist:
852 log.Notice(_("No old backup sets found, nothing deleted."))856 log.Notice(_("No old backup sets found, nothing deleted."))
853 return857 return
854 if globals.force:858 if globals.force:
855 log.Notice(gettext.ngettext("Deleting backup set at time:",859 log.Notice(gettext.ngettext("Deleting backup chain at time:",
856 "Deleting backup sets at times:",860 "Deleting backup chains at times:",
857 len(setlist)) +861 len(chainlist)) +
858 "\n" + set_times_str(setlist))862 "\n" + chain_times_str(chainlist))
859 setlist.reverse() # save oldest for last863 # Add signature files too, since they won't be needed anymore
860 for set in setlist:864 chainlist += col_stats.get_signature_chains_older_than(globals.remove_time)
865 chainlist.reverse() # save oldest for last
866 for chain in chainlist:
861 # if remove_all_inc_of_but_n_full_mode mode, remove only incrementals one and not full867 # if remove_all_inc_of_but_n_full_mode mode, remove only incrementals one and not full
862 if globals.dry_run:868 if globals.remove_all_inc_of_but_n_full_mode:
863 log.Notice("(Not: dry-run) Deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))869 if isinstance(chain, collections.SignatureChain):
870 chain_desc = _("Deleting incremental signature chain %s")
871 else:
872 chain_desc = _("Deleting incremental backup chain %s")
864 else:873 else:
865 if globals.remove_all_inc_of_but_n_full_mode and (set.type != "inc") :874 if isinstance(chain, collections.SignatureChain):
866 log.Notice("Not deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))875 chain_desc = _("Deleting complete signature chain %s")
867 else :876 else:
868 log.Notice("Deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))877 chain_desc = _("Deleting complete backup chain %s")
869 set.delete()878 log.Notice(chain_desc % dup_time.timetopretty(chain.end_time))
879 if not globals.dry_run:
880 chain.delete(keep_full=globals.remove_all_inc_of_but_n_full_mode)
870 col_stats.set_values(sig_chain_warning=None)881 col_stats.set_values(sig_chain_warning=None)
871 else:882 else:
872 log.Notice(gettext.ngettext("Found old backup set at the following time:",883 log.Notice(gettext.ngettext("Found old backup chain at the following time:",
873 "Found old backup sets at the following times:",884 "Found old backup chains at the following times:",
874 len(setlist)) +885 len(chainlist)) +
875 "\n" + set_times_str(setlist) + "\n" +886 "\n" + chain_times_str(chainlist) + "\n" +
876 _("Rerun command with --force option to actually delete."))887 _("Rerun command with --force option to actually delete."))
877888
878889
879890
=== modified file 'duplicity/collections.py'
--- duplicity/collections.py 2011-11-20 16:53:09 +0000
+++ duplicity/collections.py 2012-09-19 17:36:20 +0000
@@ -326,13 +326,13 @@
326 assert self.end_time326 assert self.end_time
327 return True327 return True
328328
329 def delete(self):329 def delete(self, keep_full=False):
330 """330 """
331 Delete all sets in chain, in reverse order331 Delete all sets in chain, in reverse order
332 """332 """
333 for i in range(len(self.incset_list)-1, -1, -1):333 for i in range(len(self.incset_list)-1, -1, -1):
334 self.incset_list[i].delete()334 self.incset_list[i].delete()
335 if self.fullset:335 if self.fullset and not keep_full:
336 self.fullset.delete()336 self.fullset.delete()
337337
338 def get_sets_at_time(self, time):338 def get_sets_at_time(self, time):
@@ -530,7 +530,7 @@
530 filename_to_fileobj = self.backend.get_fileobj_read530 filename_to_fileobj = self.backend.get_fileobj_read
531 return map(filename_to_fileobj, self.get_filenames(time))531 return map(filename_to_fileobj, self.get_filenames(time))
532532
533 def delete(self):533 def delete(self, keep_full=False):
534 """534 """
535 Remove all files in signature set535 Remove all files in signature set
536 """536 """
@@ -538,12 +538,14 @@
538 if self.archive_dir:538 if self.archive_dir:
539 for i in range(len(self.inclist)-1, -1, -1):539 for i in range(len(self.inclist)-1, -1, -1):
540 self.archive_dir.append(self.inclist[i]).delete()540 self.archive_dir.append(self.inclist[i]).delete()
541 self.archive_dir.append(self.fullsig).delete()541 if not keep_full:
542 self.archive_dir.append(self.fullsig).delete()
542 else:543 else:
543 assert self.backend544 assert self.backend
544 inclist_copy = self.inclist[:]545 inclist_copy = self.inclist[:]
545 inclist_copy.reverse()546 inclist_copy.reverse()
546 inclist_copy.append(self.fullsig)547 if not keep_full:
548 inclist_copy.append(self.fullsig)
547 self.backend.delete(inclist_copy)549 self.backend.delete(inclist_copy)
548550
549 def get_filenames(self, time = None):551 def get_filenames(self, time = None):
@@ -1009,8 +1011,6 @@
1009 if self.matched_chain_pair:1011 if self.matched_chain_pair:
1010 matched_sig_chain = self.matched_chain_pair[0]1012 matched_sig_chain = self.matched_chain_pair[0]
1011 for sig_chain in self.all_sig_chains:1013 for sig_chain in self.all_sig_chains:
1012 print sig_chain.start_time, matched_sig_chain.start_time,
1013 print sig_chain.end_time, matched_sig_chain.end_time
1014 if (sig_chain.start_time == matched_sig_chain.start_time and1014 if (sig_chain.start_time == matched_sig_chain.start_time and
1015 sig_chain.end_time == matched_sig_chain.end_time):1015 sig_chain.end_time == matched_sig_chain.end_time):
1016 old_sig_chains.remove(sig_chain)1016 old_sig_chains.remove(sig_chain)
@@ -1032,10 +1032,43 @@
10321032
1033 def get_chains_older_than(self, t):1033 def get_chains_older_than(self, t):
1034 """1034 """
1035 Return a list of chains older than time t1035 Returns a list of backup chains older than the given time t
1036 """1036
1037 assert self.values_set1037 All of the times will be associated with an intact chain.
1038 return filter(lambda c: c.end_time < t, self.all_backup_chains)1038 Furthermore, none of the times will be of a chain which a newer
1039 set may depend on. For instance, if set A is a full set older
1040 than t, and set B is an incremental based on A which is newer
1041 than t, then the time of set A will not be returned.
1042 """
1043 assert self.values_set
1044 old_chains = []
1045 for chain in self.all_backup_chains:
1046 if chain.end_time < t and (
1047 not self.matched_chain_pair or
1048 chain is not self.matched_chain_pair[1]):
1049 # don't delete the active (matched) chain
1050 old_chains.append(chain)
1051 return old_chains
1052
1053 def get_signature_chains_older_than(self, t):
1054 """
1055 Returns a list of signature chains older than the given time t
1056
1057 All of the times will be associated with an intact chain.
1058 Furthermore, none of the times will be of a chain which a newer
1059 set may depend on. For instance, if set A is a full set older
1060 than t, and set B is an incremental based on A which is newer
1061 than t, then the time of set A will not be returned.
1062 """
1063 assert self.values_set
1064 old_chains = []
1065 for chain in self.all_sig_chains:
1066 if chain.end_time < t and (
1067 not self.matched_chain_pair or
1068 chain is not self.matched_chain_pair[0]):
1069 # don't delete the active (matched) chain
1070 old_chains.append(chain)
1071 return old_chains
10391072
1040 def get_last_full_backup_time(self):1073 def get_last_full_backup_time(self):
1041 """1074 """
@@ -1098,10 +1131,7 @@
1098 """1131 """
1099 old_sets = []1132 old_sets = []
1100 for chain in self.get_chains_older_than(t):1133 for chain in self.get_chains_older_than(t):
1101 if (not self.matched_chain_pair or1134 old_sets.extend(chain.get_all_sets())
1102 chain is not self.matched_chain_pair[1]):
1103 # don't delete the active (matched) chain
1104 old_sets.extend(chain.get_all_sets())
1105 return self.sort_sets(old_sets)1135 return self.sort_sets(old_sets)
11061136
1107 def get_older_than_required(self, t):1137 def get_older_than_required(self, t):
11081138
=== modified file 'testing/tests/cleanuptest.py'
--- testing/tests/cleanuptest.py 2011-12-05 21:49:18 +0000
+++ testing/tests/cleanuptest.py 2012-09-19 17:36:20 +0000
@@ -27,16 +27,10 @@
2727
28helper.setup()28helper.setup()
2929
30# This can be changed to select the URL to use
31backend_url = "file://testfiles/output"
32
33# Extra arguments to be passed to duplicity30# Extra arguments to be passed to duplicity
34other_args = ["-v0", "--no-print-statistics"]31other_args = ["-v0", "--no-print-statistics"]
35#other_args = []32#other_args = []
3633
37# If this is set to true, after each backup, verify contents
38verify = 1
39
40class CmdError(Exception):34class CmdError(Exception):
41 """Indicates an error running an external command"""35 """Indicates an error running an external command"""
42 pass36 pass
@@ -47,6 +41,7 @@
47 """41 """
48 def setUp(self):42 def setUp(self):
49 assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")43 assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
44 self.deltmp()
5045
51 def tearDown(self):46 def tearDown(self):
52 assert not os.system("rm -rf testfiles tempdir temp2.tar")47 assert not os.system("rm -rf testfiles tempdir temp2.tar")
@@ -55,6 +50,7 @@
55 """50 """
56 Run duplicity binary with given arguments and options51 Run duplicity binary with given arguments and options
57 """52 """
53 before_files = set(os.listdir("testfiles/output"))
58 options.append("--archive-dir testfiles/cache")54 options.append("--archive-dir testfiles/cache")
59 cmd_list = ["duplicity"]55 cmd_list = ["duplicity"]
60 cmd_list.extend(options + ["--allow-source-mismatch"])56 cmd_list.extend(options + ["--allow-source-mismatch"])
@@ -71,6 +67,8 @@
71 return_val = os.system(cmdline)67 return_val = os.system(cmdline)
72 if return_val:68 if return_val:
73 raise CmdError(return_val)69 raise CmdError(return_val)
70 after_files = set(os.listdir("testfiles/output"))
71 return after_files - before_files
7472
75 def backup(self, type, input_dir, options = [], current_time = None):73 def backup(self, type, input_dir, options = [], current_time = None):
76 """74 """
@@ -79,108 +77,90 @@
79 options = options[:]77 options = options[:]
80 if type == "full":78 if type == "full":
81 options.insert(0, 'full')79 options.insert(0, 'full')
82 args = [input_dir, "'%s'" % backend_url]80 args = [input_dir, "file://testfiles/output"]
83 self.run_duplicity(args, options, current_time)81 new_files = self.run_duplicity(args, options, current_time)
84 # If a chain ends with time X and the next full chain begins at time X,82 # If a chain ends with time X and the next full chain begins at time X,
85 # we may trigger an assert in collections.py. This way, we avoid83 # we may trigger an assert in collections.py. This way, we avoid
86 # such problems84 # such problems
87 time.sleep(1)85 time.sleep(1)
8886 return new_files
89 def restore(self, file_to_restore = None, time = None, options = [],
90 current_time = None):
91 options = options[:] # just nip any mutability problems in bud
92 assert not os.system("rm -rf testfiles/restore_out")
93 args = ["'%s'" % backend_url, "testfiles/restore_out"]
94 if file_to_restore:
95 options.extend(['--file-to-restore', file_to_restore])
96 if time:
97 options.extend(['--restore-time', str(time)])
98 self.run_duplicity(args, options, current_time)
9987
100 def verify(self, dirname, file_to_verify = None, time = None, options = [],88 def verify(self, dirname, file_to_verify = None, time = None, options = [],
101 current_time = None):89 current_time = None):
102 options = ["verify"] + options[:]90 options = ["verify"] + options[:]
103 args = ["'%s'" % backend_url, dirname]91 args = ["file://testfiles/output", dirname]
104 if file_to_verify:92 if file_to_verify:
105 options.extend(['--file-to-restore', file_to_verify])93 options.extend(['--file-to-restore', file_to_verify])
106 if time:94 if time:
107 options.extend(['--restore-time', str(time)])95 options.extend(['--restore-time', str(time)])
108 self.run_duplicity(args, options, current_time)96 return self.run_duplicity(args, options, current_time)
10997
110 def cleanup(self, options = []):98 def cleanup(self, options = []):
111 """99 """
112 Run duplicity cleanup to default directory100 Run duplicity cleanup to default directory
113 """101 """
114 options = ["cleanup"] + options[:]102 options = ["cleanup"] + options[:]
115 args = ["'%s'" % backend_url]103 args = ["file://testfiles/output"]
116 self.run_duplicity(args, options)104 return self.run_duplicity(args, options)
117105
118 def deltmp(self):106 def deltmp(self):
119 """107 """
120 Delete temporary directories108 Delete temporary directories
121 """109 """
122 assert not os.system("rm -rf testfiles/output "110 assert not os.system("rm -rf testfiles/output testfiles/cache")
123 "testfiles/restore_out testfiles/cache")
124 assert not os.system("mkdir testfiles/output testfiles/cache")111 assert not os.system("mkdir testfiles/output testfiles/cache")
125 backend = duplicity.backend.get_backend(backend_url)112 backend = duplicity.backend.get_backend("file://testfiles/output")
126 bl = backend.list()113 bl = backend.list()
127 if bl:114 if bl:
128 backend.delete(backend.list())115 backend.delete(backend.list())
129 backend.close()116 backend.close()
130117
131 def runtest(self, dirlist, backup_options = [], restore_options = []):
132 """
133 Run backup/restore test on directories in dirlist
134 """
135 assert len(dirlist) >= 1
136 self.deltmp()
137
138 # Back up directories to local backend
139 current_time = 100000
140 self.backup("full", dirlist[0], current_time = current_time,
141 options = backup_options)
142 for new_dir in dirlist[1:]:
143 current_time += 100000
144 self.backup("inc", new_dir, current_time = current_time,
145 options = backup_options)
146
147 # Restore each and compare them
148 for i in range(len(dirlist)):
149 dirname = dirlist[i]
150 current_time = 100000*(i + 1)
151 self.restore(time = current_time, options = restore_options)
152 self.check_same(dirname, "testfiles/restore_out")
153 if verify:
154 self.verify(dirname,
155 time = current_time, options = restore_options)
156
157 def check_same(self, filename1, filename2):
158 """
159 Verify two filenames are the same
160 """
161 path1, path2 = path.Path(filename1), path.Path(filename2)
162 assert path1.compare_recursive(path2, verbose = 1)
163
164 def test_cleanup_after_partial(self):118 def test_cleanup_after_partial(self):
165 """119 """
166 Regression test for https://bugs.launchpad.net/bugs/409593120 Regression test for https://bugs.launchpad.net/bugs/409593
167 where duplicity deletes all the signatures during a cleanup121 where duplicity deletes all the signatures during a cleanup
168 after a failed backup.122 after a failed backup.
169 """123 """
170 #TODO: find something better than /etc for source124 good_files = self.backup("full", "/bin", options = ["--vol 1"])
171 self.deltmp()125 good_files |= self.backup("inc", "/bin", options = ["--vol 1"])
172 self.backup("full", "/etc", options = ["--vol 1"])126 good_files |= self.backup("inc", "/bin", options = ["--vol 1"])
173 self.backup("inc", "/etc", options = ["--vol 1"])
174 self.backup("inc", "/etc", options = ["--vol 1"])
175 # we know we're going to fail these, they are forced127 # we know we're going to fail these, they are forced
176 try:128 try:
177 self.backup("full", "/etc", options = ["--vol 1", "--fail 1"])129 self.backup("full", "/bin", options = ["--vol 1", "--fail 1"])
130 self.fail("Not supposed to reach this far")
178 except CmdError:131 except CmdError:
179 pass132 bad_files = set(os.listdir("testfiles/output"))
133 bad_files -= good_files
134 self.assertNotEqual(bad_files, set())
180 # the cleanup should go OK135 # the cleanup should go OK
181 self.cleanup(options = ["--force"])136 self.cleanup(options = ["--force"])
182 self.backup("inc", "/etc", options = ["--vol 1"])137 leftovers = set(os.listdir("testfiles/output"))
183 self.verify("/etc")138 self.assertSetEqual(good_files, leftovers)
139 self.backup("inc", "/bin", options = ["--vol 1"])
140 self.verify("/bin")
141
142 def test_remove_all_but_n(self):
143 """
144 Test that remove-all-but-n works in the simple case.
145 """
146 full1_files = self.backup("full", "testfiles/empty_dir")
147 full2_files = self.backup("full", "testfiles/empty_dir")
148 self.run_duplicity(["file://testfiles/output"],
149 ["remove-all-but-n", "1", "--force"])
150 leftovers = set(os.listdir("testfiles/output"))
151 self.assertSetEqual(full2_files, leftovers)
152
153 def test_remove_all_inc_of_but_n(self):
154 """
155 Test that remove-all-inc-of-but-n-full works in the simple case.
156 """
157 full1_files = self.backup("full", "testfiles/empty_dir")
158 inc1_files = self.backup("inc", "testfiles/empty_dir")
159 full2_files = self.backup("full", "testfiles/empty_dir")
160 self.run_duplicity(["file://testfiles/output"],
161 ["remove-all-inc-of-but-n-full", "1", "--force"])
162 leftovers = set(os.listdir("testfiles/output"))
163 self.assertSetEqual(full1_files | full2_files, leftovers)
184164
185165
186if __name__ == "__main__":166if __name__ == "__main__":

Subscribers

People subscribed via source and target branches

to all changes: