Merge lp:~mterry/duplicity/leftover-sigtar into lp:duplicity/0.6-series

Proposed by Michael Terry on 2012-09-19
Status: Merged
Merged at revision: 866
Proposed branch: lp:~mterry/duplicity/leftover-sigtar
Merge into: lp:duplicity/0.6-series
Diff against target: 387 lines (+123/-102)
3 files modified
bin/duplicity (+30/-19)
duplicity/collections.py (+45/-15)
testing/tests/cleanuptest.py (+48/-68)
To merge this branch: bzr merge lp:~mterry/duplicity/leftover-sigtar
Reviewer Review Type Date Requested Status
duplicity-team 2012-09-19 Pending
Review via email: mp+125285@code.launchpad.net

Description of the change

So currently, duplicity does not delete signature files when doing a remove-all-but-n operation. Seems wrong, since those signature files are now useless and take up space.

This branch does several things:
1) Make remove-all-but-n operate on chains. In practice it did before, since the sets it operated on always came from complete chains (i.e. it never used only some of the sets from a chain)
2) Add a new method to get all signature chains before a certain time.
3) Use this new method to also delete signature chains during remove-all-but operations.

And it cleans up the cleanuptest.py file:
1) Removes crufty, unused code
2) Disallows changing the destination folder for the test, which no one would ever want to do and isn't really supported anyway
3) Add some additional checks to the existing test
4) Adds two new methods to test remove-all-but-n and remove-all-inc-of-but-n-full

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/duplicity'
2--- bin/duplicity 2012-09-13 14:08:52 +0000
3+++ bin/duplicity 2012-09-19 17:36:20 +0000
4@@ -833,6 +833,10 @@
5 """Return string listing times of sets in setlist"""
6 return "\n".join(map(lambda s: dup_time.timetopretty(s.get_time()),
7 setlist))
8+ def chain_times_str(chainlist):
9+ """Return string listing times of chains in chainlist"""
10+ return "\n".join(map(lambda s: dup_time.timetopretty(s.end_time),
11+ chainlist))
12
13 req_list = col_stats.get_older_than_required(globals.remove_time)
14 if req_list:
15@@ -847,32 +851,39 @@
16 "However, it will not be deleted. To remove all your backups, "
17 "manually purge the repository."))
18
19- setlist = col_stats.get_older_than(globals.remove_time)
20- if not setlist:
21+ chainlist = col_stats.get_chains_older_than(globals.remove_time)
22+ if not chainlist:
23 log.Notice(_("No old backup sets found, nothing deleted."))
24 return
25 if globals.force:
26- log.Notice(gettext.ngettext("Deleting backup set at time:",
27- "Deleting backup sets at times:",
28- len(setlist)) +
29- "\n" + set_times_str(setlist))
30- setlist.reverse() # save oldest for last
31- for set in setlist:
32+ log.Notice(gettext.ngettext("Deleting backup chain at time:",
33+ "Deleting backup chains at times:",
34+ len(chainlist)) +
35+ "\n" + chain_times_str(chainlist))
36+ # Add signature files too, since they won't be needed anymore
37+ chainlist += col_stats.get_signature_chains_older_than(globals.remove_time)
38+ chainlist.reverse() # save oldest for last
39+ for chain in chainlist:
40 # if remove_all_inc_of_but_n_full_mode mode, remove only incrementals one and not full
41- if globals.dry_run:
42- log.Notice("(Not: dry-run) Deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))
43+ if globals.remove_all_inc_of_but_n_full_mode:
44+ if isinstance(chain, collections.SignatureChain):
45+ chain_desc = _("Deleting incremental signature chain %s")
46+ else:
47+ chain_desc = _("Deleting incremental backup chain %s")
48 else:
49- if globals.remove_all_inc_of_but_n_full_mode and (set.type != "inc") :
50- log.Notice("Not deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))
51- else :
52- log.Notice("Deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))
53- set.delete()
54+ if isinstance(chain, collections.SignatureChain):
55+ chain_desc = _("Deleting complete signature chain %s")
56+ else:
57+ chain_desc = _("Deleting complete backup chain %s")
58+ log.Notice(chain_desc % dup_time.timetopretty(chain.end_time))
59+ if not globals.dry_run:
60+ chain.delete(keep_full=globals.remove_all_inc_of_but_n_full_mode)
61 col_stats.set_values(sig_chain_warning=None)
62 else:
63- log.Notice(gettext.ngettext("Found old backup set at the following time:",
64- "Found old backup sets at the following times:",
65- len(setlist)) +
66- "\n" + set_times_str(setlist) + "\n" +
67+ log.Notice(gettext.ngettext("Found old backup chain at the following time:",
68+ "Found old backup chains at the following times:",
69+ len(chainlist)) +
70+ "\n" + chain_times_str(chainlist) + "\n" +
71 _("Rerun command with --force option to actually delete."))
72
73
74
75=== modified file 'duplicity/collections.py'
76--- duplicity/collections.py 2011-11-20 16:53:09 +0000
77+++ duplicity/collections.py 2012-09-19 17:36:20 +0000
78@@ -326,13 +326,13 @@
79 assert self.end_time
80 return True
81
82- def delete(self):
83+ def delete(self, keep_full=False):
84 """
85 Delete all sets in chain, in reverse order
86 """
87 for i in range(len(self.incset_list)-1, -1, -1):
88 self.incset_list[i].delete()
89- if self.fullset:
90+ if self.fullset and not keep_full:
91 self.fullset.delete()
92
93 def get_sets_at_time(self, time):
94@@ -530,7 +530,7 @@
95 filename_to_fileobj = self.backend.get_fileobj_read
96 return map(filename_to_fileobj, self.get_filenames(time))
97
98- def delete(self):
99+ def delete(self, keep_full=False):
100 """
101 Remove all files in signature set
102 """
103@@ -538,12 +538,14 @@
104 if self.archive_dir:
105 for i in range(len(self.inclist)-1, -1, -1):
106 self.archive_dir.append(self.inclist[i]).delete()
107- self.archive_dir.append(self.fullsig).delete()
108+ if not keep_full:
109+ self.archive_dir.append(self.fullsig).delete()
110 else:
111 assert self.backend
112 inclist_copy = self.inclist[:]
113 inclist_copy.reverse()
114- inclist_copy.append(self.fullsig)
115+ if not keep_full:
116+ inclist_copy.append(self.fullsig)
117 self.backend.delete(inclist_copy)
118
119 def get_filenames(self, time = None):
120@@ -1009,8 +1011,6 @@
121 if self.matched_chain_pair:
122 matched_sig_chain = self.matched_chain_pair[0]
123 for sig_chain in self.all_sig_chains:
124- print sig_chain.start_time, matched_sig_chain.start_time,
125- print sig_chain.end_time, matched_sig_chain.end_time
126 if (sig_chain.start_time == matched_sig_chain.start_time and
127 sig_chain.end_time == matched_sig_chain.end_time):
128 old_sig_chains.remove(sig_chain)
129@@ -1032,10 +1032,43 @@
130
131 def get_chains_older_than(self, t):
132 """
133- Return a list of chains older than time t
134- """
135- assert self.values_set
136- return filter(lambda c: c.end_time < t, self.all_backup_chains)
137+ Returns a list of backup chains older than the given time t
138+
139+ All of the times will be associated with an intact chain.
140+ Furthermore, none of the times will be of a chain which a newer
141+ set may depend on. For instance, if set A is a full set older
142+ than t, and set B is an incremental based on A which is newer
143+ than t, then the time of set A will not be returned.
144+ """
145+ assert self.values_set
146+ old_chains = []
147+ for chain in self.all_backup_chains:
148+ if chain.end_time < t and (
149+ not self.matched_chain_pair or
150+ chain is not self.matched_chain_pair[1]):
151+ # don't delete the active (matched) chain
152+ old_chains.append(chain)
153+ return old_chains
154+
155+ def get_signature_chains_older_than(self, t):
156+ """
157+ Returns a list of signature chains older than the given time t
158+
159+ All of the times will be associated with an intact chain.
160+ Furthermore, none of the times will be of a chain which a newer
161+ set may depend on. For instance, if set A is a full set older
162+ than t, and set B is an incremental based on A which is newer
163+ than t, then the time of set A will not be returned.
164+ """
165+ assert self.values_set
166+ old_chains = []
167+ for chain in self.all_sig_chains:
168+ if chain.end_time < t and (
169+ not self.matched_chain_pair or
170+ chain is not self.matched_chain_pair[0]):
171+ # don't delete the active (matched) chain
172+ old_chains.append(chain)
173+ return old_chains
174
175 def get_last_full_backup_time(self):
176 """
177@@ -1098,10 +1131,7 @@
178 """
179 old_sets = []
180 for chain in self.get_chains_older_than(t):
181- if (not self.matched_chain_pair or
182- chain is not self.matched_chain_pair[1]):
183- # don't delete the active (matched) chain
184- old_sets.extend(chain.get_all_sets())
185+ old_sets.extend(chain.get_all_sets())
186 return self.sort_sets(old_sets)
187
188 def get_older_than_required(self, t):
189
190=== modified file 'testing/tests/cleanuptest.py'
191--- testing/tests/cleanuptest.py 2011-12-05 21:49:18 +0000
192+++ testing/tests/cleanuptest.py 2012-09-19 17:36:20 +0000
193@@ -27,16 +27,10 @@
194
195 helper.setup()
196
197-# This can be changed to select the URL to use
198-backend_url = "file://testfiles/output"
199-
200 # Extra arguments to be passed to duplicity
201 other_args = ["-v0", "--no-print-statistics"]
202 #other_args = []
203
204-# If this is set to true, after each backup, verify contents
205-verify = 1
206-
207 class CmdError(Exception):
208 """Indicates an error running an external command"""
209 pass
210@@ -47,6 +41,7 @@
211 """
212 def setUp(self):
213 assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
214+ self.deltmp()
215
216 def tearDown(self):
217 assert not os.system("rm -rf testfiles tempdir temp2.tar")
218@@ -55,6 +50,7 @@
219 """
220 Run duplicity binary with given arguments and options
221 """
222+ before_files = set(os.listdir("testfiles/output"))
223 options.append("--archive-dir testfiles/cache")
224 cmd_list = ["duplicity"]
225 cmd_list.extend(options + ["--allow-source-mismatch"])
226@@ -71,6 +67,8 @@
227 return_val = os.system(cmdline)
228 if return_val:
229 raise CmdError(return_val)
230+ after_files = set(os.listdir("testfiles/output"))
231+ return after_files - before_files
232
233 def backup(self, type, input_dir, options = [], current_time = None):
234 """
235@@ -79,108 +77,90 @@
236 options = options[:]
237 if type == "full":
238 options.insert(0, 'full')
239- args = [input_dir, "'%s'" % backend_url]
240- self.run_duplicity(args, options, current_time)
241+ args = [input_dir, "file://testfiles/output"]
242+ new_files = self.run_duplicity(args, options, current_time)
243 # If a chain ends with time X and the next full chain begins at time X,
244 # we may trigger an assert in collections.py. This way, we avoid
245 # such problems
246 time.sleep(1)
247-
248- def restore(self, file_to_restore = None, time = None, options = [],
249- current_time = None):
250- options = options[:] # just nip any mutability problems in bud
251- assert not os.system("rm -rf testfiles/restore_out")
252- args = ["'%s'" % backend_url, "testfiles/restore_out"]
253- if file_to_restore:
254- options.extend(['--file-to-restore', file_to_restore])
255- if time:
256- options.extend(['--restore-time', str(time)])
257- self.run_duplicity(args, options, current_time)
258+ return new_files
259
260 def verify(self, dirname, file_to_verify = None, time = None, options = [],
261 current_time = None):
262 options = ["verify"] + options[:]
263- args = ["'%s'" % backend_url, dirname]
264+ args = ["file://testfiles/output", dirname]
265 if file_to_verify:
266 options.extend(['--file-to-restore', file_to_verify])
267 if time:
268 options.extend(['--restore-time', str(time)])
269- self.run_duplicity(args, options, current_time)
270+ return self.run_duplicity(args, options, current_time)
271
272 def cleanup(self, options = []):
273 """
274 Run duplicity cleanup to default directory
275 """
276 options = ["cleanup"] + options[:]
277- args = ["'%s'" % backend_url]
278- self.run_duplicity(args, options)
279+ args = ["file://testfiles/output"]
280+ return self.run_duplicity(args, options)
281
282 def deltmp(self):
283 """
284 Delete temporary directories
285 """
286- assert not os.system("rm -rf testfiles/output "
287- "testfiles/restore_out testfiles/cache")
288+ assert not os.system("rm -rf testfiles/output testfiles/cache")
289 assert not os.system("mkdir testfiles/output testfiles/cache")
290- backend = duplicity.backend.get_backend(backend_url)
291+ backend = duplicity.backend.get_backend("file://testfiles/output")
292 bl = backend.list()
293 if bl:
294 backend.delete(backend.list())
295 backend.close()
296
297- def runtest(self, dirlist, backup_options = [], restore_options = []):
298- """
299- Run backup/restore test on directories in dirlist
300- """
301- assert len(dirlist) >= 1
302- self.deltmp()
303-
304- # Back up directories to local backend
305- current_time = 100000
306- self.backup("full", dirlist[0], current_time = current_time,
307- options = backup_options)
308- for new_dir in dirlist[1:]:
309- current_time += 100000
310- self.backup("inc", new_dir, current_time = current_time,
311- options = backup_options)
312-
313- # Restore each and compare them
314- for i in range(len(dirlist)):
315- dirname = dirlist[i]
316- current_time = 100000*(i + 1)
317- self.restore(time = current_time, options = restore_options)
318- self.check_same(dirname, "testfiles/restore_out")
319- if verify:
320- self.verify(dirname,
321- time = current_time, options = restore_options)
322-
323- def check_same(self, filename1, filename2):
324- """
325- Verify two filenames are the same
326- """
327- path1, path2 = path.Path(filename1), path.Path(filename2)
328- assert path1.compare_recursive(path2, verbose = 1)
329-
330 def test_cleanup_after_partial(self):
331 """
332 Regression test for https://bugs.launchpad.net/bugs/409593
333 where duplicity deletes all the signatures during a cleanup
334 after a failed backup.
335 """
336- #TODO: find something better than /etc for source
337- self.deltmp()
338- self.backup("full", "/etc", options = ["--vol 1"])
339- self.backup("inc", "/etc", options = ["--vol 1"])
340- self.backup("inc", "/etc", options = ["--vol 1"])
341+ good_files = self.backup("full", "/bin", options = ["--vol 1"])
342+ good_files |= self.backup("inc", "/bin", options = ["--vol 1"])
343+ good_files |= self.backup("inc", "/bin", options = ["--vol 1"])
344 # we know we're going to fail these, they are forced
345 try:
346- self.backup("full", "/etc", options = ["--vol 1", "--fail 1"])
347+ self.backup("full", "/bin", options = ["--vol 1", "--fail 1"])
348+ self.fail("Not supposed to reach this far")
349 except CmdError:
350- pass
351+ bad_files = set(os.listdir("testfiles/output"))
352+ bad_files -= good_files
353+ self.assertNotEqual(bad_files, set())
354 # the cleanup should go OK
355 self.cleanup(options = ["--force"])
356- self.backup("inc", "/etc", options = ["--vol 1"])
357- self.verify("/etc")
358+ leftovers = set(os.listdir("testfiles/output"))
359+ self.assertSetEqual(good_files, leftovers)
360+ self.backup("inc", "/bin", options = ["--vol 1"])
361+ self.verify("/bin")
362+
363+ def test_remove_all_but_n(self):
364+ """
365+ Test that remove-all-but-n works in the simple case.
366+ """
367+ full1_files = self.backup("full", "testfiles/empty_dir")
368+ full2_files = self.backup("full", "testfiles/empty_dir")
369+ self.run_duplicity(["file://testfiles/output"],
370+ ["remove-all-but-n", "1", "--force"])
371+ leftovers = set(os.listdir("testfiles/output"))
372+ self.assertSetEqual(full2_files, leftovers)
373+
374+ def test_remove_all_inc_of_but_n(self):
375+ """
376+ Test that remove-all-inc-of-but-n-full works in the simple case.
377+ """
378+ full1_files = self.backup("full", "testfiles/empty_dir")
379+ inc1_files = self.backup("inc", "testfiles/empty_dir")
380+ full2_files = self.backup("full", "testfiles/empty_dir")
381+ self.run_duplicity(["file://testfiles/output"],
382+ ["remove-all-inc-of-but-n-full", "1", "--force"])
383+ leftovers = set(os.listdir("testfiles/output"))
384+ self.assertSetEqual(full1_files | full2_files, leftovers)
385
386
387 if __name__ == "__main__":

Subscribers

People subscribed via source and target branches

to all changes: