Merge lp:~gary/launchpad/bug562828 into lp:launchpad

Proposed by Gary Poster
Status: Merged
Approved by: Aaron Bentley
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~gary/launchpad/bug562828
Merge into: lp:launchpad
Diff against target: 170 lines (+129/-2)
2 files modified
lib/canonical/librarian/ftests/test_gc.py (+31/-0)
lib/canonical/librarian/librariangc.py (+98/-2)
To merge this branch: bzr merge lp:~gary/launchpad/bug562828
Reviewer Review Type Date Requested Status
Aaron Bentley (community) Approve
Review via email: mp+24044@code.launchpad.net

Commit message

Switch lib/canonical/librarian/librariangc.py to using a copy of Python 2.6's os.walk in order to support symlinks in our librarian file directory.

Description of the change

This switches lib/canonical/librarian/librariangc.py to using a copy of Python 2.6's os.walk per the discussion of bug 562828, in order to support symlinks in our librarian file directory.

To post a comment you must log in.
Revision history for this message
Gary Poster (gary) wrote :

Note that _walk in the diff is an almost-pure copy and paste from Python 2.6's os.walk. The only difference is that listdir became os.listdir.

Revision history for this message
Gary Poster (gary) wrote :

Note that I initialized content_id because it was failing when I did a "does my test fail without my fix" test. I don't know if it is a "real" problem but it seemed reasonable to do.

Revision history for this message
Aaron Bentley (abentley) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/canonical/librarian/ftests/test_gc.py'
2--- lib/canonical/librarian/ftests/test_gc.py 2010-03-16 10:08:38 +0000
3+++ lib/canonical/librarian/ftests/test_gc.py 2010-04-23 22:13:24 +0000
4@@ -7,6 +7,7 @@
5
6 import shutil
7 import sys
8+import tempfile
9 import os
10 from subprocess import Popen, PIPE, STDOUT
11 from cStringIO import StringIO
12@@ -559,6 +560,36 @@
13 # This should cope.
14 librariangc.delete_unwanted_files(self.con)
15
16+ def test_delete_unwanted_files_follows_symlinks(self):
17+ # In production, our tree has symlinks in it now. We need to be able
18+ # to cope.
19+ # First, let's make sure we have some trash.
20+ self.layer.switchDbUser(dbuser='testadmin')
21+ content = 'foo'
22+ self.client.addFile(
23+ 'foo.txt', len(content), StringIO(content), 'text/plain')
24+ # Roll back the database changes, leaving the file on disk.
25+ transaction.abort()
26+
27+ self.layer.switchDbUser(config.librarian_gc.dbuser)
28+
29+ # Now, we will move the directory containing the trash somewhere else
30+ # and make a symlink to it.
31+ original = os.path.join(config.librarian_server.root, '00', '00')
32+ newdir = tempfile.mkdtemp()
33+ alt = os.path.join(newdir, '00')
34+ shutil.move(original, alt)
35+ os.symlink(alt, original)
36+
37+ # Now we will do our thing. This is the actual test. It used to
38+ # fail.
39+ librariangc.delete_unwanted_files(self.con)
40+
41+ # Clean up.
42+ os.remove(original)
43+ shutil.move(alt, original)
44+ shutil.rmtree(newdir)
45+
46 def test_cronscript(self):
47 script_path = os.path.join(
48 config.root, 'cronscripts', 'librarian-gc.py'
49
50=== modified file 'lib/canonical/librarian/librariangc.py'
51--- lib/canonical/librarian/librariangc.py 2010-03-16 10:08:38 +0000
52+++ lib/canonical/librarian/librariangc.py 2010-04-23 22:13:24 +0000
53@@ -495,6 +495,99 @@
54 loop_tuner = DBLoopTuner(UnreferencedContentPruner(con), 5, log=log)
55 loop_tuner.run()
56
57+# XXX gary 2010-04-22 bug=569217
58+# We should remove this and use Python 2.6's os.walk once we switch to
59+# Python 2.6.
60+def _walk(top, topdown=True, onerror=None, followlinks=False):
61+ """Directory tree generator.
62+
63+ For each directory in the directory tree rooted at top (including top
64+ itself, but excluding '.' and '..'), yields a 3-tuple
65+
66+ dirpath, dirnames, filenames
67+
68+ dirpath is a string, the path to the directory. dirnames is a list of
69+ the names of the subdirectories in dirpath (excluding '.' and '..').
70+ filenames is a list of the names of the non-directory files in dirpath.
71+ Note that the names in the lists are just names, with no path components.
72+ To get a full path (which begins with top) to a file or directory in
73+ dirpath, do os.path.join(dirpath, name).
74+
75+ If optional arg 'topdown' is true or not specified, the triple for a
76+ directory is generated before the triples for any of its subdirectories
77+ (directories are generated top down). If topdown is false, the triple
78+ for a directory is generated after the triples for all of its
79+ subdirectories (directories are generated bottom up).
80+
81+ When topdown is true, the caller can modify the dirnames list in-place
82+ (e.g., via del or slice assignment), and walk will only recurse into the
83+ subdirectories whose names remain in dirnames; this can be used to prune
84+ the search, or to impose a specific order of visiting. Modifying
85+ dirnames when topdown is false is ineffective, since the directories in
86+ dirnames have already been generated by the time dirnames itself is
87+ generated.
88+
89+ By default errors from the os.listdir() call are ignored. If
90+ optional arg 'onerror' is specified, it should be a function; it
91+ will be called with one argument, an os.error instance. It can
92+ report the error to continue with the walk, or raise the exception
93+ to abort the walk. Note that the filename is available as the
94+ filename attribute of the exception object.
95+
96+ By default, os.walk does not follow symbolic links to subdirectories on
97+ systems that support them. In order to get this functionality, set the
98+ optional argument 'followlinks' to true.
99+
100+ Caution: if you pass a relative pathname for top, don't change the
101+ current working directory between resumptions of walk. walk never
102+ changes the current directory, and assumes that the client doesn't
103+ either.
104+
105+ Example:
106+
107+ import os
108+ from os.path import join, getsize
109+ for root, dirs, files in os.walk('python/Lib/email'):
110+ print root, "consumes",
111+ print sum([getsize(join(root, name)) for name in files]),
112+ print "bytes in", len(files), "non-directory files"
113+ if 'CVS' in dirs:
114+ dirs.remove('CVS') # don't visit CVS directories
115+ """
116+
117+ from os.path import join, isdir, islink
118+
119+ # We may not have read permission for top, in which case we can't
120+ # get a list of the files the directory contains. os.path.walk
121+ # always suppressed the exception then, rather than blow up for a
122+ # minor reason when (say) a thousand readable directories are still
123+ # left to visit. That logic is copied here.
124+ try:
125+ # Note that listdir and error are globals in this module due
126+ # to earlier import-*.
127+ names = os.listdir(top)
128+ except error, err:
129+ if onerror is not None:
130+ onerror(err)
131+ return
132+
133+ dirs, nondirs = [], []
134+ for name in names:
135+ if isdir(join(top, name)):
136+ dirs.append(name)
137+ else:
138+ nondirs.append(name)
139+
140+ if topdown:
141+ yield top, dirs, nondirs
142+ for name in dirs:
143+ path = join(top, name)
144+ if followlinks or not islink(path):
145+ for x in _walk(path, topdown, onerror, followlinks):
146+ yield x
147+ if not topdown:
148+ yield top, dirs, nondirs
149+
150
151 def delete_unwanted_files(con):
152 """Delete files found on disk that have no corresponding record in the
153@@ -524,12 +617,15 @@
154 return result[0]
155
156 removed_count = 0
157- next_wanted_content_id = -1
158+ content_id = next_wanted_content_id = -1
159
160 hex_content_id_re = re.compile('^[0-9a-f]{8}$')
161 ONE_DAY = 24 * 60 * 60
162
163- for dirpath, dirnames, filenames in os.walk(get_storage_root()):
164+ # XXX gary 2010-04-22 bug=569217
165+ # We should switch back to os.walk once we switch to Python 2.6.
166+ for dirpath, dirnames, filenames in _walk(
167+ get_storage_root(), followlinks=True):
168
169 # Ignore known and harmless noise in the Librarian storage area.
170 if 'incoming' in dirnames: