Merge lp:~lifeless/python-oops-datedir-repo/less-rsync into lp:python-oops-datedir-repo

Proposed by Robert Collins
Status: Merged
Merged at revision: 20
Proposed branch: lp:~lifeless/python-oops-datedir-repo/less-rsync
Merge into: lp:python-oops-datedir-repo
Diff against target: 232 lines (+154/-3)
5 files modified
NEWS (+12/-0)
oops_datedir_repo/__init__.py (+1/-1)
oops_datedir_repo/repository.py (+62/-1)
oops_datedir_repo/tests/test_repository.py (+78/-0)
setup.py (+1/-1)
To merge this branch: bzr merge lp:~lifeless/python-oops-datedir-repo/less-rsync
Reviewer Review Type Date Requested Status
Steve Kowalik (community) code Approve
Review via email: mp+80861@code.launchpad.net

Commit message

This adds the necessary support to have the datedir repo on each server scanned and uploaded to amqp (or any other publisher).

Description of the change

This adds the necessary support to have, in the future, the datedir repo on each server scanned and uploaded to amqp (or any other publisher). The glue code for this will go in a separate project to avoid inappropriate dependencies between the repo and the amqp publisher.

To post a comment you must log in.
Revision history for this message
Steve Kowalik (stevenk) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'NEWS'
--- NEWS 2011-10-10 02:50:20 +0000
+++ NEWS 2011-11-01 02:13:23 +0000
@@ -6,6 +6,18 @@
6NEXT6NEXT
7----7----
88
90.0.10
10------
11
12* New files are written to $name.tmp and then renamed to $name, allowing
13 readers to detect whether the file was finished or not.
14 (Robert Collins)
15
16* DateDirRepo.republish(publisher) can be used to treat a DateDirRepo as the
17 source of reports for feeding into a different publisher. This will remove
18 reports that are successfully republished.
19 (Robert Collins, #884551)
20
90.0.9210.0.9
10-----22-----
1123
1224
=== modified file 'oops_datedir_repo/__init__.py'
--- oops_datedir_repo/__init__.py 2011-10-10 02:51:42 +0000
+++ oops_datedir_repo/__init__.py 2011-11-01 02:13:23 +0000
@@ -25,7 +25,7 @@
25# established at this point, and setup.py will use a version of next-$(revno).25# established at this point, and setup.py will use a version of next-$(revno).
26# If the releaselevel is 'final', then the tarball will be major.minor.micro.26# If the releaselevel is 'final', then the tarball will be major.minor.micro.
27# Otherwise it is major.minor.micro~$(revno).27# Otherwise it is major.minor.micro~$(revno).
28__version__ = (0, 0, 9, 'beta', 0)28__version__ = (0, 0, 10, 'beta', 0)
2929
30__all__ = [30__all__ = [
31 'DateDirRepo',31 'DateDirRepo',
3232
=== modified file 'oops_datedir_repo/repository.py'
--- oops_datedir_repo/repository.py 2011-10-13 02:14:59 +0000
+++ oops_datedir_repo/repository.py 2011-11-01 02:13:23 +0000
@@ -23,12 +23,14 @@
23 ]23 ]
2424
25import datetime25import datetime
26from functools import partial
26from hashlib import md527from hashlib import md5
27import os.path28import os.path
28import stat29import stat
2930
30from pytz import utc31from pytz import utc
3132
33import serializer
32import serializer_bson34import serializer_bson
33import serializer_rfc82235import serializer_rfc822
34from uniquefileallocator import UniqueFileAllocator36from uniquefileallocator import UniqueFileAllocator
@@ -78,6 +80,10 @@
78 def publish(self, report, now=None):80 def publish(self, report, now=None):
79 """Write the report to disk.81 """Write the report to disk.
8082
83 The report is written to a temporary file, and then renamed to its
84 final location. Programs concurrently reading from a DateDirRepo
85 should ignore files ending in .tmp.
86
81 :param now: The datetime to use as the current time. Will be87 :param now: The datetime to use as the current time. Will be
82 determined if not supplied. Useful for testing.88 determined if not supplied. Useful for testing.
83 """89 """
@@ -100,7 +106,8 @@
100 if self.inherit_id:106 if self.inherit_id:
101 oopsid = report.get('id') or oopsid107 oopsid = report.get('id') or oopsid
102 report['id'] = oopsid108 report['id'] = oopsid
103 self.serializer.write(report, open(filename, 'wb'))109 self.serializer.write(report, open(filename + '.tmp', 'wb'))
110 os.rename(filename + '.tmp', filename)
104 if self.stash_path:111 if self.stash_path:
105 original_report['datedir_repo_filepath'] = filename112 original_report['datedir_repo_filepath'] = filename
106 # Set file permission to: rw-r--r-- (so that reports from113 # Set file permission to: rw-r--r-- (so that reports from
@@ -110,3 +117,57 @@
110 stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)117 stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
111 os.chmod(filename, wanted_permission)118 os.chmod(filename, wanted_permission)
112 return report['id']119 return report['id']
120
121 def republish(self, publisher):
122 """Republish the contents of the DateDirRepo to another publisher.
123
124 This makes it easy to treat a DateDirRepo as a backing store in message
125 queue environments: if the message queue is down, flush to the
126 DateDirRepo, then later pick the OOPSes up and send them to the message
127 queue environment.
128
129 For instance:
130
131 >>> repo = DateDirRepo('.')
132 >>> repo.publish({'some':'report'})
133 >>> queue = []
134 >>> def queue_publisher(report):
135 ... queue.append(report)
136 ... return report['id']
137 >>> repo.republish(queue_publisher)
138
139 Will scan the disk and send the single found report to queue_publisher,
140 deleting the report afterwards.
141
142 Empty datedir directories are automatically cleaned up, as are stale
143 .tmp files.
144
145 If the publisher returns None, signalling that it did not publish the
146 report, then the report is not deleted from disk.
147 """
148 two_days = datetime.timedelta(2)
149 now = datetime.date.today()
150 old = now - two_days
151 for dirname in os.listdir(self.root):
152 try:
153 y, m, d = dirname.split('-')
154 except ValueError:
155 # Not a datedir
156 continue
157 date = datetime.date(int(y),int(m),int(d))
158 prune = date < old
159 dirpath = os.path.join(self.root, dirname)
160 files = os.listdir(dirpath)
161 if not files and prune:
162 # Cleanup no longer needed directory.
163 os.rmdir(dirpath)
164 for candidate in map(partial(os.path.join, dirpath), files):
165 if candidate.endswith('.tmp'):
166 if prune:
167 os.unlink(candidate)
168 continue
169 with file(candidate, 'rb') as report_file:
170 report = serializer.read(report_file)
171 oopsid = publisher(report)
172 if oopsid:
173 os.unlink(candidate)
113174
=== modified file 'oops_datedir_repo/tests/test_repository.py'
--- oops_datedir_repo/tests/test_repository.py 2011-10-13 02:14:59 +0000
+++ oops_datedir_repo/tests/test_repository.py 2011-11-01 02:13:23 +0000
@@ -157,3 +157,81 @@
157 with open(expected_path, 'rb') as fp:157 with open(expected_path, 'rb') as fp:
158 self.assertEqual(expected_disk_report, bson.loads(fp.read()))158 self.assertEqual(expected_disk_report, bson.loads(fp.read()))
159159
160 def test_republish_not_published(self):
161 # If an OOPS being republished is not republished, it is preserved on
162 # disk.
163 repo = DateDirRepo(self.useFixture(TempDir()).path)
164 now = datetime.datetime(2006, 04, 01, 00, 30, 00, tzinfo=utc)
165 report = {'time': now}
166 repo.publish(report, now)
167 dir = repo.root + '/2006-04-01/'
168 files = os.listdir(dir)
169 expected_path = dir + files[0]
170 oopses = []
171 # append() returns None
172 publisher = oopses.append
173 repo.republish(publisher)
174 self.assertTrue(os.path.isfile(expected_path))
175 self.assertEqual(1, len(oopses))
176
177 def test_republish_ignores_current_dot_tmp_files(self):
178 # .tmp files are in-progress writes and not to be touched.
179 repo = DateDirRepo(self.useFixture(TempDir()).path, stash_path=True)
180 report = {}
181 repo.publish(report)
182 finished_path = report['datedir_repo_filepath']
183 inprogress_path = finished_path + '.tmp'
184 # Move the file to a temp path, simulating an in-progress write.
185 os.rename(finished_path, inprogress_path)
186 oopses = []
187 publisher = oopses.append
188 repo.republish(publisher)
189 self.assertTrue(os.path.isfile(inprogress_path))
190 self.assertEqual([], oopses)
191
192 def test_republish_republishes_and_removes(self):
193 # When a report is republished it is then removed from disk.
194 repo = DateDirRepo(self.useFixture(TempDir()).path, stash_path=True)
195 report = {}
196 repo.publish(report)
197 finished_path = report['datedir_repo_filepath']
198 oopses = []
199 def publish(report):
200 oopses.append(report)
201 return report['id']
202 repo.republish(publish)
203 self.assertFalse(os.path.isfile(finished_path))
204 self.assertEqual(1, len(oopses))
205
206 def test_republish_cleans_empty_old_directories(self):
207 # An empty old datedir directory cannot get new reports in it, so gets
208 # cleaned up to keep the worker efficient.
209 repo = DateDirRepo(self.useFixture(TempDir()).path)
210 os.mkdir(repo.root + '/2006-04-12')
211 repo.republish([].append)
212 self.assertFalse(os.path.exists(repo.root + '/2006-04-12'))
213
214 def test_republish_removes_old_dot_tmp_files(self):
215 # A .tmp file more than 24 hours old is probably never going to get
216 # renamed into place, so we just unlink it.
217 repo = DateDirRepo(self.useFixture(TempDir()).path)
218 now = datetime.datetime(2006, 04, 01, 00, 30, 00, tzinfo=utc)
219 report = {'time': now}
220 repo.publish(report, now)
221 dir = repo.root + '/2006-04-01/'
222 files = os.listdir(dir)
223 finished_path = dir + files[0]
224 inprogress_path = finished_path + '.tmp'
225 os.rename(finished_path, inprogress_path)
226 oopses = []
227 publisher = oopses.append
228 repo.republish(publisher)
229 self.assertFalse(os.path.isfile(inprogress_path))
230 self.assertEqual([], oopses)
231
232 def test_republish_no_error_non_datedir(self):
233 # The present of a non datedir directory in a datedir repo doesn't
234 # break things.
235 repo = DateDirRepo(self.useFixture(TempDir()).path)
236 os.mkdir(repo.root + '/foo')
237 repo.republish([].append)
160238
=== modified file 'setup.py'
--- setup.py 2011-10-10 02:51:42 +0000
+++ setup.py 2011-11-01 02:13:23 +0000
@@ -22,7 +22,7 @@
22description = file(os.path.join(os.path.dirname(__file__), 'README'), 'rb').read()22description = file(os.path.join(os.path.dirname(__file__), 'README'), 'rb').read()
2323
24setup(name="oops_datedir_repo",24setup(name="oops_datedir_repo",
25 version="0.0.9",25 version="0.0.10",
26 description="OOPS disk serialisation and repository management.",26 description="OOPS disk serialisation and repository management.",
27 long_description=description,27 long_description=description,
28 maintainer="Launchpad Developers",28 maintainer="Launchpad Developers",

Subscribers

People subscribed via source and target branches

to all changes: