Merge lp:~roadmr/checkbox/lightning into lp:checkbox

Proposed by Daniel Manrique
Status: Merged
Merged at revision: 1276
Proposed branch: lp:~roadmr/checkbox/lightning
Merge into: lp:checkbox
Diff against target: 180 lines (+53/-14)
6 files modified
checkbox/contrib/persist.py (+12/-9)
checkbox/lib/safe.py (+4/-3)
checkbox/message.py (+6/-2)
debian/changelog (+2/-0)
plugins/jobs_prompt.py (+15/-0)
plugins/persist_info.py (+14/-0)
To merge this branch: bzr merge lp:~roadmr/checkbox/lightning
Reviewer Review Type Date Requested Status
Marc Tardif (community) Approve
Review via email: mp+94284@code.launchpad.net

Description of the change

This branch adds a way for the two main objects dealing with persisting stuff to disk (MessageStore and Persist) to decide whether or not they should use "safe" file closing (which flushes the file buffer and syncs to disk to ensure data integrity).

The default (and very robust) behavior is to use safe file closing, just as it has been since this feature was introduced in rev 1090.

It then adds a few methods in the persist_info and jobs_prompt plugins to *disable* safeness prior to starting the gathering phase, and reenable it right after finishing gathering.

In principle this should cause no data loss, since the gathered data is obtained automatically and thus the process can be very easily repeated if needed.

The benefit of doing all this juggling is a significant speed increase in gathering. My system goes from 37-40 seconds on the gathering phase to just under 7 seconds with these changes.

I'm not entirely happy by the handling of Persist's _backend directly (in persist_info). Maybe adding a method to Persist proper to then set the backend's safeness would be more kosher. But this seemed so much easier!

To post a comment you must log in.
Revision history for this message
Marc Tardif (cr3) wrote :

Very useful, thanks man! Just a couple comments about your code:

1. It is more Pythonish (or Pythonic?) to pass keyword arguments to function without spaces:

  foo(a=1, b=2).

2. In plugins/persist_info.py, you access private variables from the persist object:

  self.persist._backend.safe_file_closing = True

Instead, you might have liked to consider adding a public getter and setter on the Persist class:

  self.persist.setSafeFileClosing(True)

However, this doesn't prevent the code from working so I'm merging. All good!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'checkbox/contrib/persist.py'
--- checkbox/contrib/persist.py 2011-10-07 07:16:47 +0000
+++ checkbox/contrib/persist.py 2012-02-22 22:10:23 +0000
@@ -506,8 +506,11 @@
506 def save(self, filepath, map):506 def save(self, filepath, map):
507 self._store[filepath] = map507 self._store[filepath] = map
508508
509509class DiskBackend(Backend):
510class PickleBackend(Backend):510
511 safe_file_closing = True
512
513class PickleBackend(DiskBackend):
511514
512 def __init__(self):515 def __init__(self):
513 import cPickle516 import cPickle
@@ -518,17 +521,17 @@
518 try:521 try:
519 return self._pickle.load(file)522 return self._pickle.load(file)
520 finally:523 finally:
521 safe_close(file)524 safe_close(file, self.safe_file_closing)
522525
523 def save(self, filepath, map):526 def save(self, filepath, map):
524 file = open(filepath, "w")527 file = open(filepath, "w")
525 try:528 try:
526 self._pickle.dump(map, file, 2)529 self._pickle.dump(map, file, 2)
527 finally:530 finally:
528 safe_close(file)531 safe_close(file, self.safe_file_closing)
529532
530533
531class BPickleBackend(Backend):534class BPickleBackend(DiskBackend):
532535
533 def __init__(self):536 def __init__(self):
534 from checkbox.contrib import bpickle537 from checkbox.contrib import bpickle
@@ -539,11 +542,11 @@
539 try:542 try:
540 return self._bpickle.loads(file.read())543 return self._bpickle.loads(file.read())
541 finally:544 finally:
542 safe_close(file)545 safe_close(file, self.safe_file_closing)
543546
544 def save(self, filepath, map):547 def save(self, filepath, map):
545 file = open(filepath, "w")548 file = open(filepath, "w")
546 try:549 try:
547 file.write(self._bpickle.dumps(map))550 file.write(self._bpickle.dumps(map))
548 finally:551 finally:
549 safe_close(file)552 safe_close(file, self.safe_file_closing)
550553
=== modified file 'checkbox/lib/safe.py'
--- checkbox/lib/safe.py 2011-10-07 07:16:47 +0000
+++ checkbox/lib/safe.py 2012-02-22 22:10:23 +0000
@@ -95,7 +95,8 @@
9595
96 return md5sum96 return md5sum
9797
98def safe_close(file):98def safe_close(file, safe = True):
99 file.flush()99 if safe:
100 os.fsync(file.fileno())100 file.flush()
101 os.fsync(file.fileno())
101 file.close()102 file.close()
102103
=== modified file 'checkbox/message.py'
--- checkbox/message.py 2012-02-14 20:32:39 +0000
+++ checkbox/message.py 2012-02-22 22:10:23 +0000
@@ -44,6 +44,10 @@
44 #This caches everything but a message's data, making it manageable to keep in memory.44 #This caches everything but a message's data, making it manageable to keep in memory.
45 _message_cache = {}45 _message_cache = {}
4646
47 #Setting this to False speeds things up considerably, at the expense
48 #of a higher risk of data loss during a crash
49 safe_file_closing = True
50
47 def __init__(self, persist, directory, directory_size=1000):51 def __init__(self, persist, directory, directory_size=1000):
48 self._directory = directory52 self._directory = directory
49 self._directory_size = directory_size53 self._directory_size = directory_size
@@ -219,7 +223,7 @@
219 try:223 try:
220 return file.read()224 return file.read()
221 finally:225 finally:
222 safe_close(file)226 safe_close(file, safe = self.safe_file_closing)
223227
224 def _get_flags(self, path):228 def _get_flags(self, path):
225 basename = posixpath.basename(path)229 basename = posixpath.basename(path)
@@ -261,7 +265,7 @@
261265
262 file = open(filename + ".tmp", "w")266 file = open(filename + ".tmp", "w")
263 file.write(message_data)267 file.write(message_data)
264 safe_close(file)268 safe_close(file, safe = self.safe_file_closing)
265269
266 os.rename(filename + ".tmp", filename)270 os.rename(filename + ".tmp", filename)
267271
268272
=== modified file 'debian/changelog'
--- debian/changelog 2012-02-22 21:05:54 +0000
+++ debian/changelog 2012-02-22 22:10:23 +0000
@@ -5,6 +5,8 @@
55
6 [Daniel Manrique]6 [Daniel Manrique]
7 * Use GObject from gi.repository instead of gobject (LP: #937099)7 * Use GObject from gi.repository instead of gobject (LP: #937099)
8 * Disable flushing to disk after every file access during gathering phase for
9 a significant speed boost. (LP: #939019)
810
9 [Javier Collado]11 [Javier Collado]
10 * Fixed running of disk/read_performance tests (LP: #933528)12 * Fixed running of disk/read_performance tests (LP: #933528)
1113
=== modified file 'plugins/jobs_prompt.py'
--- plugins/jobs_prompt.py 2012-02-15 16:31:04 +0000
+++ plugins/jobs_prompt.py 2012-02-22 22:10:23 +0000
@@ -65,6 +65,13 @@
65 ("report-job", self.report_job)]:65 ("report-job", self.report_job)]:
66 self._manager.reactor.call_on(rt, rh)66 self._manager.reactor.call_on(rt, rh)
6767
68 #This should fire first thing during the gathering phase.
69 self._manager.reactor.call_on("gather", self.begin_gather, -900)
70
71 #This should fire last during gathering (i.e. after
72 #all other gathering callbacks are finished)
73 self._manager.reactor.call_on("gather", self.end_gather, 900)
74
68 def begin_persist(self, persist):75 def begin_persist(self, persist):
69 self._persist = persist76 self._persist = persist
7077
@@ -72,6 +79,14 @@
72 if not recover:79 if not recover:
73 self.store.delete_all_messages()80 self.store.delete_all_messages()
7481
82 def begin_gather(self):
83 #Speed boost during the gathering phase. Not critical data anyway.
84 self.store.safe_file_closing = False
85
86 def end_gather(self):
87 #Back to saving data very carefully once gathering is done.
88 self.store.safe_file_closing = True
89
75 def ignore_jobs(self, jobs):90 def ignore_jobs(self, jobs):
76 self._ignore = jobs91 self._ignore = jobs
7792
7893
=== modified file 'plugins/persist_info.py'
--- plugins/persist_info.py 2011-10-03 10:50:20 +0000
+++ plugins/persist_info.py 2012-02-22 22:10:23 +0000
@@ -42,6 +42,13 @@
42 # Save persist data last42 # Save persist data last
43 self._manager.reactor.call_on("stop", self.save, 1000)43 self._manager.reactor.call_on("stop", self.save, 1000)
4444
45 #This should fire first thing during the gathering phase.
46 self._manager.reactor.call_on("gather", self.begin_gather, -900)
47
48 #This should fire last during gathering (i.e. after
49 #all other gathering callbacks are finished)
50 self._manager.reactor.call_on("gather", self.end_gather, 900)
51
45 def begin(self, interface=None):52 def begin(self, interface=None):
46 if self.persist is None:53 if self.persist is None:
47 self.persist = Persist(self.filename)54 self.persist = Persist(self.filename)
@@ -52,5 +59,12 @@
52 if self.persist:59 if self.persist:
53 self.persist.save()60 self.persist.save()
5461
62 def begin_gather(self):
63 #Speed boost during the gathering phase. Not critical data anyway.
64 self.persist._backend.safe_file_closing = False
65
66 def end_gather(self):
67 #Back to saving data very carefully once gathering is done.
68 self.persist._backend.safe_file_closing = True
5569
56factory = PersistInfo70factory = PersistInfo

Subscribers

People subscribed via source and target branches