Merge lp:~nataliabidart/ubuntuone-client/speed-up-that-local-rescan into lp:ubuntuone-client

Proposed by Natalia Bidart
Status: Merged
Approved by: John O'Brien
Approved revision: 429
Merged at revision: not available
Proposed branch: lp:~nataliabidart/ubuntuone-client/speed-up-that-local-rescan
Merge into: lp:ubuntuone-client
Diff against target: 224 lines (+109/-22)
4 files modified
tests/syncdaemon/test_fsm.py (+79/-3)
ubuntuone/syncdaemon/event_queue.py (+7/-8)
ubuntuone/syncdaemon/filesystem_manager.py (+11/-0)
ubuntuone/syncdaemon/local_rescan.py (+12/-11)
To merge this branch: bzr merge lp:~nataliabidart/ubuntuone-client/speed-up-that-local-rescan
Reviewer Review Type Date Requested Status
John O'Brien (community) Approve
Facundo Batista (community) Approve
Review via email: mp+22145@code.launchpad.net

Commit message

Speeding up the local rescan by retrieving metadata per directory. As per latest profile info, we'd be gaining 24% of speed!

Description of the change

Added retrieval of metadata objects per directory (filesystem_manager).
Avoided using os.path.dirname when possible (event_queue).
Made Local Rescan usa the per directory metadata retriever to speed up scanning (local_rescan).

To post a comment you must log in.
Revision history for this message
Facundo Batista (facundo) wrote :

Approved, but please remove "share_id" from _get_share_info() parameters (don't know why this is not catched by pylint as "unused vars").

review: Approve
Revision history for this message
Natalia Bidart (nataliabidart) wrote :

> Approved, but please remove "share_id" from _get_share_info() parameters
> (don't know why this is not catched by pylint as "unused vars").

Done, thank you!

429. By Natalia Bidart

Removed unused share_id param.

Revision history for this message
John O'Brien (jdobrien) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'tests/syncdaemon/test_fsm.py'
2--- tests/syncdaemon/test_fsm.py 2010-03-16 23:17:53 +0000
3+++ tests/syncdaemon/test_fsm.py 2010-03-26 02:36:46 +0000
4@@ -107,10 +107,12 @@
5 fsm.vm.add_share(share)
6 return share
7
8- def create_node(self, name, is_dir=False):
9+ def create_node(self, name, is_dir=False, share=None):
10 """Create a node."""
11- path = os.path.join(self.share.path, name)
12- mdid = self.fsm.create(path, self.share.volume_id, is_dir=is_dir)
13+ if share is None:
14+ share = self.share
15+ path = os.path.join(share.path, name)
16+ mdid = self.fsm.create(path, share.volume_id, is_dir=is_dir)
17 self.fsm.set_node_id(path, "uuid1")
18 mdobj = self.fsm.get_by_mdid(mdid)
19 return mdobj
20@@ -1252,6 +1254,80 @@
21 self.fsm._set_node_id, mdobj, 'bad-uuid', path)
22
23
24+class GetMDObjectsInDirTests(FSMTestCase):
25+ """Test the get_mdobjs_in_dir method."""
26+
27+ def create_some_contents(self, share):
28+ a = 'a'
29+ ab = os.path.join(a, 'b')
30+ ab1 = os.path.join(a, 'b1')
31+ ab2 = os.path.join(a, 'b2')
32+ ac = os.path.join(a, 'c')
33+ acd = os.path.join(a, 'd')
34+
35+ dirs = [a, ab, ab1, ab2, ac, acd]
36+ for d in dirs:
37+ self.create_node(d, is_dir=True, share=share)
38+
39+ x = os.path.join(a, 'x.txt')
40+ y = os.path.join(ab, 'y.txt')
41+ z = os.path.join(ac, 'z.txt')
42+
43+ files = [x, y, z]
44+ for f in files:
45+ self.create_node(f, is_dir=False, share=share)
46+
47+ self.contents[share] = sorted(dirs + files)
48+
49+ def setUp(self):
50+ """Init."""
51+ FSMTestCase.setUp(self)
52+ self.contents = {}
53+ self.create_some_contents(self.share)
54+
55+ def tearDown(self):
56+ """Clean up."""
57+ self.contents = {}
58+ FSMTestCase.tearDown(self)
59+
60+ def test_basic(self):
61+ """Test basic retrieval."""
62+ expected = self.contents[self.share]
63+ actual = sorted([d.path for d in
64+ self.fsm.get_mdobjs_in_dir(self.share.path)])
65+ self.assertEqual(expected, actual)
66+
67+ def test_similar_paths(self):
68+ """Test having similar paths (a/b, a/b1, a/b2)."""
69+ expected = []
70+ actual = sorted([d.path for d in
71+ self.fsm.get_mdobjs_in_dir(os.path.join('a', 'b'))])
72+ self.assertEqual(expected, actual)
73+
74+ def test_with_two_shares(self):
75+ """Test having 2 shares."""
76+ second_share = self.create_share('second_share', 'the_second',
77+ self.fsm, self.shares_dir)
78+ self.create_some_contents(second_share)
79+
80+ expected = self.contents[second_share]
81+ actual = sorted([d.path for d in
82+ self.fsm.get_mdobjs_in_dir(second_share.path)])
83+ self.assertEqual(expected, actual)
84+
85+ def test_both_shares(self):
86+ """Test having 2 shares and asking for mdobjs in shares_dir."""
87+ second_share = self.create_share('second_share', 'the_second',
88+ self.fsm, self.shares_dir)
89+ self.create_some_contents(second_share)
90+
91+ expected = sorted(self.contents[self.share] +
92+ self.contents[second_share])
93+ actual = sorted([d.path for d in
94+ self.fsm.get_mdobjs_in_dir(self.shares_dir)])
95+ self.assertEqual(expected, actual)
96+
97+
98 class StatTests(FSMTestCase):
99 """Test all the behaviour regarding the stats."""
100
101
102=== modified file 'ubuntuone/syncdaemon/event_queue.py'
103--- ubuntuone/syncdaemon/event_queue.py 2010-03-18 16:51:19 +0000
104+++ ubuntuone/syncdaemon/event_queue.py 2010-03-26 02:36:46 +0000
105@@ -419,17 +419,16 @@
106 # self.timeout() was *just* called, do nothing here
107 pass
108 else:
109- f_path = os.path.join(self.held_event.path,
110- self.held_event.name)
111- t_path = os.path.join(event.path, event.name)
112+ f_path_dir = self.held_event.path
113+ f_path = os.path.join(f_path_dir, self.held_event.name)
114+ t_path_dir = event.path
115+ t_path = os.path.join(t_path_dir, event.name)
116
117 is_from_forreal = not self.is_ignored(f_path)
118 is_to_forreal = not self.is_ignored(t_path)
119 if is_from_forreal and is_to_forreal:
120- f_share_id = self.eq.fs.get_by_path(\
121- os.path.dirname(f_path)).share_id
122- t_share_id = self.eq.fs.get_by_path(\
123- os.path.dirname(t_path)).share_id
124+ f_share_id = self.eq.fs.get_by_path(f_path_dir).share_id
125+ t_share_id = self.eq.fs.get_by_path(t_path_dir).share_id
126 this_is_a_dir = event_is_dir(event)
127 if this_is_a_dir:
128 evtname = "FS_DIR_"
129@@ -500,7 +499,7 @@
130
131 # check if the path is not frozen
132 if self.frozen_path is not None:
133- if os.path.dirname(fullpath) == self.frozen_path:
134+ if event.path == self.frozen_path:
135 # this will at least store the last one, for debug
136 # purposses
137 self.frozen_evts = (evt_name, fullpath)
138
139=== modified file 'ubuntuone/syncdaemon/filesystem_manager.py'
140--- ubuntuone/syncdaemon/filesystem_manager.py 2010-03-16 23:17:53 +0000
141+++ ubuntuone/syncdaemon/filesystem_manager.py 2010-03-26 02:36:46 +0000
142@@ -501,6 +501,17 @@
143 all_mdobjs.append(_MDObject(**mdobj))
144 return all_mdobjs
145
146+ def get_mdobjs_in_dir(self, base_path):
147+ """Return all the mdobjs which dir is base_path."""
148+ all_mdobjs = []
149+ base_path += os.path.sep
150+ len_base = len(base_path)
151+ for path, mdid in self._idx_path.iteritems():
152+ if path[:len_base] == base_path:
153+ mdobj = self.fs[mdid]
154+ all_mdobjs.append(_MDObject(**mdobj))
155+ return all_mdobjs
156+
157 def get_data_for_server_rescan(self):
158 """Generates all the (share, node, hash) tuples needed for rescan"""
159 all_data = []
160
161=== modified file 'ubuntuone/syncdaemon/local_rescan.py'
162--- ubuntuone/syncdaemon/local_rescan.py 2010-03-18 14:56:25 +0000
163+++ ubuntuone/syncdaemon/local_rescan.py 2010-03-26 02:36:46 +0000
164@@ -59,7 +59,7 @@
165
166 def start(self):
167 """Start the comparison."""
168- log_info("start scan all shares")
169+ log_info("start scan all volumes")
170 to_scan = self._get_volumes()
171 for share in to_scan:
172 try:
173@@ -179,10 +179,10 @@
174
175 reactor.callLater(0, safe_scan)
176
177- def _get_share_info(self, share_id, path):
178- """Get all the objects information for a share."""
179+ def _get_share_info(self, path):
180+ """Get all the objects information for a directory path."""
181 share_info = []
182- for obj in self.fsm.get_mdobjs_by_share_id(share_id, path):
183+ for obj in self.fsm.get_mdobjs_in_dir(path):
184 changd = self.fsm.changed(mdid=obj.mdid)
185 share_info.append((obj.path, obj.is_dir, obj.stat, changd,
186 obj.node_id, obj.local_hash, obj.server_hash))
187@@ -233,7 +233,7 @@
188 log_debug("comparing directory %r", dirpath)
189
190 # get the share info
191- share_info = self._get_share_info(share.volume_id, dirpath)
192+ share_info = self._get_share_info(dirpath)
193 shouldbe = self._paths_filter(share_info, dirpath, len(share.path))
194
195 def despair(message, fullname, also_children=False, also_remove=None):
196@@ -455,14 +455,14 @@
197
198 log_info("comp yield: directory %r is gone!", fullname)
199 # it's a directory, didn't have any info inside?
200- base_path = fullname[len(share.path)+1:]
201 to_inform = []
202
203 # get all the info inside that dir
204- for shrpath, is_dir, statinfo, _, _, _, _ in share_info:
205- if shrpath.startswith(base_path):
206- qparts = len(shrpath.split(os.path.sep))
207- to_inform.append((qparts, shrpath, is_dir))
208+ objs = self.fsm.get_mdobjs_by_share_id(share.volume_id, fullname)
209+ for obj in objs:
210+ shrpath = obj.path
211+ qparts = len(shrpath.split(os.path.sep))
212+ to_inform.append((qparts, shrpath, obj.is_dir))
213
214 # order everything from more path components to less (this
215 # will assure correct upgoing walk in the tree)
216@@ -540,7 +540,8 @@
217 self.eq.freeze_begin(dirpath)
218
219 def scan():
220- """the scan, really"""
221+ """The scan, really."""
222+
223 log_debug("scanning the dir %r", dirpath)
224 listdir = os.listdir(dirpath)
225

Subscribers

People subscribed via source and target branches