Merge lp:~jelmer/bzr-search/hpss into lp:bzr-search

Proposed by Jelmer Vernooij
Status: Merged
Merged at revision: 89
Proposed branch: lp:~jelmer/bzr-search/hpss
Merge into: lp:bzr-search
Diff against target: 519 lines (+446/-3)
5 files modified
__init__.py (+20/-0)
index.py (+21/-3)
remote.py (+276/-0)
tests/__init__.py (+1/-0)
tests/test_remote.py (+128/-0)
To merge this branch: bzr merge lp:~jelmer/bzr-search/hpss
Reviewer | Review Type | Date Requested | Status
Bazaar Developers | | | Pending
Review via email: mp+83224@code.launchpad.net

Description of the change

Add HPSS (smart server) calls that allow remote access to the bzr-search index, together with the matching client-side calls. The new calls are:

 * Branch.open_index
 * Branch.init_index
 * Index.index_revisions
 * Index.indexed_revisions
 * Index.suggest
 * Index.search

(Now updated so that lp:~jelmer/bzr-search/lazy is no longer merged into this branch.)

To post a comment you must log in.
Revision history for this message
Robert Collins (lifeless) wrote :

Shiny. +1

Revision history for this message
Andrew Bennetts (spiv) wrote :

Robert Collins wrote:
> Shiny. +1

I haven't had time to look at the code, but the description is very shiny.
Thanks for making extending HPSS via plugins a reality! :)

-Andrew.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file '__init__.py'
2--- __init__.py 2008-08-28 02:13:47 +0000
3+++ __init__.py 2011-11-23 21:50:26 +0000
4@@ -38,6 +38,7 @@
5 import commands
6 import errors
7 import index
8+from bzrlib.smart.request import request_handlers as smart_request_handlers
9
10
11 for command in [
12@@ -79,6 +80,25 @@
13 index.make_log_search_filter)
14
15
16+smart_request_handlers.register_lazy(
17+ "Branch.open_index", 'bzrlib.plugins.search.remote',
18+ 'SmartServerBranchRequestOpenIndex')
19+smart_request_handlers.register_lazy(
20+ "Branch.init_index", 'bzrlib.plugins.search.remote',
21+ 'SmartServerBranchRequestInitIndex')
22+smart_request_handlers.register_lazy(
23+ "Index.index_revisions", 'bzrlib.plugins.search.remote',
24+ 'SmartServerIndexRequestIndexRevisions')
25+smart_request_handlers.register_lazy(
26+ "Index.indexed_revisions", 'bzrlib.plugins.search.remote',
27+ 'SmartServerIndexRequestIndexedRevisions')
28+smart_request_handlers.register_lazy(
29+ "Index.suggest", 'bzrlib.plugins.search.remote',
30+ 'SmartServerIndexRequestSuggest')
31+smart_request_handlers.register_lazy(
32+ "Index.search", 'bzrlib.plugins.search.remote',
33+ 'SmartServerIndexRequestSearch')
34+
35 def test_suite():
36 # Thunk across to load_tests for niceness with older bzr versions
37 from bzrlib.tests import TestLoader
38
39=== modified file 'index.py'
40--- index.py 2011-09-19 23:34:37 +0000
41+++ index.py 2011-11-23 21:50:26 +0000
42@@ -31,8 +31,8 @@
43 NotBranchError,
44 NoSuchFile,
45 UnknownFormatError,
46- IncompatibleAPI,
47-)
48+ UnknownSmartMethod,
49+ )
50 from bzrlib.index import CombinedGraphIndex, GraphIndex, InMemoryGraphIndex
51 from bzrlib.lockdir import LockDir
52 try:
53@@ -120,6 +120,14 @@
54 transport = transport.clone(path)
55 transport.ensure_base()
56 index_transport = transport
57+ elif getattr(branch.bzrdir, "_call", None) is not None:
58+ # FIXME 2011-11-17 JRV: Is there a better way to probe
59+ # for smart server branches ?
60+ from bzrlib.plugins.search.remote import RemoteIndex
61+ try:
62+ return RemoteIndex.init(branch)
63+ except UnknownSmartMethod:
64+ raise errors.CannotIndex(branch)
65 else:
66 raise errors.CannotIndex(branch)
67 lockdir = LockDir(index_transport, 'names-lock')
68@@ -205,6 +213,16 @@
69 path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path
70 transport = transport.clone(path)
71 commits_only = False
72+ elif getattr(branch.bzrdir, "_call", None) is not None:
73+ # FIXME 2011-11-17 JRV: Is there a better way to probe
74+ # for smart server branches ?
75+ from bzrlib.plugins.search.remote import RemoteIndex
76+ try:
77+ return RemoteIndex.open(branch)
78+ except UnknownSmartMethod:
79+ # Fall back to traditional methods...
80+ transport = branch.bzrdir.transport.clone('bzr-search')
81+ commits_only = False
82 else:
83 transport = branch.bzrdir.transport.clone('bzr-search')
84 commits_only = False
85@@ -391,7 +409,7 @@
86
87 def _add_index(self, builder, to_remove=None, allow_pack=True):
88 """Add a new component index to the list of indices.
89-
90+
91 :param builder: A component builder supporting the upload_index call.
92 :param to_remove: An optional iterable of components to remove.
93 :param allow_pack: Whether an auto pack is permitted by this operation.
94
95=== added file 'remote.py'
96--- remote.py 1970-01-01 00:00:00 +0000
97+++ remote.py 2011-11-23 21:50:26 +0000
98@@ -0,0 +1,276 @@
99+# search, a bzr plugin for searching within bzr branches/repositories.
100+# Copyright (C) 2011 Jelmer Vernooij
101+#
102+# This program is free software; you can redistribute it and/or modify
103+# it under the terms of the GNU General Public License version 2 as published
104+# by the Free Software Foundation.
105+#
106+# This program is distributed in the hope that it will be useful,
107+# but WITHOUT ANY WARRANTY; without even the implied warranty of
108+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
109+# GNU General Public License for more details.
110+#
111+# You should have received a copy of the GNU General Public License
112+# along with this program; if not, write to the Free Software
113+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
114+#
115+
116+"""Smart server integration for bzr-search."""
117+
118+from bzrlib import remote
119+from bzrlib.controldir import ControlDir
120+from bzrlib.errors import (
121+ ErrorFromSmartServer,
122+ UnexpectedSmartServerResponse,
123+ )
124+from bzrlib.smart.branch import (
125+ SmartServerBranchRequest,
126+ )
127+from bzrlib.smart.request import SuccessfulSmartServerResponse
128+
129+from bzrlib.plugins.search import errors, index
130+
131+
132+def _encode_termlist(termlist):
133+ return ["\0".join([k.encode('utf-8') for k in term]) for term in termlist]
134+
135+def _decode_termlist(termlist):
136+ return [tuple([k.decode('utf-8') for k in term.split('\0')]) for term in termlist]
137+
138+
139+class RemoteIndex(object):
140+ """Index accessed over a smart server."""
141+
142+ def __init__(self, client, path, branch=None):
143+ self._client = client
144+ self._path = path
145+ self._branch = branch
146+
147+ def _call(self, method, *args, **err_context):
148+ try:
149+ return self._client.call(method, *args)
150+ except ErrorFromSmartServer, err:
151+ self._translate_error(err, **err_context)
152+
153+ def _call_expecting_body(self, method, *args, **err_context):
154+ try:
155+ return self._client.call_expecting_body(method, *args)
156+ except ErrorFromSmartServer, err:
157+ self._translate_error(err, **err_context)
158+
159+ def _call_with_body_bytes(self, method, args, body_bytes, **err_context):
160+ try:
161+ return self._client.call_with_body_bytes(method, args, body_bytes)
162+ except ErrorFromSmartServer, err:
163+ self._translate_error(err, **err_context)
164+
165+ def _call_with_body_bytes_expecting_body(self, method, args, body_bytes,
166+ **err_context):
167+ try:
168+ return self._client.call_with_body_bytes_expecting_body(
169+ method, args, body_bytes)
170+ except errors.ErrorFromSmartServer, err:
171+ self._translate_error(err, **err_context)
172+
173+ def _translate_error(self, err, **context):
174+ remote._translate_error(err, index=self, **context)
175+
176+ @classmethod
177+ def open(cls, branch):
178+ # This might raise UnknownSmartMethod,
179+ # but the caller should handle that.
180+ response = branch._call("Branch.open_index",
181+ branch._remote_path())
182+ if response == ('no', ):
183+ raise errors.NoSearchIndex(branch.user_transport)
184+ if response != ('yes', ):
185+ raise UnexpectedSmartServerResponse(response)
186+ return RemoteIndex(branch._client, branch._remote_path(), branch)
187+
188+ @classmethod
189+ def init(cls, branch):
190+ response = branch._call("Branch.init_index",
191+ branch._remote_path())
192+ if response != ('ok', ):
193+ raise errors.UnexpectedSmartServerResponse(response)
194+ return RemoteIndex(branch._client, branch._remote_path(), branch)
195+
196+ def index_branch(self, branch, tip_revision):
197+ """Index revisions from a branch.
198+
199+ :param branch: The branch to index.
200+ :param tip_revision: The tip of the branch.
201+ """
202+ self.index_revisions(branch, [tip_revision])
203+
204+ def index_revisions(self, branch, revisions_to_index):
205+ """Index some revisions from branch.
206+
207+ :param branch: A branch to index.
208+ :param revisions_to_index: A set of revision ids to index.
209+ """
210+ body = "\n".join(revisions_to_index)
211+ response = self._call_with_body_bytes(
212+ 'Index.index_revisions', (self._path, branch._remote_path(),),
213+ body)
214+ if response != ('ok', ):
215+ raise errors.UnexpectedSmartServerResponse(response)
216+
217+ def indexed_revisions(self):
218+ """Return the revision_keys that this index contains terms for."""
219+ response, handler = self._call_expecting_body(
220+ 'Index.indexed_revisions', self._path)
221+ if response != ('ok', ):
222+ raise errors.UnexpectedSmartServerResponse(response)
223+ byte_stream = handler.read_streamed_body()
224+ data = ""
225+ for bytes in byte_stream:
226+ data += bytes
227+ lines = data.split("\n")
228+ data = lines.pop()
229+ for revid in lines:
230+ yield (revid, )
231+
232+ def search(self, termlist):
233+ """Trivial set-based search of the index.
234+
235+ :param termlist: A list of terms.
236+ :return: An iterator of SearchResults for documents indexed by all
237+ terms in the termlist.
238+ """
239+ index._ensure_regexes()
240+ response, handler = self._call_expecting_body('Index.search',
241+ self._path, _encode_termlist(termlist))
242+ if response != ('ok', ):
243+ raise errors.UnexpectedSmartServerResponse(response)
244+ byte_stream = handler.read_streamed_body()
245+ data = ""
246+ ret = []
247+ for bytes in byte_stream:
248+ data += bytes
249+ lines = data.split("\n")
250+ data = lines.pop()
251+ for l in lines:
252+ if l[0] == 'r':
253+ hit = index.RevisionHit(self._branch.repository, (l[1:], ))
254+ elif l[0] == 't':
255+ hit = index.FileTextHit(self, self._branch.repository,
256+ tuple(l[1:].split("\0")), termlist)
257+ elif l[0] == 'p':
258+ hit = index.PathHit(l[1:])
259+ else:
260+ raise AssertionError("Unknown hit kind %r" % l[0])
261+ # We can't yield, since the caller might try to look up results
262+ # over the same medium.
263+ ret.append(hit)
264+ return iter(ret)
265+
266+ def suggest(self, termlist):
267+ """Generate suggestions for extending a search.
268+
269+ :param termlist: A list of terms.
270+ :return: An iterator of terms that start with the last search term in
271+ termlist, and match the rest of the search.
272+ """
273+ response = self._call('Index.suggest',
274+ self._path, _encode_termlist(termlist))
275+ if response[0] != 'ok':
276+ raise UnexpectedSmartServerResponse(response)
277+ return [(suggestion.decode('utf-8'),) for suggestion in response[1]]
278+
279+
280+class SmartServerBranchRequestOpenIndex(SmartServerBranchRequest):
281+ """Open an index file."""
282+
283+ def do_with_branch(self, branch):
284+ """open an index."""
285+ try:
286+ idx = index.open_index_branch(branch)
287+ except errors.NoSearchIndex:
288+ return SuccessfulSmartServerResponse(('no', ))
289+ else:
290+ return SuccessfulSmartServerResponse(('yes', ))
291+
292+
293+class SmartServerBranchRequestInitIndex(SmartServerBranchRequest):
294+ """Create an index."""
295+
296+ def do_with_branch(self, branch, format=None):
297+ """Create an index."""
298+ if format is None:
299+ idx = index.init_index(branch)
300+ else:
301+ idx = index.init_index(branch, format)
302+ return SuccessfulSmartServerResponse(('ok', ))
303+
304+
305+class SmartServerIndexRequest(SmartServerBranchRequest):
306+ """Base class for index requests."""
307+
308+ def do_with_branch(self, branch, *args):
309+ idx = index.open_index_branch(branch)
310+ return self.do_with_index(idx, *args)
311+
312+ def do_with_index(self, index, *args):
313+ raise NotImplementedError(self.do_with_index)
314+
315+
316+class SmartServerIndexRequestIndexRevisions(SmartServerIndexRequest):
317+ """Index a set of revisions."""
318+
319+ def do_body(self, body_bytes):
320+ revids = body_bytes.split("\n")
321+ self._index.index_revisions(self._branch, revids)
322+ return SuccessfulSmartServerResponse(('ok', ))
323+
324+ def do_with_index(self, index, branch_path):
325+ self._index = index
326+ transport = self.transport_from_client_path(branch_path)
327+ controldir = ControlDir.open_from_transport(transport)
328+ if controldir.get_branch_reference() is not None:
329+ raise errors.NotBranchError(transport.base)
330+ self._branch = controldir.open_branch(ignore_fallbacks=True)
331+ # Indicate we want a body
332+ return None
333+
334+
335+class SmartServerIndexRequestIndexedRevisions(SmartServerIndexRequest):
336+ """Retrieve the set of revisions in the index."""
337+
338+ def body_stream(self, index):
339+ for revid in index.indexed_revisions():
340+ yield "%s\n" % "\0".join(revid)
341+
342+ def do_with_index(self, index):
343+ return SuccessfulSmartServerResponse(('ok', ),
344+ body_stream=self.body_stream(index))
345+
346+
347+class SmartServerIndexRequestSuggest(SmartServerIndexRequest):
348+ """Suggest alternative terms."""
349+
350+ def do_with_index(self, index, termlist):
351+ suggestions = index.suggest(_decode_termlist(termlist))
352+ return SuccessfulSmartServerResponse(
353+ ('ok',
354+ [suggestion.encode('utf-8') for (suggestion,) in suggestions]))
355+
356+
357+class SmartServerIndexRequestSearch(SmartServerIndexRequest):
358+ """Search for terms."""
359+
360+ def body_stream(self, results):
361+ for hit in results:
362+ if isinstance(hit, index.FileTextHit):
363+ yield "t%s\0%s\n" % hit.text_key
364+ elif isinstance(hit, index.RevisionHit):
365+ yield "r%s\n" % hit.revision_key[0]
366+ elif isinstance(hit, index.PathHit):
367+ yield "p%s\n" % hit.path_utf8
368+ else:
369+ raise AssertionError("Unknown hit type %r" % hit)
370+
371+ def do_with_index(self, index, termlist):
372+ results = index.search(_decode_termlist(termlist))
373+ return SuccessfulSmartServerResponse(
374+ ('ok',), body_stream=self.body_stream(results))
375
376=== modified file 'tests/__init__.py'
377--- tests/__init__.py 2008-06-14 05:07:54 +0000
378+++ tests/__init__.py 2011-11-23 21:50:26 +0000
379@@ -25,6 +25,7 @@
380 'errors',
381 'index',
382 'inventory',
383+ 'remote',
384 'transport',
385 ]
386 standard_tests.addTests(loader.loadTestsFromModuleNames(
387
388=== added file 'tests/test_remote.py'
389--- tests/test_remote.py 1970-01-01 00:00:00 +0000
390+++ tests/test_remote.py 2011-11-23 21:50:26 +0000
391@@ -0,0 +1,128 @@
392+# search, a bzr plugin for searching within bzr branches/repositories.
393+# Copyright (C) 2011 Jelmer Vernooij
394+#
395+# This program is free software; you can redistribute it and/or modify
396+# it under the terms of the GNU General Public License version 2 as published
397+# by the Free Software Foundation.
398+#
399+# This program is distributed in the hope that it will be useful,
400+# but WITHOUT ANY WARRANTY; without even the implied warranty of
401+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
402+# GNU General Public License for more details.
403+#
404+# You should have received a copy of the GNU General Public License
405+# along with this program; if not, write to the Free Software
406+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
407+#
408+
409+"""Tests for the smart server verbs."""
410+
411+from bzrlib import tests
412+from bzrlib.branch import Branch
413+from bzrlib.smart import (
414+ request as smart_req,
415+ )
416+
417+from bzrlib.plugins.search import (
418+ errors,
419+ index,
420+ )
421+from bzrlib.plugins.search.remote import (
422+ RemoteIndex,
423+ SmartServerBranchRequestOpenIndex,
424+ )
425+
426+
427+class TestSmartServerBranchRequestOpenIndex(
428+ tests.TestCaseWithMemoryTransport):
429+
430+ def test_missing(self):
431+ """For an empty branch, the result is ('no', )."""
432+ backing = self.get_transport()
433+ request = SmartServerBranchRequestOpenIndex(backing)
434+ self.make_branch('.')
435+ self.assertEqual(smart_req.SmartServerResponse(('no', )),
436+ request.execute(''))
437+
438+ def test_present(self):
439+ """For a branch with an index, ('yes', ) is returned."""
440+ backing = self.get_transport()
441+ request = SmartServerBranchRequestOpenIndex(backing)
442+ b = self.make_branch('.')
443+ index.init_index(b)
444+ self.assertEqual(smart_req.SmartServerResponse(('yes', )),
445+ request.execute(''))
446+
447+
448+class TestRemoteIndex(tests.TestCaseWithTransport):
449+
450+ def test_no_index(self):
451+ local_branch = self.make_branch('.')
452+ remote_transport = self.make_smart_server('.')
453+ remote_branch = Branch.open_from_transport(remote_transport)
454+ self.assertRaises(errors.NoSearchIndex, RemoteIndex.open,
455+ remote_branch)
456+
457+ def test_open(self):
458+ local_branch = self.make_branch('.')
459+ index.init_index(local_branch)
460+ remote_transport = self.make_smart_server('.')
461+ remote_branch = Branch.open_from_transport(remote_transport)
462+ idx = RemoteIndex.open(remote_branch)
463+ self.assertIsInstance(idx, RemoteIndex)
464+
465+ def test_init(self):
466+ local_branch = self.make_branch('.')
467+ remote_transport = self.make_smart_server('.')
468+ remote_branch = Branch.open_from_transport(remote_transport)
469+ idx = index.init_index(remote_branch)
470+ self.assertIsInstance(idx, RemoteIndex)
471+
472+ def test_init_exists(self):
473+ local_branch = self.make_branch('.')
474+ index.init_index(local_branch)
475+ remote_transport = self.make_smart_server('.')
476+ remote_branch = Branch.open_from_transport(remote_transport)
477+ #self.assertRaises( index.init_index, remote_branch)
478+
479+
480+class TestWithRemoteIndex(tests.TestCaseWithTransport):
481+
482+ def make_remote_index(self):
483+ tree = self.make_branch_and_tree('.')
484+ local_branch = tree.branch
485+ index.init_index(local_branch)
486+ remote_transport = self.make_smart_server('.')
487+ remote_branch = Branch.open_from_transport(remote_transport)
488+ return tree, remote_branch, RemoteIndex.open(remote_branch)
489+
490+ def test_index_revisions(self):
491+ tree, branch, index = self.make_remote_index()
492+ tree.commit(message="message", rev_id='revid1')
493+ index.index_revisions(branch, ['revid1'])
494+ self.assertEquals([('revid1',)], list(index.indexed_revisions()))
495+
496+ def test_indexed_revisions(self):
497+ tree, branch, remote_index = self.make_remote_index()
498+ tree.commit(message="message", rev_id='revid1')
499+ self.assertEquals([], list(remote_index.indexed_revisions()))
500+ local_index = index.open_index_branch(tree.branch)
501+ local_index.index_revisions(tree.branch, ['revid1'])
502+ self.assertEquals([('revid1',)], list(remote_index.indexed_revisions()))
503+
504+ def test_suggest(self):
505+ tree, branch, remote_index = self.make_remote_index()
506+ tree.commit(message="first", rev_id='revid1')
507+ local_index = index.open_index_branch(tree.branch)
508+ local_index.index_revisions(tree.branch, ['revid1'])
509+ self.assertEquals([(u'first',)], list(remote_index.suggest([(u'f',)])))
510+
511+ def test_search(self):
512+ tree, branch, remote_index = self.make_remote_index()
513+ # The double-space is a cheap smoke test for the tokeniser.
514+ revid = tree.commit('first post')
515+ remote_index.index_revisions(branch, [revid])
516+ results = list(remote_index.search([('post',)]))
517+ self.assertEqual(1, len(results))
518+ self.assertIsInstance(results[0], index.RevisionHit)
519+ self.assertEqual((revid,), results[0].revision_key)

Subscribers

People subscribed via source and target branches

to all changes: