After the ngram search was deployed it was discovered to only work if 1)
using
the API and 2) had autocomplete=true.
Inspecting the code showed the intent for ngram to be used in other
situations, and there was no documented rationale for only using it
when
doing autocomplete.
The problem was simply that, in the query for non-autocomplete, the
field for the
ngrams was prefixed with 'data.' when it should not have been.
The ngrams fields were moved out of the charm and bundle fields and
passed as
separate fields, treated like 'provides' and 'requires', to avoid the
prefixing.
As a result of this change, the search on the manage.jujucharms.com page
now
uses ngrams too.
QA:
First, ingest some data:
# ttyn
$ make run
# ttyn+1
$ bin/es-update
$ bin/ingest-queued --prefix=~cabs-team/ ## gets sugarcrm
$ bin/ingest-queued --prefix=~charmers/charms/ ## gets a bunch of charms
$ bin/ingest-queued --prefix=~bac ## gets a bundle
Without autocomplete an ngram search is done in addition to the other
fields. This search will find items with 'popular' in the description. http://127.0.0.1:2464/api/3/search?text=popular
Reviewers: mp+226910_ code.launchpad. net,
Message:
Please take a look.
Description:
Make ngram searches work.
After the ngram search was deployed it was discovered to only work if 1)
using
the API and 2) had autocomplete=true.
Inspecting the code showed the intent for ngram to be used in other
situations, and there was no documented rationale for only using it
when
doing autocomplete.
The problem was simply that, in the query for non-autocomplete, the
field for the
ngrams was prefixed with 'data.' when it should not have been.
The ngrams fields were moved out of the charm and bundle fields and
passed as
separate fields, treated like 'provides' and 'requires', to avoid the
prefixing.
As a result of this change, the search on the manage.jujucharms.com page
now
uses ngrams too.
QA:
First, ingest some data:
# ttyn
$ make run
# ttyn+1
$ bin/es-update
$ bin/ingest-queued --prefix=~cabs-team/ ## gets sugarcrm
$ bin/ingest-queued --prefix=~charmers/charms/ ## gets a bunch of charms
$ bin/ingest-queued --prefix=~bac ## gets a bundle
Without autocomplete an ngram search is done in addition to the other
fields. This search will find items with 'popular' in the description.
http://127.0.0.1:2464/api/3/search?text=popular
But turning on autocomplete limits to the ngram of the item name.
Nothing
found.
http://127.0.0.1:2464/api/3/search?text=popular&autocomplete=true
These two should find items with names and free text, followed by just
names.
http://127.0.0.1:2464/api/3/search?text=sql
http://127.0.0.1:2464/api/3/search?text=sql&autocomplete=true
This is the entry point used by the search box on the web page.
http://127.0.0.1:2464/api/3/search?text=sql
https://code.launchpad.net/~bac/charmworld/ngram-inquiry/+merge/226910
(do not edit description out of merge proposal)
Please review this at https://codereview.appspot.com/117810044/
Affected files (+74, -9 lines):
A [revision details]
M charmworld/search.py
M charmworld/tests/test_search.py
Index: [revision details] 20140521163857- dgt0onkdb3begqk j
=== added file '[revision details]'
--- [revision details] 2012-01-01 00:00:00 +0000
+++ [revision details] 2012-01-01 00:00:00 +0000
@@ -0,0 +1,2 @@
+Old revision: tarmac-
+New revision: <email address hidden>
Index: charmworld/ search. py search. py' search. py 2014-05-19 23:44:29 +0000 search. py 2014-07-15 19:09:59 +0000 free_text_ fields = { description' : 3, config. options. description' : None, free_text_ fields = { basket_ name': 5, description' : 3,
"fields" : {
"ngrams" : {
"type" : "string",
},
"name" : {
"type" : "string",
=== modified file 'charmworld/
--- charmworld/
+++ charmworld/
@@ -32,7 +32,6 @@
]
charm_
'name': 10,
- 'ngrams': None,
'summary': 5,
'
'
@@ -46,7 +45,6 @@
]
bundle_
'name': 10,
- 'ngrams': None,
'
'
'title': None,
@@ -353,7 +351,7 @@
- "analyzer": "n3_20grams"
+ "analyzer": "n3_20grams",
@@ -490,12 +488,13 @@
- charm_fields, ['requires.*', 'provides.*']),
- 'filter': {'type': {'value': CHARM}}
+ charm_fields, ['ngrams', 'requires.*', 'provides.*']),
+ 'filter': {'type': {'value': CHARM}},
}}
- 'query': make_query(
- 'filter': {'type': {'value': BUNDLE}}
+ 'query': make_query(
+ bundle_fields, ['ngrams']),
+ 'filter': {'type': {'value': BUNDLE}},
}}
# Union the bundle_dsl and charm_dsl results
dsl = {'bool': {'should': [bundle_dsl, charm_dsl]}}
Index: charmworld/ tests/test_ search. py tests/test_ search. py' tests/test_ search. py 2014-05-14 17:59:37 +0000 tests/test_ search. py 2014-07-15 19:09:59 +0000 no_matching_ ngrams( self):
test_ value = "blarglefoafbla therinfinatum" makeCharm( name=test_ value)) makeCharm( name=test_ value))
{"ngrams" : "blvlsdkjfa; lsdkghoifdhvsoi dufhvsdececewv" }}} _client. search( query, index=client. index_name)
self. assertEquals( results[ 'hits'] ['total' ], 0)
=== modified file 'charmworld/
--- charmworld/
+++ charmworld/
@@ -187,13 +187,33 @@
def test_ngrams_
client = self.index_client
- foo_charm = Charm(self.
- foo_charm = foo_charm # shut it pyflakes
+ Charm(self.
query = {"query": {"match":
results = client.
+ def test_search_ charm_ngrams_ found(self) : therinfinatum" makeCharm( name=test_ value)) search( 'foaf') ['results' ] CHARM]) ) charm_no_ matching_ ngrams( self): therinfinatum" makeCharm( name=test_ value)) search( 'schmoo' )['results' ] CHARM]) ) charm_ngrams_ do_not_ match_descripti on(self) : therinfinatum" makeCharm( name=name, description= 'supermoon' )) search( 'moon') ['results' ] CHARM]) ) bundle( self): self.makeBundle ())
self.assertEqua l(2, len(results[ 'bundle' ]))
self.assertEqua l(0, len(results[ 'charm' ]))
+ client = self.index_client
+ test_value = "blarglefoafbla
+ Charm(self.
+ results = client.
+ self.assertEqual(1, len(results[
+
+ def test_search_
+ client = self.index_client
+ test_value = "blarglefoafbla
+ Charm(self.
+ results = client.
+ self.assertEqual(0, len(results[
+
+ def test_search_
+ client = self.index_client
+ name = "blarglefoafbla
+ Charm(self.
+ results = client.
+ self.assertEqual(0, len(results[
+
def test_search_
bundle = Bundle(
client = self.index_client
@@ -325,6 +345,27 @@
+ def test_search_ bundle_ ngrams_ found(self) : therinfinatum" self.makeBundle (name=test_ value)) search( 'foaf') ['results' ] BUNDLE] )) bundle_ no_matching_ ngrams( self): therinfinatum" self.makeBundle (name=test_ value)) search( 'schmoo' )['results' ] BUNDLE] )) bundle_ ngrams_ do_not_ match_descripti on(self) : therinfinatum" self.makeBundle (name=name, description= 'supermoon' )) search( 'moon') ['results' ] BUNDLE] )) for_charms_ only(self) : makeCharm( description= 'a mozilla charm'))
Bundle( self.makeBundle (description= 'a mozilla bundle'))
'bla', autocomplete=True, min_score=0)
self. assertEquals( result, [])
+ client = self.index_client
+ test_value = "blarglefoafbla
+ Bundle(
+ results = client.
+ self.assertEqual(1, len(results[
+
+ def test_search_
+ client = self.index_client
+ test_value = "blarglefoafbla
+ Bundle(
+ results = client.
+ self.assertEqual(0, len(results[
+
+ def test_search_
+ client = self.index_client
+ name = "blarglefoafbla
+ Bundle(
+ results = client.
+ self.assertEqual(0, len(results[
+
def test_searching_
charm = Charm(self.
@@ -577,6 +618,29 @@
+ def test_ngram_ match(self) : name='foo' ) name='foobar' ) name='barfoo' ) client. api_search( sEqual( [ miss(self) : name='foo' ) name='foobar' ) name='barfoo' ) client. api_search( ls(result, []) characters_ return_ no_results( self):
self. makeCharm( name='foo' ) client. api_search(
+ # Without autocomplete.
+ charm1 = self.makeCharm(
+ charm2 = self.makeCharm(
+ charm3 = self.makeCharm(
+ result = self.index_
+ 'foo', autocomplete=False, min_score=0)
+ ids = [r['data']['_id'] for r in result]
+ self.assertItem
+ charm1['_id'],
+ charm2['_id'],
+ charm3['_id']],
+ ids)
+
+ def test_ngram_
+ # Without autocomplete.
+ self.makeCharm(
+ self.makeCharm(
+ self.makeCharm(
+ result = self.index_
+ 'bla', autocomplete=False, min_score=0)
+ self.assertEqua
+
def test_special_
result = self.index_