Code review comment for lp:~bac/charmworld/ngram-inquiry

Revision history for this message
Brad Crittenden (bac) wrote :

Reviewers: mp+226910_code.launchpad.net,

Message:
Please take a look.

Description:
Make ngram searches work.

After the ngram search was deployed it was discovered to only work if 1)
using
the API and 2) had autocomplete=true.

Inspecting the code showed the intent for ngram to be used in other
situations, and there was not documented rationale for only using it
when
doing autocomplete.

The problem was simply in the query for non-autocomplete the field for
the
ngrams was prefixed with 'data.' when it should not have been.

The ngrams fields were moved out of the charm and bundle fields and
passed as
separate fields, treated like 'provides' and 'requires', to avoid the
prefixing.

As a result of this change, the search on the manage.jujucharms.com page
now
uses ngrams too.

QA:

First, ingest some data:
# ttyn
$ make run

# ttyn+1
$ bin/es-update
$ bin/ingest-queued --prefix=~cabs-team/ ## gets sugarcrm
$ bin/ingest-queued --prefix=~charmers/charms/ ## gets a bunch of charms
$ bin/ingest-queued --prefix=~bac ## gets a bundle

Without autocomplete an ngram search is done in addition to the other
fields. This search will find items with 'popular' in the description.
http://127.0.0.1:2464/api/3/search?text=popular

But turning on autocomplete limits to the ngram of the item name.
Nothing
found.
http://127.0.0.1:2464/api/3/search?text=popular&autocomplete=true

These two should find items with names and free text, followed by just
names.
http://127.0.0.1:2464/api/3/search?text=sql
http://127.0.0.1:2464/api/3/search?text=sql&autocomplete=true

This is the entry point used by the search box on the web page.
http://127.0.0.1:2464/api/3/search?text=sql

https://code.launchpad.net/~bac/charmworld/ngram-inquiry/+merge/226910

(do not edit description out of merge proposal)

Please review this at https://codereview.appspot.com/117810044/

Affected files (+74, -9 lines):
   A [revision details]
   M charmworld/search.py
   M charmworld/tests/test_search.py

Index: [revision details]
=== added file '[revision details]'
--- [revision details] 2012-01-01 00:00:00 +0000
+++ [revision details] 2012-01-01 00:00:00 +0000
@@ -0,0 +1,2 @@
+Old revision: tarmac-20140521163857-dgt0onkdb3begqkj
+New revision: <email address hidden>

Index: charmworld/search.py
=== modified file 'charmworld/search.py'
--- charmworld/search.py 2014-05-19 23:44:29 +0000
+++ charmworld/search.py 2014-07-15 19:09:59 +0000
@@ -32,7 +32,6 @@
  ]
  charm_free_text_fields = {
      'name': 10,
- 'ngrams': None,
      'summary': 5,
      'description': 3,
      'config.options.description': None,
@@ -46,7 +45,6 @@
  ]
  bundle_free_text_fields = {
      'name': 10,
- 'ngrams': None,
      'basket_name': 5,
      'description': 3,
      'title': None,
@@ -353,7 +351,7 @@
                  "fields": {
                      "ngrams": {
                          "type": "string",
- "analyzer": "n3_20grams"
+ "analyzer": "n3_20grams",
                      },
                      "name": {
                          "type": "string",
@@ -490,12 +488,13 @@

              charm_dsl = {'filtered': {
                  'query': make_query(
- charm_fields, ['requires.*', 'provides.*']),
- 'filter': {'type': {'value': CHARM}}
+ charm_fields, ['ngrams', 'requires.*', 'provides.*']),
+ 'filter': {'type': {'value': CHARM}},
              }}
              bundle_dsl = {'filtered': {
- 'query': make_query(bundle_fields),
- 'filter': {'type': {'value': BUNDLE}}
+ 'query': make_query(
+ bundle_fields, ['ngrams']),
+ 'filter': {'type': {'value': BUNDLE}},
              }}
              # Union the bundle_dsl and charm_dsl results
              dsl = {'bool': {'should': [bundle_dsl, charm_dsl]}}

Index: charmworld/tests/test_search.py
=== modified file 'charmworld/tests/test_search.py'
--- charmworld/tests/test_search.py 2014-05-14 17:59:37 +0000
+++ charmworld/tests/test_search.py 2014-07-15 19:09:59 +0000
@@ -187,13 +187,33 @@
      def test_ngrams_no_matching_ngrams(self):
          client = self.index_client
          test_value = "blarglefoafblatherinfinatum"
- foo_charm = Charm(self.makeCharm(name=test_value))
- foo_charm = foo_charm # shut it pyflakes
+ Charm(self.makeCharm(name=test_value))
          query = {"query": {"match":
                  {"ngrams": "blvlsdkjfa;lsdkghoifdhvsoidufhvsdececewv"}}}
          results = client._client.search(query, index=client.index_name)
          self.assertEquals(results['hits']['total'], 0)

+ def test_search_charm_ngrams_found(self):
+ client = self.index_client
+ test_value = "blarglefoafblatherinfinatum"
+ Charm(self.makeCharm(name=test_value))
+ results = client.search('foaf')['results']
+ self.assertEqual(1, len(results[CHARM]))
+
+ def test_search_charm_no_matching_ngrams(self):
+ client = self.index_client
+ test_value = "blarglefoafblatherinfinatum"
+ Charm(self.makeCharm(name=test_value))
+ results = client.search('schmoo')['results']
+ self.assertEqual(0, len(results[CHARM]))
+
+ def test_search_charm_ngrams_do_not_match_description(self):
+ client = self.index_client
+ name = "blarglefoafblatherinfinatum"
+ Charm(self.makeCharm(name=name, description='supermoon'))
+ results = client.search('moon')['results']
+ self.assertEqual(0, len(results[CHARM]))
+
      def test_search_bundle(self):
          bundle = Bundle(self.makeBundle())
          client = self.index_client
@@ -325,6 +345,27 @@
              self.assertEqual(2, len(results['bundle']))
              self.assertEqual(0, len(results['charm']))

+ def test_search_bundle_ngrams_found(self):
+ client = self.index_client
+ test_value = "blarglefoafblatherinfinatum"
+ Bundle(self.makeBundle(name=test_value))
+ results = client.search('foaf')['results']
+ self.assertEqual(1, len(results[BUNDLE]))
+
+ def test_search_bundle_no_matching_ngrams(self):
+ client = self.index_client
+ test_value = "blarglefoafblatherinfinatum"
+ Bundle(self.makeBundle(name=test_value))
+ results = client.search('schmoo')['results']
+ self.assertEqual(0, len(results[BUNDLE]))
+
+ def test_search_bundle_ngrams_do_not_match_description(self):
+ client = self.index_client
+ name = "blarglefoafblatherinfinatum"
+ Bundle(self.makeBundle(name=name, description='supermoon'))
+ results = client.search('moon')['results']
+ self.assertEqual(0, len(results[BUNDLE]))
+
      def test_searching_for_charms_only(self):
          charm = Charm(self.makeCharm(description='a mozilla charm'))
          Bundle(self.makeBundle(description='a mozilla bundle'))
@@ -577,6 +618,29 @@
              'bla', autocomplete=True, min_score=0)
          self.assertEquals(result, [])

+ def test_ngram_match(self):
+ # Without autocomplete.
+ charm1 = self.makeCharm(name='foo')
+ charm2 = self.makeCharm(name='foobar')
+ charm3 = self.makeCharm(name='barfoo')
+ result = self.index_client.api_search(
+ 'foo', autocomplete=False, min_score=0)
+ ids = [r['data']['_id'] for r in result]
+ self.assertItemsEqual([
+ charm1['_id'],
+ charm2['_id'],
+ charm3['_id']],
+ ids)
+
+ def test_ngram_miss(self):
+ # Without autocomplete.
+ self.makeCharm(name='foo')
+ self.makeCharm(name='foobar')
+ self.makeCharm(name='barfoo')
+ result = self.index_client.api_search(
+ 'bla', autocomplete=False, min_score=0)
+ self.assertEquals(result, [])
+
      def test_special_characters_return_no_results(self):
          self.makeCharm(name='foo')
          result = self.index_client.api_search(

« Back to merge proposal