Merge lp:~martin-borho/ubuntu-geonames/wildcard-and-geopoint-searches into lp:ubuntu-geonames

Proposed by Martin Borho on 2015-02-28
Status: Needs review
Proposed branch: lp:~martin-borho/ubuntu-geonames/wildcard-and-geopoint-searches
Merge into: lp:ubuntu-geonames
Diff against target: 329 lines (+225/-48)
2 files modified
geoname-modpython.py (+204/-45)
sphinx.conf (+21/-3)
To merge this branch: bzr merge lp:~martin-borho/ubuntu-geonames/wildcard-and-geopoint-searches
Reviewer Review Type Date Requested Status
Alan Pope 🍺🐧🐱 🦄 2015-02-28 Pending
Review via email: mp+251364@code.launchpad.net

Commit Message

* possibility to search a location by its coordinates
* wildcard searches

Description of the Change

Based on MP: https://code.launchpad.net/~twstd-dev/ubuntu-geonames/search-wildcard/+merge/234578

This MP adds the ability to search for a location by its coordinates:

http://146.185.188.87/?lat=40.714270&long=-74.005970

and the ability to do wildcard searches by name:

http://146.185.188.87/?query=Stuttga*

The asterisk has to be part of the given search term, so the default behaviour is unchanged:

http://146.185.188.87/?query=Stuttga (no wildcard search, no hits)

To post a comment you must log in.

Unmerged revisions

37. By Martin Borho on 2015-02-28

making wildcard search optional, by adding * to the search term

36. By twstd on 2014-09-13

Align attributes so indexer would not throw error

35. By twstd on 2014-09-13

Add wildcard search

34. By twstd on 2014-09-03

Increase connection timeout

33. By twstd on 2014-09-03

Fix sphinx client instance being shared between classes

32. By twstd on 2014-08-28

Reduce timeout value

31. By twstd on 2014-08-28

Add search by coordinates

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'geoname-modpython.py'
2--- geoname-modpython.py 2014-08-14 08:49:43 +0000
3+++ geoname-modpython.py 2015-02-28 13:50:19 +0000
4@@ -2,10 +2,12 @@
5 from mod_python import apache
6 import sphinxapi
7 import psycopg2
8+from math import pi
9+from math import radians
10 try:
11- from config import authstring
12+ from config import authstring
13 except ImportError:
14- authstring = 'dbname=geonames user=geouser password=geopw host=localhost'
15+ authstring = 'dbname=geonames user=geouser password=geopw host=localhost'
16
17 statement = """
18 SELECT
19@@ -44,48 +46,205 @@
20 jsonheader = '['
21 jsonfooter = ']'
22 jsonentry = '{"name" : "%s", "admin1" : "%s", "admin2" : "%s", "country" : "%s", ' \
23- '"longitude" : "%F", "latitude" : "%F" , '"timezone" : "%s" }'
24+ '"longitude" : "%F", "latitude" : "%F" , "timezone" : "%s" }'
25+
26+class RequestVariables:
27+ """
28+ A very simple wrapper to handle incoming GET parameters.
29+ """
30+ def __init__( self, request ):
31+ self.__fields = util.FieldStorage( request )
32+
33+ def contains( self, variable_name ):
34+ """
35+ Checks if parsed field list contains given parameter.
36+ """
37+ return variable_name in self.__fields
38+
39+ def get( self, variable_name ):
40+ """
41+ Returns given variable value.
42+ """
43+ return self.__fields[ variable_name ] if self.contains( variable_name ) else ""
44+
45+class SphinxQuery:
46+ """
47+ Implements a parent class to hold a query to a sphinx engine.
48+ """
49+ __hostname = "localhost"
50+ __port = 3312
51+ query_keyword = ""
52+ client = None
53+ search_index = "geonames"
54+
55+ def __init__( self ):
56+ self.client = sphinxapi.SphinxClient()
57+ self.initialize_client()
58+
59+ def initialize_client( self ):
60+ """
61+ Prepares sphinx client.
62+ """
63+ self.client.SetServer( self.__hostname, self.__port )
64+
65+ def execute( self ):
66+ try:
67+ result = self.client.Query( self.query_keyword, self.search_index )
68+ return result[ "matches" ] if "matches" in result else []
69+ except:
70+ return []
71+
72+class TextQuery( SphinxQuery ):
73+ """
74+ Implements a simple text based query.
75+ """
76+ def __init__( self, query ):
77+ SphinxQuery.__init__( self )
78+ self.client.SetSortMode( sphinxapi.SPH_SORT_ATTR_DESC, 'population' )
79+ self.query_keyword = "%s" % query
80+
81+
82+ def execute( self ):
83+ return ArrayQueryResult( SphinxQuery.execute( self ) )
84+
85+class CoordinatesBasedQuery( SphinxQuery ):
86+ # default is not enough to search by coordinates
87+ __connection_timeout = 10.00
88+ # set to 5km
89+ __radius = 5000.00
90+
91+ """
92+ Implements a search by latitude and longitude coordinates.
93+ """
94+ def __init__( self, latitude, longitude ):
95+ SphinxQuery.__init__( self )
96+ self.search_index = "geonamescoordinates"
97+ self.client.SetConnectTimeout( self.__connection_timeout )
98+ self.client.SetLimits( 0, 1 )
99+ self.client.SetGeoAnchor(
100+ "latitude",
101+ "longitude",
102+ convert_to_radians( latitude ),
103+ convert_to_radians( longitude ) )
104+ self.client.SetFilterFloatRange( "@geodist", 0.0, self.__radius )
105+ self.client.SetSortMode( sphinxapi.SPH_SORT_EXTENDED, "@geodist ASC" )
106+
107+
108+ def execute( self ):
109+ return SingleQueryResult( SphinxQuery.execute( self ) )
110+
111+class QueryResult:
112+ __result = []
113+
114+ """
115+ Implements a structure for query results.
116+ """
117+ def __init__( self, result ):
118+ self.__result = result
119+
120+ def get( self ):
121+ return self.__result
122+
123+class SingleQueryResult( QueryResult ):
124+ """
125+ Represents result containing only one item.
126+ """
127+ pass
128+
129+class ArrayQueryResult( QueryResult ):
130+ """
131+ Represents result containing a list of items.
132+ """
133+ pass
134+
135+
136+def prepare_request( request ):
137+ """
138+ Sets required headers to the given request.
139+ """
140+ request.content_type = "application/json"
141+ return request
142+
143+def convert_to_radians( degrees ):
144+ parsed_value = 0.0
145+
146+ try:
147+ parsed_value = float( degrees )
148+ except:
149+ pass
150+
151+ return radians( parsed_value )
152+
153+def parse_query_result( query_result ):
154+ """
155+ Returns json ready string formed from the given result.
156+ """
157+ result = query_result.get()
158+
159+ ret = []
160+ if result:
161+ connection = psycopg2.connect(authstring)
162+ cursor = connection.cursor()
163+ try:
164+ # We need at least one value for the sql in operator
165+ # and there are no locations with id 0
166+ statement_ids = ['0']
167+ altstatement_ids = ['0']
168+ for x in result:
169+ rawid = x['id']
170+ idval = rawid / 10
171+ idtype = rawid % 10
172+ if idtype == 1:
173+ statement_ids.append(str(idval))
174+ else:
175+ altstatement_ids.append(str(idval))
176+
177+ statement_ids_str = '(' + ','.join(statement_ids) + ')'
178+ altstatement_ids_str = '(' + ','.join(altstatement_ids) + ')'
179+ fullstatement = statement % (statement_ids_str,
180+ altstatement_ids_str)
181+ cursor.execute(fullstatement)
182+ records = cursor.fetchall()
183+ for record in records:
184+ record = tuple([f or '' for f in record])
185+ # Do not expose population column
186+ ret.append(jsonentry % record[:-1])
187+ finally:
188+ cursor.close()
189+ connection.close()
190+
191+
192+ if isinstance( query_result, SingleQueryResult ):
193+ return "".join( ret )
194+
195+ return ( jsonheader + ",".join( ret ) + jsonfooter )
196+
197+
198+
199+class QueryFactory:
200+ """
201+ Returns a query object if known values were found.
202+ """
203+ @staticmethod
204+ def create( request ):
205+ variable_list = RequestVariables( request )
206+
207+ if variable_list.contains( "query" ):
208+ return TextQuery( variable_list.get( "query" ) )
209+ elif variable_list.contains( "lat" ) and variable_list.contains( "long" ):
210+ return CoordinatesBasedQuery( variable_list.get( "lat" ), variable_list.get( "long" ) )
211+ else:
212+ return None
213+
214
215 def handler(req):
216- fs = util.FieldStorage(req)
217- req.content_type = 'application/json'
218- if 'query' in fs:
219- client = sphinxapi.SphinxClient()
220- client.SetServer('localhost', 3312)
221- client.SetSortMode(sphinxapi.SPH_SORT_ATTR_DESC, 'population')
222- result = client.Query(fs['query'])
223- if result:
224- result = result['matches']
225- ret = []
226- if result:
227- connection = psycopg2.connect(authstring)
228- cursor = connection.cursor()
229- try:
230- # We need at least one value for the sql in operator
231- # and there are no locations with id 0
232- statement_ids = ['0']
233- altstatement_ids = ['0']
234- for x in result:
235- rawid = x['id']
236- idval = rawid / 10
237- idtype = rawid % 10
238- if idtype == 1:
239- statement_ids.append(str(idval))
240- else:
241- altstatement_ids.append(str(idval))
242-
243- statement_ids_str = '(' + ','.join(statement_ids) + ')'
244- altstatement_ids_str = '(' + ','.join(altstatement_ids) + ')'
245- fullstatement = statement % (statement_ids_str,
246- altstatement_ids_str)
247- cursor.execute(fullstatement)
248- records = cursor.fetchall()
249- for record in records:
250- record = tuple([f or '' for f in record])
251- # Do not expose population column
252- ret.append(jsonentry % record[:-1])
253- finally:
254- cursor.close()
255- connection.close()
256- req.write(jsonheader + ', '.join(ret) + jsonfooter)
257- return apache.OK
258+ query = QueryFactory.create( req )
259+
260+ if query:
261+ result = query.execute()
262+
263+ req = prepare_request( req )
264+ #req.write( str( result.get() ) )
265+ req.write( parse_query_result( result ) )
266+
267+ return apache.OK
268
269=== modified file 'sphinx.conf'
270--- sphinx.conf 2012-11-21 12:35:46 +0000
271+++ sphinx.conf 2015-02-28 13:50:19 +0000
272@@ -5,8 +5,10 @@
273 sql_user = geouser
274 sql_pass = geopw
275 sql_db = geonames
276-sql_query = SELECT geoname.geonameid*10+1 AS id, geoname.name AS name, geoname.population AS population, geoname.geonameid AS geonameid FROM geoname WHERE geoname.fclass='P'
277+sql_query = SELECT geoname.geonameid*10+1 AS id, geoname.name AS name, geoname.population AS population, geoname.geonameid AS geonameid, radians(geoname.latitude) AS latitude, radians(geoname.longitude) AS longitude FROM geoname WHERE geoname.fclass='P'
278 sql_attr_uint = population
279+sql_attr_float = latitude
280+sql_attr_float = longitude
281 }
282
283 source altnamessrc
284@@ -16,12 +18,15 @@
285 sql_user = geouser
286 sql_pass = geopw
287 sql_db = geonames
288-sql_query = SELECT alternatename.alternatenameId*10+2 AS id, alternatename.alternateName AS name, geoname.population AS population, geoname.geonameid AS geonameid FROM alternatename JOIN geoname on (geoname.geonameid = alternatename.geonameid AND geoname.name != alternatename.alternateName) WHERE (geoname.fclass='P' AND (LENGTH(alternatename.isoLanguage) < 4 OR alternatename.isoLanguage is null))
289+sql_query = SELECT alternatename.alternatenameId*10+2 AS id, alternatename.alternateName AS name, geoname.population AS population, geoname.geonameid AS geonameid, radians(geoname.latitude) AS latitude, radians(geoname.longitude) AS longitude FROM alternatename JOIN geoname on (geoname.geonameid = alternatename.geonameid AND geoname.name != alternatename.alternateName) WHERE (geoname.fclass='P' AND (LENGTH(alternatename.isoLanguage) < 4 OR alternatename.isoLanguage is null))
290 sql_attr_uint = population
291+sql_attr_float = latitude
292+sql_attr_float = longitude
293 }
294
295 index geonames
296 {
297+ enable_star = 1
298 source = geonamessrc
299 source = altnamessrc
300 path = /var/lib/sphinxsearch/data/geonames
301@@ -29,6 +34,19 @@
302 morphology = none
303 stopwords =
304 min_word_len = 2
305+ min_prefix_len = 3
306+ min_infix_len = 0
307+ charset_type = utf-8
308+ charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+4E00..U+9FFF, U+3000..U+30FF
309+}
310+index geonamescoordinates
311+{
312+ source = geonamessrc
313+ path = /var/lib/sphinxsearch/data/geonamescoordinates
314+ docinfo = extern
315+ morphology = none
316+ stopwords =
317+ min_word_len = 2
318 min_prefix_len = 0
319 min_infix_len = 0
320 charset_type = utf-8
321@@ -39,7 +57,7 @@
322 port = 3312
323 log = /var/log/sphinxsearch/searchd.log
324 query_log = /var/log/sphinxsearch/query.log
325-read_timeout = 5
326+read_timeout = 10
327 max_children = 30
328 pid_file = /var/run/searchd.pid
329 max_matches = 1000

Subscribers

People subscribed via source and target branches

to all changes: