Merge lp:~marco-gallotta/ibid/flight into lp:~ibid-core/ibid/old-trunk-1.6
- flight
- Merge into old-trunk-1.6
Status: | Superseded | ||||
---|---|---|---|---|---|
Proposed branch: | lp:~marco-gallotta/ibid/flight | ||||
Merge into: | lp:~ibid-core/ibid/old-trunk-1.6 | ||||
Diff against target: |
441 lines (+334/-17) 3 files modified
ibid/plugins/ascii.py (+20/-14) ibid/plugins/flight.py (+300/-0) ibid/utils/__init__.py (+14/-3) |
||||
To merge this branch: | bzr merge lp:~marco-gallotta/ibid/flight | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Ibid Core Team | Pending | ||
Review via email: mp+16961@code.launchpad.net |
This proposal has been superseded by a proposal from 2010-01-07.
Commit message
Description of the change
marcog (marco-gallotta) wrote : | # |
Stefano Rivera (stefanor) wrote : | # |
Whoops, conflicts in the diff.
- 858. By marcog
-
Merge trunk and revert ascii.py (Somehow old changes got muddled into it)
- 859. By marcog
-
Reverting ascii to trunk's version...hopefully i got it right *this* time
- 860. By marcog
-
Fix typos; allow "airport *for* cape town"
- 861. By marcog
-
Condense responses into one
- 862. By marcog
-
Remove unused imports
- 863. By marcog
-
Make use of etree's findtext and path querying
- 864. By marcog
-
Merge flight and airport features
- 865. By marcog
-
Use dict substitutions for responses
- 866. By marcog
-
Catch ValueError thrown by parse() when the date is invalid
- 867. By marcog
-
Make strings unicode in places where they weren't; split(' ') if s -> string.split()
- 868. By marcog
-
Make parsing a little more resilient
- 869. By marcog
-
strftime needs an ascii string for pre-2.6
- 870. By marcog
-
Merge trunk and move flight into geography
- 871. By marcog
-
Travelocity changed some form attribute names, this fixes them but flight is still not working
Preview Diff
1 | === modified file 'ibid/plugins/ascii.py' |
2 | --- ibid/plugins/ascii.py 2010-01-05 08:00:15 +0000 |
3 | +++ ibid/plugins/ascii.py 2010-01-07 11:59:18 +0000 |
4 | @@ -1,12 +1,11 @@ |
5 | -from BaseHTTPServer import BaseHTTPRequestHandler |
6 | from cStringIO import StringIO |
7 | import Image |
8 | from os import remove |
9 | import os.path |
10 | import subprocess |
11 | +from sys import stderr |
12 | from tempfile import mkstemp |
13 | -from urllib2 import HTTPError, URLError, urlopen |
14 | -from urlparse import urlparse |
15 | +from urllib2 import urlopen |
16 | from zipfile import ZipFile |
17 | |
18 | from aalib import AsciiScreen |
19 | @@ -44,6 +43,7 @@ |
20 | |
21 | @match(r'^draw\s+(\S+\.\S+)(\s+in\s+colou?r)?(?:\s+w(?:idth)?\s+(\d+))?(?:\s+h(?:eight)\s+(\d+))?$') |
22 | def draw(self, event, url, colour, width, height): |
23 | +<<<<<<< TREE |
24 | if not urlparse(url).netloc: |
25 | url = 'http://' + url |
26 | if urlparse(url).scheme == 'file': |
27 | @@ -70,17 +70,26 @@ |
28 | if f.read(1) != '': |
29 | event.addresponse(u'File too large (limit is %i KiB)', self.max_filesize) |
30 | return |
31 | +======= |
32 | + f = urlopen(url) |
33 | + |
34 | + filesize = int(f.info().getheaders('Content-Length')[0]) |
35 | + if filesize > self.max_filesize * 1024: |
36 | + event.addresponse(u'File too large (limit is %i KiB)', self.max_filesize) |
37 | + return |
38 | + |
39 | +>>>>>>> MERGE-SOURCE |
40 | try: |
41 | ext = os.path.splitext(url)[1] |
42 | image = mkstemp(suffix=ext)[1] |
43 | file = open(image, 'w') |
44 | - file.write(buffer) |
45 | + file.write(f.read()) |
46 | file.close() |
47 | |
48 | try: |
49 | img = Image.open(StringIO(open(image, 'r').read())).convert('L') |
50 | - except IOError: |
51 | - event.addresponse(u"Sorry, that doesn't look like an image") |
52 | + except: |
53 | + event.addresponse(u'Cannot understand image format') |
54 | return |
55 | input_width, input_height = img.size[0], img.size[1] |
56 | |
57 | @@ -120,8 +129,8 @@ |
58 | def draw_aa(self, event, image, width, height): |
59 | try: |
60 | image = Image.open(StringIO(open(image, 'r').read())).convert('L') |
61 | - except IOError: |
62 | - event.addresponse(u"Sorry, that doesn't look like an image") |
63 | + except: |
64 | + event.addresponse(u'Cannot understand image format') |
65 | return |
66 | screen = AsciiScreen(width=width, height=height) |
67 | image = image.resize(screen.virtual_size) |
68 | @@ -129,6 +138,7 @@ |
69 | event.addresponse(unicode(screen.render()), address=False, conflate=False) |
70 | |
71 | def draw_caca(self, event, image, width, height): |
72 | + from sys import stderr |
73 | process = subprocess.Popen( |
74 | [self.img2txt_bin, '-f', 'irc', '-W', str(width), '-H', str(height), image], |
75 | shell=False, stdout=subprocess.PIPE) |
76 | @@ -137,15 +147,14 @@ |
77 | if code == 0: |
78 | event.addresponse(unicode(response.replace('\r', '')), address=False, conflate=False) |
79 | else: |
80 | - event.addresponse(u"Sorry, that doesn't look like an image") |
81 | + event.addresponse(u'Sorry, cannot understand image format') |
82 | |
83 | class WriteFiglet(Processor): |
84 | u"""figlet <text> [in <font>] |
85 | list figlet fonts [from <index>]""" |
86 | feature = 'figlet' |
87 | |
88 | - max_width = IntOption('max_width', 'Maximum width for ascii output', 60) |
89 | - fonts_zip = Option('fonts_zip', 'Zip file containing figlet fonts', 'ibid/data/figlet-fonts.zip') |
90 | + fonts_zip = Option('fonts_zip', 'Zip file containing figlet fonts', 'data/figlet-fonts.zip') |
91 | |
92 | def __init__(self, name): |
93 | Processor.__init__(self, name) |
94 | @@ -178,7 +187,4 @@ |
95 | del rendered[0] |
96 | while rendered and rendered[-1].strip() == '': |
97 | del rendered[-1] |
98 | - if rendered and len(rendered[0]) > self.max_width: |
99 | - event.addresponse(u"Sorry that's too long, nobody will be able to read it") |
100 | - return |
101 | event.addresponse(unicode('\n'.join(rendered)), address=False, conflate=False) |
102 | |
103 | === added file 'ibid/plugins/flight.py' |
104 | --- ibid/plugins/flight.py 1970-01-01 00:00:00 +0000 |
105 | +++ ibid/plugins/flight.py 2010-01-07 11:59:18 +0000 |
106 | @@ -0,0 +1,300 @@ |
107 | +import csv |
108 | +import re |
109 | +from sys import maxint |
110 | +from urllib import urlencode |
111 | +from xml.etree import ElementTree |
112 | + |
113 | +from dateutil.parser import parse |
114 | + |
115 | +from ibid.config import IntOption |
116 | +from ibid.plugins import Processor, match |
117 | +from ibid.utils import cacheable_download, human_join |
118 | +from ibid.utils.html import get_html_parse_tree |
119 | + |
120 | +help = { u'airport' : u'Search for airports', |
121 | + u'flight' : u'Search for flights on travelocity' } |
122 | + |
123 | +airports_url = 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat' |
124 | + |
125 | +airports = {} |
126 | + |
127 | +def read_data(): |
128 | + # File is listed as ISO 8859-1 (Latin-1) encoded on |
129 | + # http://openflights.org/data.html, but from decoding it appears to |
130 | + # actually be UTF8 |
131 | + filename = cacheable_download(airports_url, 'flight/airports.dat') |
132 | + reader = csv.reader(open(filename), delimiter=',', quotechar='"') |
133 | + for row in reader: |
134 | + airports[int(row[0])] = [unicode(r, 'utf-8') for r in row[1:]] |
135 | + |
136 | +def airport_search(query, search_loc = True): |
137 | + if not airports: |
138 | + read_data() |
139 | + if search_loc: |
140 | + ids = airport_search(query, False) |
141 | + if len(ids) == 1: |
142 | + return ids |
143 | + query = [unicode(q) for q in query.lower().split(' ') if q] |
144 | + else: |
145 | + query = [unicode(query.lower())] |
146 | + ids = [] |
147 | + for id, airport in airports.items(): |
148 | + if search_loc: |
149 | + data = (u' '.join(c.lower() for c in airport[:5])).split(' ') |
150 | + elif len(query[0]) == 3: |
151 | + data = [airport[3].lower()] |
152 | + else: # assume lenght 4 (won't break if not) |
153 | + data = [airport[4].lower()] |
154 | + if len(filter(lambda q: q in data, query)) == len(query): |
155 | + ids.append(id) |
156 | + return ids |
157 | + |
158 | +def repr_airport(id): |
159 | + airport = airports[id] |
160 | + code = '' |
161 | + if airport[3] or airport[4]: |
162 | + code = ' (%s)' % u'/'.join(filter(lambda c: c, airport[3:5])) |
163 | + return '%s%s' % (airport[0], code) |
164 | + |
165 | +class AirportSearch(Processor): |
166 | + """airport [in] <name|location|code>""" |
167 | + |
168 | + feature = 'airport' |
169 | + |
170 | + @match(r'^airports?\s+(in\s+)?(.+)$') |
171 | + def airport_search(self, event, search_loc, query): |
172 | + search_loc = search_loc is not None |
173 | + if not search_loc and not 3 <= len(query) <= 4: |
174 | + event.addresponse(u'Airport code must be 3 or 4 characters') |
175 | + return |
176 | + ids = airport_search(query, search_loc) |
177 | + if len(ids) == 0: |
178 | + event.addresponse(u"Sorry, I don't know that airport") |
179 | + elif len(ids) == 1: |
180 | + id = ids[0] |
181 | + airport = airports[id] |
182 | + code = 'unknown code' |
183 | + if airport[3] and airport[4]: |
184 | + code = 'codes %s and %s' % (airport[3], airport[4]) |
185 | + elif airport[3]: |
186 | + code = 'code %s' % airport[3] |
187 | + elif airport[4]: |
188 | + code = 'code %s' % airport[4] |
189 | + event.addresponse(u'%s in %s, %s has %s' % |
190 | + (airport[0], airport[1], airport[2], code)) |
191 | + else: |
192 | + event.addresponse(u'Found the following airports: %s', human_join(repr_airport(id) for id in ids)[:480]) |
193 | + |
194 | +class Flight: |
195 | + def __init__(self): |
196 | + self.flight, self.depart_time, self.depart_ap, self.arrive_time, \ |
197 | + self.arrive_ap, self.duration, self.stops, self.price = \ |
198 | + [], None, None, None, None, None, None, None |
199 | + |
200 | + def int_price(self): |
201 | + try: |
202 | + return int(self.price[1:]) |
203 | + except ValueError: |
204 | + return maxint |
205 | + |
206 | + def int_duration(self): |
207 | + hours, minutes = 0, 0 |
208 | + match = re.search(r'(\d+)hr', self.duration) |
209 | + if match: |
210 | + hours = int(match.group(1)) |
211 | + match = re.search(r'(\d+)min', self.duration) |
212 | + if match: |
213 | + minutes = int(match.group(1)) |
214 | + return int(hours)*60 + int(minutes) |
215 | + |
216 | +MONTH_SHORT = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') |
217 | +MONTH_LONG = ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December') |
218 | +OTHER_STUFF = ('am', 'pm', 'st', 'nd', 'rd', 'th', 'morning', 'afternoon', 'evening', 'anytime') |
219 | +DATE = r'(?:[0-9.:/hT -]|%s)+' % '|'.join(MONTH_SHORT+MONTH_LONG+OTHER_STUFF) |
220 | + |
221 | +class FlightException(Exception): |
222 | + pass |
223 | + |
224 | +class FlightSearch(Processor): |
225 | + """[<cheapest|quickest]> flight from <departure> to <destination> from <depart_date> [anytime|morning|afternoon|evening|<time>] to <return_date> [anytime|morning|afternoon|evening|<time>]""" |
226 | + |
227 | + feature = 'flight' |
228 | + |
229 | + max_results = IntOption('max_results', 'Maximum number of results to list', 5) |
230 | + |
231 | + def _flight_search(self, event, dpt, to, dep_date, ret_date): |
232 | + airport_dpt = airport_search(dpt) |
233 | + airport_to = airport_search(to) |
234 | + if len(airport_dpt) == 0: |
235 | + event.addresponse(u"Sorry, I don't know the airport you want to leave from") |
236 | + return |
237 | + if len(airport_to) == 0: |
238 | + event.addresponse(u"Sorry, I don't know the airport you want to fly to") |
239 | + return |
240 | + if len(airport_dpt) > 1: |
241 | + event.addresponse(u'The following airports match the departure: %s', human_join(repr_airport(id) for id in airport_dpt)[:480]) |
242 | + return |
243 | + if len(airport_to) > 1: |
244 | + event.addresponse(u'The following airports match the destination: %s', human_join(repr_airport(id) for id in airport_to)[:480]) |
245 | + return |
246 | + |
247 | + dpt = airport_dpt[0] |
248 | + to = airport_to[0] |
249 | + |
250 | + def to_travelocity_date(date): |
251 | + date = date.lower() |
252 | + time = None |
253 | + for period in ['anytime', 'morning', 'afternoon', 'evening']: |
254 | + if period in date: |
255 | + time = period.title() |
256 | + date = date.replace(period, '') |
257 | + break |
258 | + date = parse(date) |
259 | + if time is None: |
260 | + if date.hour == 0 and date.minute == 0: |
261 | + time = 'Anytime' |
262 | + else: |
263 | + time = date.strftime('%I:00') |
264 | + if time[0] == '0': |
265 | + time = time[1:] |
266 | + if date.hour < 12: |
267 | + time += 'am' |
268 | + else: |
269 | + time += 'pm' |
270 | + date = date.strftime('%m/%d/%Y') |
271 | + return (date, time) |
272 | + |
273 | + (dep_date, dep_time) = to_travelocity_date(dep_date) |
274 | + (ret_date, ret_time) = to_travelocity_date(ret_date) |
275 | + |
276 | + params = {} |
277 | + params['leavingFrom'] = airports[dpt][3] |
278 | + params['goingTo'] = airports[to][3] |
279 | + params['leavingDate'] = dep_date |
280 | + params['dateLeavingTime'] = dep_time |
281 | + params['returningDate'] = ret_date |
282 | + params['dateReturningTime'] = ret_time |
283 | + etree = get_html_parse_tree('http://travel.travelocity.com/flights/InitialSearch.do', data=urlencode(params), treetype='etree') |
284 | + while True: |
285 | + script = [script for script in etree.getiterator('script')][1] |
286 | + matches = script.text and re.search(r'var finurl = "(.*)"', script.text) |
287 | + if matches: |
288 | + url = 'http://travel.travelocity.com/flights/%s' % matches.group(1) |
289 | + etree = get_html_parse_tree(url, treetype='etree') |
290 | + else: |
291 | + break |
292 | + |
293 | + # Handle error |
294 | + div = [d for d in etree.getiterator('div') if d.get(u'class') == 'e_content'] |
295 | + if len(div): |
296 | + error = div[0].find('h3').text |
297 | + raise FlightException(error) |
298 | + |
299 | + departing_flights = self._parse_travelocity(etree) |
300 | + return_url = None |
301 | + table = [t for t in etree.getiterator('table')][3] |
302 | + for tr in table.getiterator('tr'): |
303 | + for td in tr.getiterator('td'): |
304 | + if td.get(u'class').strip() in ['tfPrice', 'tfPriceOrButton']: |
305 | + div = td.find('div') |
306 | + if div is not None: |
307 | + button = div.find('button') |
308 | + if button is not None: |
309 | + onclick = button.get('onclick') |
310 | + match = re.search(r"location.href='\.\./flights/(.+)'", onclick) |
311 | + url_page = match.group(1) |
312 | + match = re.search(r'^(.*?)[^/]*$', url) |
313 | + url_base = match.group(1) |
314 | + return_url = url_base + url_page |
315 | + |
316 | + etree = get_html_parse_tree(return_url, treetype='etree') |
317 | + returning_flights = self._parse_travelocity(etree) |
318 | + |
319 | + return (departing_flights, returning_flights, url) |
320 | + |
321 | + def _parse_travelocity(self, etree): |
322 | + flights = [] |
323 | + table = [t for t in etree.getiterator('table') if t.get(u'id') == 'tfGrid'][0] |
324 | + trs = [t for t in table.getiterator('tr')] |
325 | + tr_index = 1 |
326 | + while tr_index < len(trs): |
327 | + tds = [] |
328 | + while True: |
329 | + new_tds = [t for t in trs[tr_index].getiterator('td')] |
330 | + tds.extend(new_tds) |
331 | + tr_index += 1 |
332 | + if len(filter(lambda t: t.get(u'class').strip() == u'tfAirlineSeatsMR', new_tds)): |
333 | + break |
334 | + flight = Flight() |
335 | + for td in tds: |
336 | + if td.get(u'class').strip() == u'tfAirline': |
337 | + anchor = td.find('a') |
338 | + if anchor is not None: |
339 | + airline = anchor.text.strip() |
340 | + else: |
341 | + airline = td.text.split('\n')[0].strip() |
342 | + flight.flight.append(u'%s %s' % (airline, td.find('div').text.strip())) |
343 | + if td.get(u'class').strip() == u'tfDepart' and td.text: |
344 | + flight.depart_time = td.text.split('\n')[0].strip() |
345 | + flight.depart_ap = '%s %s' % (td.find('div').text.strip(), |
346 | + td.find('div').find('span').text.strip()) |
347 | + if td.get(u'class').strip() == u'tfArrive' and td.text: |
348 | + flight.arrive_time = td.text.split('\n')[0].strip() |
349 | + span = td.find('span') |
350 | + if span is not None and span.get(u'class').strip() == u'tfNextDayDate': |
351 | + flight.arrive_time = u'%s %s' % (flight.arrive_time, span.text.strip()[2:]) |
352 | + span = [s for s in td.find('div').getiterator('span')][1] |
353 | + flight.arrive_ap = '%s %s' % (td.find('div').text.strip(), |
354 | + span.text.strip()) |
355 | + else: |
356 | + flight.arrive_ap = '%s %s' % (td.find('div').text.strip(), |
357 | + td.find('div').find('span').text.strip()) |
358 | + if td.get(u'class').strip() == u'tfTime' and td.text: |
359 | + flight.duration = td.text.strip() |
360 | + flight.stops = td.find('span').find('a').text.strip() |
361 | + if td.get(u'class').strip() in [u'tfPrice', u'tfPriceOr'] and td.text: |
362 | + flight.price = td.text.strip() |
363 | + flight.flight = human_join(flight.flight) |
364 | + flights.append(flight) |
365 | + |
366 | + return flights |
367 | + |
368 | + @match(r'^(?:(cheapest|quickest)\s+)?flights?\s+from\s+(.+)\s+to\s+(.+)\s+from\s+(%s)\s+to\s+(%s)$' % (DATE, DATE)) |
369 | + def flight_search(self, event, priority, dpt, to, dep_date, ret_date): |
370 | + try: |
371 | + flights = self._flight_search(event, dpt, to, dep_date, ret_date) |
372 | + except FlightException, e: |
373 | + event.addresponse(unicode(e)) |
374 | + return |
375 | + if flights is None: |
376 | + return |
377 | + if len(flights[0]) == 0: |
378 | + event.addresponse(u'No matching departure flights found') |
379 | + return |
380 | + if len(flights[1]) == 0: |
381 | + event.addresponse(u'No matching return flights found') |
382 | + return |
383 | + |
384 | + cmp = None |
385 | + if priority == 'cheapest': |
386 | + cmp = lambda a, b: a.int_price() < b.int_price() |
387 | + elif priority == 'quickest': |
388 | + cmp = lambda a, b: a.int_duration() < b.int_duration() |
389 | + if cmp: |
390 | + # select best flight based on priority |
391 | + for i in xrange(2): |
392 | + flights[i].sort(cmp=cmp) |
393 | + del flights[i][1:] |
394 | + for i, flight_type in zip(xrange(2), ['Departing', 'Returning']): |
395 | + if len(flights[i]) > 1: |
396 | + event.addresponse(u'%s flights:', flight_type) |
397 | + for flight in flights[i][:self.max_results]: |
398 | + leading = '' |
399 | + if len(flights[i]) == 1: |
400 | + leading = u'%s flight: ' % flight_type |
401 | + event.addresponse('%s%s departing %s from %s, arriving %s at %s (flight time %s, %s) costs %s per person', |
402 | + (leading, flight.flight, flight.depart_time, flight.depart_ap, flight.arrive_time, |
403 | + flight.arrive_ap, flight.duration, flight.stops, flight.price or 'unknown')) |
404 | + event.addresponse(u'Full results: %s', flights[2]) |
405 | + |
406 | +# vi: set et sta sw=4 ts=4: |
407 | |
408 | === modified file 'ibid/utils/__init__.py' |
409 | --- ibid/utils/__init__.py 2010-01-06 23:19:42 +0000 |
410 | +++ ibid/utils/__init__.py 2010-01-07 11:59:19 +0000 |
411 | @@ -80,9 +80,14 @@ |
412 | req.add_header('User-Agent', 'Ibid/' + (ibid_version() or 'dev')) |
413 | |
414 | if exists: |
415 | - modified = os.path.getmtime(cachefile) |
416 | - modified = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(modified)) |
417 | - req.add_header("If-Modified-Since", modified) |
418 | + if os.path.isfile(cachefile + '.etag'): |
419 | + f = file(cachefile + '.etag', 'r') |
420 | + req.add_header("If-None-Match", f.readline().strip()) |
421 | + f.close() |
422 | + else: |
423 | + modified = os.path.getmtime(cachefile) |
424 | + modified = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(modified)) |
425 | + req.add_header("If-Modified-Since", modified) |
426 | |
427 | try: |
428 | connection = urllib2.urlopen(req) |
429 | @@ -106,6 +111,12 @@ |
430 | gzipper = GzipFile(fileobj=compressedstream) |
431 | data = gzipper.read() |
432 | |
433 | + etag = connection.headers.get('etag') |
434 | + if etag: |
435 | + f = file(cachefile + '.etag', 'w') |
436 | + f.write(etag + '\n') |
437 | + f.close() |
438 | + |
439 | outfile = file(cachefile, 'wb') |
440 | outfile.write(data) |
441 | outfile.close() |
I think the flight plugin is mature enough for some reviews. It's quite possible some gaping bugs are still out there, but it seems to be robust enough.