Merge lp:~marco-gallotta/ibid/flight into lp:~ibid-core/ibid/old-trunk-1.6

Proposed by marcog
Status: Superseded
Proposed branch: lp:~marco-gallotta/ibid/flight
Merge into: lp:~ibid-core/ibid/old-trunk-1.6
Diff against target: 441 lines (+334/-17)
3 files modified
ibid/plugins/ascii.py (+20/-14)
ibid/plugins/flight.py (+300/-0)
ibid/utils/__init__.py (+14/-3)
To merge this branch: bzr merge lp:~marco-gallotta/ibid/flight
Reviewer Review Type Date Requested Status
Ibid Core Team Pending
Review via email: mp+16961@code.launchpad.net

This proposal has been superseded by a proposal from 2010-01-07.

To post a comment you must log in.
Revision history for this message
marcog (marco-gallotta) wrote :

I think the flight plugin is mature enough for some reviews. It's quite possible some gaping bugs are still out there, but it seems to be robust enough.

Revision history for this message
Stefano Rivera (stefanor) wrote :

Whoops, conflicts in the diff.

lp:~marco-gallotta/ibid/flight updated
858. By marcog

Merge trunk and revert ascii.py (Somehow old changes got muddled into it)

859. By marcog

Reverting ascii to trunk's version...hopefully I got it right *this* time

860. By marcog

Fix typos; allow "airport *for* cape town"

861. By marcog

Condense responses into one

862. By marcog

Remove unused imports

863. By marcog

Make use of etree's findtext and path querying

864. By marcog

Merge flight and airport features

865. By marcog

Use dict substitutions for responses

866. By marcog

Catch ValueError thrown by parse() when the date is invalid

867. By marcog

Make strings unicode in places where they weren't; split(' ') if s -> string.split()

868. By marcog

Make parsing a little more resilient

869. By marcog

strftime needs an ascii string for pre-2.6

870. By marcog

Merge trunk and move flight into geography

871. By marcog

Travelocity changed some form attribute names, this fixes them but flight is still not working

Unmerged revisions

871. By marcog

Travelocity changed some form attribute names, this fixes them but flight is still not working

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/ascii.py'
2--- ibid/plugins/ascii.py 2010-01-05 08:00:15 +0000
3+++ ibid/plugins/ascii.py 2010-01-07 11:59:18 +0000
4@@ -1,12 +1,11 @@
5-from BaseHTTPServer import BaseHTTPRequestHandler
6 from cStringIO import StringIO
7 import Image
8 from os import remove
9 import os.path
10 import subprocess
11+from sys import stderr
12 from tempfile import mkstemp
13-from urllib2 import HTTPError, URLError, urlopen
14-from urlparse import urlparse
15+from urllib2 import urlopen
16 from zipfile import ZipFile
17
18 from aalib import AsciiScreen
19@@ -44,6 +43,7 @@
20
21 @match(r'^draw\s+(\S+\.\S+)(\s+in\s+colou?r)?(?:\s+w(?:idth)?\s+(\d+))?(?:\s+h(?:eight)\s+(\d+))?$')
22 def draw(self, event, url, colour, width, height):
23+<<<<<<< TREE
24 if not urlparse(url).netloc:
25 url = 'http://' + url
26 if urlparse(url).scheme == 'file':
27@@ -70,17 +70,26 @@
28 if f.read(1) != '':
29 event.addresponse(u'File too large (limit is %i KiB)', self.max_filesize)
30 return
31+=======
32+ f = urlopen(url)
33+
34+ filesize = int(f.info().getheaders('Content-Length')[0])
35+ if filesize > self.max_filesize * 1024:
36+ event.addresponse(u'File too large (limit is %i KiB)', self.max_filesize)
37+ return
38+
39+>>>>>>> MERGE-SOURCE
40 try:
41 ext = os.path.splitext(url)[1]
42 image = mkstemp(suffix=ext)[1]
43 file = open(image, 'w')
44- file.write(buffer)
45+ file.write(f.read())
46 file.close()
47
48 try:
49 img = Image.open(StringIO(open(image, 'r').read())).convert('L')
50- except IOError:
51- event.addresponse(u"Sorry, that doesn't look like an image")
52+ except:
53+ event.addresponse(u'Cannot understand image format')
54 return
55 input_width, input_height = img.size[0], img.size[1]
56
57@@ -120,8 +129,8 @@
58 def draw_aa(self, event, image, width, height):
59 try:
60 image = Image.open(StringIO(open(image, 'r').read())).convert('L')
61- except IOError:
62- event.addresponse(u"Sorry, that doesn't look like an image")
63+ except:
64+ event.addresponse(u'Cannot understand image format')
65 return
66 screen = AsciiScreen(width=width, height=height)
67 image = image.resize(screen.virtual_size)
68@@ -129,6 +138,7 @@
69 event.addresponse(unicode(screen.render()), address=False, conflate=False)
70
71 def draw_caca(self, event, image, width, height):
72+ from sys import stderr
73 process = subprocess.Popen(
74 [self.img2txt_bin, '-f', 'irc', '-W', str(width), '-H', str(height), image],
75 shell=False, stdout=subprocess.PIPE)
76@@ -137,15 +147,14 @@
77 if code == 0:
78 event.addresponse(unicode(response.replace('\r', '')), address=False, conflate=False)
79 else:
80- event.addresponse(u"Sorry, that doesn't look like an image")
81+ event.addresponse(u'Sorry, cannot understand image format')
82
83 class WriteFiglet(Processor):
84 u"""figlet <text> [in <font>]
85 list figlet fonts [from <index>]"""
86 feature = 'figlet'
87
88- max_width = IntOption('max_width', 'Maximum width for ascii output', 60)
89- fonts_zip = Option('fonts_zip', 'Zip file containing figlet fonts', 'ibid/data/figlet-fonts.zip')
90+ fonts_zip = Option('fonts_zip', 'Zip file containing figlet fonts', 'data/figlet-fonts.zip')
91
92 def __init__(self, name):
93 Processor.__init__(self, name)
94@@ -178,7 +187,4 @@
95 del rendered[0]
96 while rendered and rendered[-1].strip() == '':
97 del rendered[-1]
98- if rendered and len(rendered[0]) > self.max_width:
99- event.addresponse(u"Sorry that's too long, nobody will be able to read it")
100- return
101 event.addresponse(unicode('\n'.join(rendered)), address=False, conflate=False)
102
103=== added file 'ibid/plugins/flight.py'
104--- ibid/plugins/flight.py 1970-01-01 00:00:00 +0000
105+++ ibid/plugins/flight.py 2010-01-07 11:59:18 +0000
106@@ -0,0 +1,300 @@
107+import csv
108+import re
109+from sys import maxint
110+from urllib import urlencode
111+from xml.etree import ElementTree
112+
113+from dateutil.parser import parse
114+
115+from ibid.config import IntOption
116+from ibid.plugins import Processor, match
117+from ibid.utils import cacheable_download, human_join
118+from ibid.utils.html import get_html_parse_tree
119+
120+help = { u'airport' : u'Search for airports',
121+ u'flight' : u'Search for flights on travelocity' }
122+
123+airports_url = 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat'
124+
125+airports = {}
126+
127+def read_data():
128+ # File is listed as ISO 8859-1 (Latin-1) encoded on
129+ # http://openflights.org/data.html, but from decoding it appears to
130+ # actually be UTF8
131+ filename = cacheable_download(airports_url, 'flight/airports.dat')
132+ reader = csv.reader(open(filename), delimiter=',', quotechar='"')
133+ for row in reader:
134+ airports[int(row[0])] = [unicode(r, 'utf-8') for r in row[1:]]
135+
136+def airport_search(query, search_loc = True):
137+ if not airports:
138+ read_data()
139+ if search_loc:
140+ ids = airport_search(query, False)
141+ if len(ids) == 1:
142+ return ids
143+ query = [unicode(q) for q in query.lower().split(' ') if q]
144+ else:
145+ query = [unicode(query.lower())]
146+ ids = []
147+ for id, airport in airports.items():
148+ if search_loc:
149+ data = (u' '.join(c.lower() for c in airport[:5])).split(' ')
150+ elif len(query[0]) == 3:
151+ data = [airport[3].lower()]
152+ else: # assume lenght 4 (won't break if not)
153+ data = [airport[4].lower()]
154+ if len(filter(lambda q: q in data, query)) == len(query):
155+ ids.append(id)
156+ return ids
157+
158+def repr_airport(id):
159+ airport = airports[id]
160+ code = ''
161+ if airport[3] or airport[4]:
162+ code = ' (%s)' % u'/'.join(filter(lambda c: c, airport[3:5]))
163+ return '%s%s' % (airport[0], code)
164+
165+class AirportSearch(Processor):
166+ """airport [in] <name|location|code>"""
167+
168+ feature = 'airport'
169+
170+ @match(r'^airports?\s+(in\s+)?(.+)$')
171+ def airport_search(self, event, search_loc, query):
172+ search_loc = search_loc is not None
173+ if not search_loc and not 3 <= len(query) <= 4:
174+ event.addresponse(u'Airport code must be 3 or 4 characters')
175+ return
176+ ids = airport_search(query, search_loc)
177+ if len(ids) == 0:
178+ event.addresponse(u"Sorry, I don't know that airport")
179+ elif len(ids) == 1:
180+ id = ids[0]
181+ airport = airports[id]
182+ code = 'unknown code'
183+ if airport[3] and airport[4]:
184+ code = 'codes %s and %s' % (airport[3], airport[4])
185+ elif airport[3]:
186+ code = 'code %s' % airport[3]
187+ elif airport[4]:
188+ code = 'code %s' % airport[4]
189+ event.addresponse(u'%s in %s, %s has %s' %
190+ (airport[0], airport[1], airport[2], code))
191+ else:
192+ event.addresponse(u'Found the following airports: %s', human_join(repr_airport(id) for id in ids)[:480])
193+
194+class Flight:
195+ def __init__(self):
196+ self.flight, self.depart_time, self.depart_ap, self.arrive_time, \
197+ self.arrive_ap, self.duration, self.stops, self.price = \
198+ [], None, None, None, None, None, None, None
199+
200+ def int_price(self):
201+ try:
202+ return int(self.price[1:])
203+ except ValueError:
204+ return maxint
205+
206+ def int_duration(self):
207+ hours, minutes = 0, 0
208+ match = re.search(r'(\d+)hr', self.duration)
209+ if match:
210+ hours = int(match.group(1))
211+ match = re.search(r'(\d+)min', self.duration)
212+ if match:
213+ minutes = int(match.group(1))
214+ return int(hours)*60 + int(minutes)
215+
216+MONTH_SHORT = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
217+MONTH_LONG = ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December')
218+OTHER_STUFF = ('am', 'pm', 'st', 'nd', 'rd', 'th', 'morning', 'afternoon', 'evening', 'anytime')
219+DATE = r'(?:[0-9.:/hT -]|%s)+' % '|'.join(MONTH_SHORT+MONTH_LONG+OTHER_STUFF)
220+
221+class FlightException(Exception):
222+ pass
223+
224+class FlightSearch(Processor):
225+ """[<cheapest|quickest]> flight from <departure> to <destination> from <depart_date> [anytime|morning|afternoon|evening|<time>] to <return_date> [anytime|morning|afternoon|evening|<time>]"""
226+
227+ feature = 'flight'
228+
229+ max_results = IntOption('max_results', 'Maximum number of results to list', 5)
230+
231+ def _flight_search(self, event, dpt, to, dep_date, ret_date):
232+ airport_dpt = airport_search(dpt)
233+ airport_to = airport_search(to)
234+ if len(airport_dpt) == 0:
235+ event.addresponse(u"Sorry, I don't know the airport you want to leave from")
236+ return
237+ if len(airport_to) == 0:
238+ event.addresponse(u"Sorry, I don't know the airport you want to fly to")
239+ return
240+ if len(airport_dpt) > 1:
241+ event.addresponse(u'The following airports match the departure: %s', human_join(repr_airport(id) for id in airport_dpt)[:480])
242+ return
243+ if len(airport_to) > 1:
244+ event.addresponse(u'The following airports match the destination: %s', human_join(repr_airport(id) for id in airport_to)[:480])
245+ return
246+
247+ dpt = airport_dpt[0]
248+ to = airport_to[0]
249+
250+ def to_travelocity_date(date):
251+ date = date.lower()
252+ time = None
253+ for period in ['anytime', 'morning', 'afternoon', 'evening']:
254+ if period in date:
255+ time = period.title()
256+ date = date.replace(period, '')
257+ break
258+ date = parse(date)
259+ if time is None:
260+ if date.hour == 0 and date.minute == 0:
261+ time = 'Anytime'
262+ else:
263+ time = date.strftime('%I:00')
264+ if time[0] == '0':
265+ time = time[1:]
266+ if date.hour < 12:
267+ time += 'am'
268+ else:
269+ time += 'pm'
270+ date = date.strftime('%m/%d/%Y')
271+ return (date, time)
272+
273+ (dep_date, dep_time) = to_travelocity_date(dep_date)
274+ (ret_date, ret_time) = to_travelocity_date(ret_date)
275+
276+ params = {}
277+ params['leavingFrom'] = airports[dpt][3]
278+ params['goingTo'] = airports[to][3]
279+ params['leavingDate'] = dep_date
280+ params['dateLeavingTime'] = dep_time
281+ params['returningDate'] = ret_date
282+ params['dateReturningTime'] = ret_time
283+ etree = get_html_parse_tree('http://travel.travelocity.com/flights/InitialSearch.do', data=urlencode(params), treetype='etree')
284+ while True:
285+ script = [script for script in etree.getiterator('script')][1]
286+ matches = script.text and re.search(r'var finurl = "(.*)"', script.text)
287+ if matches:
288+ url = 'http://travel.travelocity.com/flights/%s' % matches.group(1)
289+ etree = get_html_parse_tree(url, treetype='etree')
290+ else:
291+ break
292+
293+ # Handle error
294+ div = [d for d in etree.getiterator('div') if d.get(u'class') == 'e_content']
295+ if len(div):
296+ error = div[0].find('h3').text
297+ raise FlightException(error)
298+
299+ departing_flights = self._parse_travelocity(etree)
300+ return_url = None
301+ table = [t for t in etree.getiterator('table')][3]
302+ for tr in table.getiterator('tr'):
303+ for td in tr.getiterator('td'):
304+ if td.get(u'class').strip() in ['tfPrice', 'tfPriceOrButton']:
305+ div = td.find('div')
306+ if div is not None:
307+ button = div.find('button')
308+ if button is not None:
309+ onclick = button.get('onclick')
310+ match = re.search(r"location.href='\.\./flights/(.+)'", onclick)
311+ url_page = match.group(1)
312+ match = re.search(r'^(.*?)[^/]*$', url)
313+ url_base = match.group(1)
314+ return_url = url_base + url_page
315+
316+ etree = get_html_parse_tree(return_url, treetype='etree')
317+ returning_flights = self._parse_travelocity(etree)
318+
319+ return (departing_flights, returning_flights, url)
320+
321+ def _parse_travelocity(self, etree):
322+ flights = []
323+ table = [t for t in etree.getiterator('table') if t.get(u'id') == 'tfGrid'][0]
324+ trs = [t for t in table.getiterator('tr')]
325+ tr_index = 1
326+ while tr_index < len(trs):
327+ tds = []
328+ while True:
329+ new_tds = [t for t in trs[tr_index].getiterator('td')]
330+ tds.extend(new_tds)
331+ tr_index += 1
332+ if len(filter(lambda t: t.get(u'class').strip() == u'tfAirlineSeatsMR', new_tds)):
333+ break
334+ flight = Flight()
335+ for td in tds:
336+ if td.get(u'class').strip() == u'tfAirline':
337+ anchor = td.find('a')
338+ if anchor is not None:
339+ airline = anchor.text.strip()
340+ else:
341+ airline = td.text.split('\n')[0].strip()
342+ flight.flight.append(u'%s %s' % (airline, td.find('div').text.strip()))
343+ if td.get(u'class').strip() == u'tfDepart' and td.text:
344+ flight.depart_time = td.text.split('\n')[0].strip()
345+ flight.depart_ap = '%s %s' % (td.find('div').text.strip(),
346+ td.find('div').find('span').text.strip())
347+ if td.get(u'class').strip() == u'tfArrive' and td.text:
348+ flight.arrive_time = td.text.split('\n')[0].strip()
349+ span = td.find('span')
350+ if span is not None and span.get(u'class').strip() == u'tfNextDayDate':
351+ flight.arrive_time = u'%s %s' % (flight.arrive_time, span.text.strip()[2:])
352+ span = [s for s in td.find('div').getiterator('span')][1]
353+ flight.arrive_ap = '%s %s' % (td.find('div').text.strip(),
354+ span.text.strip())
355+ else:
356+ flight.arrive_ap = '%s %s' % (td.find('div').text.strip(),
357+ td.find('div').find('span').text.strip())
358+ if td.get(u'class').strip() == u'tfTime' and td.text:
359+ flight.duration = td.text.strip()
360+ flight.stops = td.find('span').find('a').text.strip()
361+ if td.get(u'class').strip() in [u'tfPrice', u'tfPriceOr'] and td.text:
362+ flight.price = td.text.strip()
363+ flight.flight = human_join(flight.flight)
364+ flights.append(flight)
365+
366+ return flights
367+
368+ @match(r'^(?:(cheapest|quickest)\s+)?flights?\s+from\s+(.+)\s+to\s+(.+)\s+from\s+(%s)\s+to\s+(%s)$' % (DATE, DATE))
369+ def flight_search(self, event, priority, dpt, to, dep_date, ret_date):
370+ try:
371+ flights = self._flight_search(event, dpt, to, dep_date, ret_date)
372+ except FlightException, e:
373+ event.addresponse(unicode(e))
374+ return
375+ if flights is None:
376+ return
377+ if len(flights[0]) == 0:
378+ event.addresponse(u'No matching departure flights found')
379+ return
380+ if len(flights[1]) == 0:
381+ event.addresponse(u'No matching return flights found')
382+ return
383+
384+ cmp = None
385+ if priority == 'cheapest':
386+ cmp = lambda a, b: a.int_price() < b.int_price()
387+ elif priority == 'quickest':
388+ cmp = lambda a, b: a.int_duration() < b.int_duration()
389+ if cmp:
390+ # select best flight based on priority
391+ for i in xrange(2):
392+ flights[i].sort(cmp=cmp)
393+ del flights[i][1:]
394+ for i, flight_type in zip(xrange(2), ['Departing', 'Returning']):
395+ if len(flights[i]) > 1:
396+ event.addresponse(u'%s flights:', flight_type)
397+ for flight in flights[i][:self.max_results]:
398+ leading = ''
399+ if len(flights[i]) == 1:
400+ leading = u'%s flight: ' % flight_type
401+ event.addresponse('%s%s departing %s from %s, arriving %s at %s (flight time %s, %s) costs %s per person',
402+ (leading, flight.flight, flight.depart_time, flight.depart_ap, flight.arrive_time,
403+ flight.arrive_ap, flight.duration, flight.stops, flight.price or 'unknown'))
404+ event.addresponse(u'Full results: %s', flights[2])
405+
406+# vi: set et sta sw=4 ts=4:
407
408=== modified file 'ibid/utils/__init__.py'
409--- ibid/utils/__init__.py 2010-01-06 23:19:42 +0000
410+++ ibid/utils/__init__.py 2010-01-07 11:59:19 +0000
411@@ -80,9 +80,14 @@
412 req.add_header('User-Agent', 'Ibid/' + (ibid_version() or 'dev'))
413
414 if exists:
415- modified = os.path.getmtime(cachefile)
416- modified = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(modified))
417- req.add_header("If-Modified-Since", modified)
418+ if os.path.isfile(cachefile + '.etag'):
419+ f = file(cachefile + '.etag', 'r')
420+ req.add_header("If-None-Match", f.readline().strip())
421+ f.close()
422+ else:
423+ modified = os.path.getmtime(cachefile)
424+ modified = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(modified))
425+ req.add_header("If-Modified-Since", modified)
426
427 try:
428 connection = urllib2.urlopen(req)
429@@ -106,6 +111,12 @@
430 gzipper = GzipFile(fileobj=compressedstream)
431 data = gzipper.read()
432
433+ etag = connection.headers.get('etag')
434+ if etag:
435+ f = file(cachefile + '.etag', 'w')
436+ f.write(etag + '\n')
437+ f.close()
438+
439 outfile = file(cachefile, 'wb')
440 outfile.write(data)
441 outfile.close()

Subscribers

People subscribed via source and target branches