2 # -*- coding: UTF-8 -*-
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time
8 from textwrap import wrap
13 from gotovienna import defaults
15 POSITION_TYPES = ('stop', 'address', 'poi')
17 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
19 class ParserError(Exception):
21 def __init__(self, msg='Parser error'):
25 UNKNOWN, CORRECTION, RESULT = range(3)
def guess_location_type(location):
    """Guess type (stop, address, poi) of a location

    Only the whitespace-separated words of *location* are inspected:
    a single word is taken to be a stop, a trailing house number
    (optionally with a door number such as "12/34") marks an address,
    and everything else falls back to a stop.  Empty or whitespace-only
    input is reported as a stop instead of raising.

    >>> guess_location_type('pilgramgasse')
    'stop'

    >>> guess_location_type('karlsplatz 14')
    'address'

    >>> guess_location_type('reumannplatz 12/34')
    'address'

    >>> guess_location_type('')
    'stop'
    """
    parts = location.split()

    # Assume all single-word locations are stops; blank input (no parts
    # at all) takes the same branch so parts[-1] below cannot fail.
    if len(parts) <= 1:
        return 'stop'

    last_part = parts[-1]

    # If the last part is numeric, assume address
    if last_part.isdigit():
        return 'address'

    # Addresses with door number (e.g. "12/34")
    if all(x.isdigit() or x == '/' for x in last_part):
        return 'address'

    # Sane default - assume it's a stop/station name
    return 'stop'
def search(origin_tuple, destination_tuple, dtime=None):
    """ build route request
    returns html result (as urllib response)

    origin_tuple and destination_tuple are (name, type) pairs.  A type
    of None is guessed with guess_location_type(); otherwise it has to
    be one of POSITION_TYPES.  dtime is the datetime to route for and
    defaults to the current time.

    Raises ParserError if either position type is invalid.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    destination, destination_type = destination_tuple

    if origin_type is None:
        origin_type = guess_location_type(origin)
        print('Guessed origin type: %s' % origin_type)

    if destination_type is None:
        destination_type = guess_location_type(destination)
        print('Guessed destination type: %s' % destination_type)

    if (origin_type not in POSITION_TYPES or
            destination_type not in POSITION_TYPES):
        raise ParserError('Invalid position type')

    # Work on a copy so the shared defaults.search_post template is not
    # modified by a search and cannot leak values into later ones.
    post = dict(defaults.search_post)
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # Best-effort trace of the request URL; a log that cannot be written
    # must never make the search itself fail.
    try:
        with open(DEBUGLOG, 'a') as f:
            f.write(url + '\n')
    except (IOError, OSError):
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
103 """ Parser for search response
def __init__(self, html):
    """Parse *html* with BeautifulSoup and keep the tree in self.soup."""
    parsed = BeautifulSoup(html)
    self.soup = parsed
def check_page(self):
    """Classify the parsed page and return the matching PageType value.

    PageType.RESULT when a form with id 'form_efaresults' is found,
    PageType.CORRECTION when a div with class 'form_error' is found,
    PageType.UNKNOWN otherwise.  The result form is checked first.
    """
    soup = self.soup
    if soup.find('form', {'id': 'form_efaresults'}):
        page_type = PageType.RESULT
    elif soup.find('div', {'class': 'form_error'}):
        page_type = PageType.CORRECTION
    else:
        page_type = PageType.UNKNOWN
    return page_type

# Expose the classification as a read-only attribute.
state = property(check_page)
def get_correction(self):
    """Return the option texts offered for origin and destination.

    Reads the <option> elements of the <select> elements with the ids
    'nameList_origin' and 'nameList_destination'.

    Returns a tuple (origin, destination) of lists of option texts; a
    side whose <select> is absent yields an empty list.

    Raises ParserError if neither <select> is present.
    """
    nlo = self.soup.find('select', {'id': 'nameList_origin'})
    nld = self.soup.find('select', {'id': 'nameList_destination'})

    if not nlo and not nld:
        raise ParserError('Unable to parse html')

    # Only one of the two lists may be present, so guard each side
    # instead of calling findAll() on None.
    origin = [opt.text for opt in nlo.findAll('option')] if nlo else []
    destination = [opt.text for opt in nld.findAll('option')] if nld else []

    return (origin, destination)
def get_result(self):
    """Return an rParser built from the string form of the parsed page."""
    markup = str(self.soup)
    return rParser(markup)
144 """ Parser for routing results
def __init__(self, html):
    """Parse the routing result markup *html*.

    The parsed tree is kept in self.soup; the cached overview and
    details both start out as None.
    """
    self.soup = BeautifulSoup(html)
    self._overview = None
    # self._details is tested before it is ever assigned elsewhere in this
    # class, so it has to exist from construction on.
    self._details = None
def get_tdtext(cls, x, cl):
    """Return the text of the <td> with class *cl* that x.find() yields.

    When no such cell exists, x.find() gives None and the attribute
    access raises AttributeError.
    """
    cell = x.find('td', {'class': cl})
    return cell.text
def get_change(cls, x):
    """Read the 'col_change' cell of x.

    NOTE(review): the lines that follow the lookup are not visible in
    this view, so how the cell text is converted and returned cannot be
    documented here -- confirm against the full file.
    """
    y = rParser.get_tdtext(x, 'col_change')
def get_price(cls, x):
    """Return the 'col_price' cell of x as a float.

    A ',' in the cell text is replaced by '.' before the conversion.

    NOTE(review): some lines between the lookup and the return are not
    visible in this view; the conversion may be guarded by conditions
    that are not shown here -- confirm against the full file.
    """
    y = rParser.get_tdtext(x, 'col_price')
    return float(y.replace(',', '.'))
def get_date(cls, x):
    """Return the 'col_date' cell of x as a datetime.date.

    The cell text is parsed with the format '%d.%m.%Y'.

    NOTE(review): a line between the lookup and the return is not
    visible in this view; the parse may be conditional -- confirm
    against the full file.
    """
    y = rParser.get_tdtext(x, 'col_date')
    return datetime.strptime(y, '%d.%m.%Y').date()
def get_time(cls, x):
    """Return the times in the 'col_time' cell of x as datetime.time objects.

    When the cell text contains a '-' after its first character it is
    split on '-'; otherwise it is broken into pieces of width 5 with
    textwrap.wrap().  Every piece is split on ':' and its integer parts
    are passed positionally to time().

    NOTE(review): lines around the branches are not visible in this
    view (e.g. a possible guard for an empty cell) -- confirm against
    the full file.
    """
    y = rParser.get_tdtext(x, 'col_time')
    # '> 0' (not '>= 0'): a leading '-' is not treated as a separator
    if (y.find("-") > 0):
        return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
    return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
def get_duration(cls, x):
    """Return the 'col_duration' cell of x as a datetime.time.

    The cell text is split on ':' and the integer parts are passed
    positionally to time().

    NOTE(review): a line between the lookup and the return is not
    visible in this view; the conversion may be conditional -- confirm
    against the full file.
    """
    y = rParser.get_tdtext(x, 'col_duration')
    return time(*map(int, y.split(":")))
202 for detail in self.details():
def _parse_details(self):
    """Extract the per-step details of every route on the page.

    Every div with class 'data_table tourdetail' is one route; each row
    of its <tbody> becomes a dict with the keys 'time', 'station' and
    'info'.  Returns a list (one entry per route) of lists of such dicts.
    """
    def bare_strings(cell):
        # keep only children whose type is exactly NavigableString, so
        # tags and NavigableString subclasses are left out
        return [node for node in cell.contents
                if type(node) == NavigableString]

    def parse_step(row):
        return {
            'time': rParser.get_time(row),
            # the first two characters of each station string are cut off
            'station': [text[2:].strip() for text in
                        bare_strings(row.find('td', {'class': 'col_station'}))],
            'info': [text.strip() for text in
                     bare_strings(row.find('td', {'class': 'col_info'}))],
        }

    routes = self.soup.findAll('div', {'class': 'data_table tourdetail'})
    return [[parse_step(row) for row in route.find('tbody').findAll('tr')]
            for route in routes]
220 """returns list of trip details
221 [ [ { 'time': [datetime.time, datetime.time] if time else [],
222 'station': [u'start', u'end'] if station else [],
223 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
224 }, ... # next trip step
225 ], ... # next trip possibility
228 if not self._details:
229 self._details = self._parse_details()
def _parse_overview(self):
    """Parse the overview table (id 'tbl_fahrten') into a list of dicts.

    One dict is built per table row after the headline row, with the
    keys 'date', 'time', 'duration', 'change' and 'price'.

    Raises ParserError if the table is missing or contains no rows.
    """
    table = self.soup.find('table', {'id': 'tbl_fahrten'})

    # bail out early when there is no overview table to read
    if not (table and table.findAll('tr')):
        raise ParserError('Unable to parse overview')

    entries = []
    for row in table.findAll('tr')[1:]:  # cut off headline
        entries.append({
            'date': rParser.get_date(row),
            'time': rParser.get_time(row),
            'duration': rParser.get_duration(row),
            'change': rParser.get_change(row),
            'price': rParser.get_price(row),
        })
    return entries
265 if not self._overview:
267 self._overview = self._parse_overview()
268 except AttributeError:
269 f = open(DEBUGLOG, 'w')
270 f.write(str(self.soup))
273 return self._overview