2 # -*- coding: UTF-8 -*-
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time, timedelta
8 from textwrap import wrap
14 from gotovienna import defaults
# Position types accepted by the efa routing form for origin/destination.
POSITION_TYPES = ('stop', 'address', 'poi')

# Debug output (request URLs / failed pages) is written to this file.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
class ParserError(Exception):
    """Raised when a scraped page does not have the expected structure."""

    def __init__(self, msg='Parser error'):
        """Create the error with a human-readable message.

        NOTE(review): the constructor body is elided in this view --
        presumably it stores/forwards msg; confirm against full source.
        """
    # Page classification constants; this looks like the body of an
    # enum-style class whose header is elided from this view (it is
    # referenced elsewhere as PageType.RESULT etc.) -- TODO confirm.
    UNKNOWN, CORRECTION, RESULT = range(3)
def extract_city(station):
    """ Extract city from string if present,
    else return default city

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    >>> extract_city('Karlsplatz')
    'Wien'
    """
    if len(station.split(',')) > 1:
        # city is whatever follows the last comma
        return station.split(',')[-1].strip()
    else:
        # no city given: fall back to the default city
        return 'Wien'
def extract_station(station):
    """ Remove city from string

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    """
    if len(station.split(',')) > 1:
        # cut off everything from the last comma onwards
        return station[:station.rindex(',')].strip()
    # no city part present: the whole string is the station name
    return station
def split_station(station):
    """ Split a station string into a (station, city) pair.

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    if len(station.split(',')) > 1:
        # everything before the last comma is the station, after it the city
        return (station[:station.rindex(',')].strip(), station.split(',')[-1].strip())
    # default city when none is given
    return (station, 'Wien')
def search(origin_tuple, destination_tuple, dtime=None):
    """ build route request
    returns html result (as urllib response)

    origin_tuple / destination_tuple: (name, position_type), where
    position_type must be one of POSITION_TYPES.
    dtime: departure datetime; defaults to now.
    Raises ParserError for an invalid position type.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    origin, origin_city = split_station(origin)

    destination, destination_type = destination_tuple
    destination, destination_city = split_station(destination)

    if not origin_type in POSITION_TYPES or \
       not destination_type in POSITION_TYPES:
        raise ParserError('Invalid position type')

    # NOTE(review): this mutates the shared defaults.search_post dict
    # in place; copy it here if that ever becomes a problem.
    post = defaults.search_post
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    post['place_origin'] = origin_city
    post['place_destination'] = destination_city
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # best-effort debug logging -- never let logging break the request
    try:
        f = open(DEBUGLOG, 'a')
        try:
            f.write('%s\n' % url)
        finally:
            f.close()
    except IOError:
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
104 """ Parser for search response
def __init__(self, html):
    """Build the soup tree for a search-response page."""
    parsed = BeautifulSoup(html)
    self.soup = parsed
def check_page(self):
    """Classify the response page as RESULT, CORRECTION or UNKNOWN."""
    markers = (
        ('form', {'id': 'form_efaresults'}, PageType.RESULT),
        ('div', {'class': 'form_error'}, PageType.CORRECTION),
    )
    # first marker present wins; order matches the original checks
    for tag, attrs, page_type in markers:
        if self.soup.find(tag, attrs):
            return page_type
    return PageType.UNKNOWN
def get_correction(self):
    """Extract correction suggestions from a CORRECTION page.

    Returns a dict with any of the keys 'origin', 'destination',
    'place_origin', 'place_destination', each mapped to the list of
    suggested strings from the corresponding <select> element.
    Raises ParserError when no suggestion list is present.
    """
    names_origin = self.soup.find('select', {'id': 'nameList_origin'})
    names_destination = self.soup.find('select', {'id': 'nameList_destination'})
    places_origin = self.soup.find('select', {'id': 'placeList_origin'})
    places_destination = self.soup.find('select', {'id': 'placeList_destination'})

    if names_origin or names_destination or places_origin or places_destination:
        # renamed from 'dict' so the builtin is not shadowed
        corrections = {}

        if names_origin:
            corrections['origin'] = [o.text for o in names_origin.findAll('option')]
        if names_destination:
            corrections['destination'] = [o.text for o in names_destination.findAll('option')]
        # BUGFIX: place suggestions were previously read from the
        # name lists instead of the place lists
        if places_origin:
            corrections['place_origin'] = [o.text for o in places_origin.findAll('option')]
        if places_destination:
            corrections['place_destination'] = [o.text for o in places_destination.findAll('option')]

        return corrections

    raise ParserError('Unable to parse html')
def get_result(self):
    """Wrap the current page in a routing-result parser."""
    html = str(self.soup)
    return rParser(html)
150 """ Parser for routing results
def __init__(self, html):
    """Parse the routing-result html and reset the cached overview."""
    tree = BeautifulSoup(html)
    self.soup = tree
    self._overview = None
def get_tdtext(cls, x, cl):
    """Return the text of the first <td> with css class `cl` inside `x`."""
    cell = x.find('td', {'class': cl})
    return cell.text
def get_change(cls, x):
    """Extract the number-of-changes cell ('col_change') of a row.

    NOTE(review): the rest of this method is elided in this view --
    presumably it converts `y` to an int (or None when empty);
    confirm against the full source.
    """
    y = rParser.get_tdtext(x, 'col_change')
def get_price(cls, x):
    """Extract the fare from an overview row as a float.

    The page uses a comma as decimal separator (e.g. '1,80'), which is
    converted to a dot before parsing.
    NOTE(review): lines between the lookup and the return are elided in
    this view -- likely a guard for empty/non-numeric cells; confirm.
    """
    y = rParser.get_tdtext(x, 'col_price')
    return float(y.replace(',', '.'))
def get_date(cls, x):
    """Extract the travel date ('%d.%m.%Y') of a row as a datetime.date.

    NOTE(review): a line between the lookup and the parse is elided in
    this view -- possibly a guard for empty cells; confirm against the
    full source.
    """
    y = rParser.get_tdtext(x, 'col_date')
    return datetime.strptime(y, '%d.%m.%Y').date()
def get_datetime(cls, x):
    """Extract [departure, arrival] datetimes from a result row.

    Two layouts occur: a 'HH:MM-HH:MM' span in 'col_time' combined with
    the date from 'col_date', or a regex-matched pair of (optional date,
    time) values. NOTE(review): several structural lines of this method
    (guards, else/try branches, part of the regex dict) are elided in
    this view; only the visible statements are reproduced below.
    """
    y = rParser.get_tdtext(x, 'col_time')
    if (y.find("-") > 0):
        # single date with a 'HH:MM-HH:MM' span
        times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
        d = rParser.get_date(x)
        from_dtime = datetime.combine(d, times[0])
        if times[0] > times[1]:
            # span crosses midnight: arrival is on the following day
            to_dtime = datetime.combine(d + timedelta(1), times[1])
        # NOTE(review): 'else:' line elided in this view
            to_dtime = datetime.combine(d, times[1])
        return [from_dtime, to_dtime]
    # fallback: regex with optional per-value dates
    # NOTE(review): the 'time' entry of dtregex is on an elided line
    dtregex = {'date' : '\d\d\.\d\d',
    regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
    ma = re.match(regex, y)

    def extract_datetime(gr, n):
        # when an explicit date was matched, prepend the current year
        if 'date%d' % n in gr and gr['date%d' % n]:
            from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
        # NOTE(review): 'else:' elided -- time-only: combine with today
            t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
            d = datetime.today().date()
            return datetime.combine(d, t)

    from_dtime = extract_datetime(gr, 1)
    to_dtime = extract_datetime(gr, 2)

    return [from_dtime, to_dtime]
235 for detail in self.details():
def _parse_details(self):
    """Parse each 'data_table tourdetail' block into a list of step dicts.

    Each step carries 'timespan', 'station' and 'info' lists built from
    the row's cells. NOTE(review): the tail of this method (closing the
    outer map over `tours` and returning the result) is elided in this
    view.
    """
    tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

    trips = map(lambda x: map(lambda y: {
            'timespan': rParser.get_datetime(y),
            'station': map(lambda z: z[2:].strip(),
                    filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
            'info': map(lambda x: x.strip(),
                    filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
            }, x.find('tbody').findAll('tr')),
253 """returns list of trip details
254 [ [ { 'time': [datetime.time, datetime.time] if time else [],
255 'station': [u'start', u'end'] if station else [],
256 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
257 }, ... # next trip step
258 ], ... # next trip possibility
if not self._details:  # lazily parse trip details on first access and cache
    self._details = self._parse_details()
def _parse_overview(self):
    """Parse the overview table ('tbl_fahrten') into a list of row dicts.

    NOTE(review): the tail of this method (remaining dict entries,
    closing of the map over `rows`, the else branch and the return) is
    elided in this view.
    """
    table = self.soup.find('table', {'id': 'tbl_fahrten'})

    # check if there is an overview table
    if table and table.findAll('tr'):
        rows = table.findAll('tr')[1:] # cut off headline

        overview = map(lambda x: {
                'timespan': rParser.get_datetime(x),
                'change': rParser.get_change(x),
                'price': rParser.get_price(x),
        # NOTE(review): the raise below sits in an elided else branch
        raise ParserError('Unable to parse overview')
# NOTE(review): the enclosing 'def overview(self):' and 'try:' lines are
# elided in this view; the 'except' below pairs with that elided 'try'.
if not self._overview:
    self._overview = self._parse_overview()
except AttributeError:
    # parsing failed unexpectedly: dump the page for offline debugging
    f = open(DEBUGLOG, 'w')
    f.write(str(self.soup))
return self._overview