Fix routing.py bug when the time is 24:00
[pywienerlinien] / gotovienna / routing.py
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3
4 from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
5 #from urllib2 import urlopen
6 from UrlOpener import urlopen
7 from urllib import urlencode
8 from datetime import datetime, time, timedelta
9 from textwrap import wrap
10 import sys
11 import os.path
12 import re
13
14 from gotovienna import defaults
15
# Location kinds that search() accepts for an origin or destination.
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime-style pattern for a clock time written as hours:minutes.
TIMEFORMAT = '%H:%M'
# File the result page is dumped to when rParser.overview fails to parse it.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
19
class ParserError(Exception):
    """Raised when a request or a response page cannot be interpreted.

    The text passed as ``msg`` is available as ``message`` and is also
    what ``str(error)`` and ``error.args`` report.
    """

    def __init__(self, msg='Parser error'):
        # Hand the text to Exception as well; otherwise str() and
        # tracebacks show an empty message.
        Exception.__init__(self, msg)
        self.message = msg
24
class PageType:
    """Integer codes for the kinds of page sParser.check_page reports."""
    UNKNOWN = 0
    CORRECTION = 1
    RESULT = 2
27
28
def extract_city(station):
    """ Return the text after the last comma (stripped),
    or 'Wien' when the string contains no comma

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    """
    _, comma, city = station.rpartition(',')
    if not comma:
        # No comma at all: fall back to the default city
        return 'Wien'
    return city.strip()
40
def extract_station(station):
    """ Return everything before the last comma (stripped);
    a string without a comma is returned untouched

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    """
    name, comma, _ = station.rpartition(',')
    if not comma:
        return station
    return name.strip()
51
def split_station(station):
    """ Split 'name, city' at the last comma into a (name, city) tuple;
    without a comma the city defaults to 'Wien'

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    name, comma, city = station.rpartition(',')
    if comma:
        return (name.strip(), city.strip())
    # No city given: the name is passed through exactly as received
    return (station, 'Wien')
62
def guess_location_type(location):
    """Guess type (stop, address, poi) of a location

    The guess only looks at the last whitespace-separated word: if it
    consists solely of digits and slashes (house number, optionally with
    a door number) the location is taken to be an address; everything
    else, including empty input, is taken to be a stop.

    >>> guess_location_type('pilgramgasse')
    'stop'

    >>> guess_location_type('karlsplatz 14')
    'address'

    >>> guess_location_type('reumannplatz 12/34')
    'address'

    >>> guess_location_type('')
    'stop'
    """
    parts = location.split()

    # Empty or whitespace-only input has no words to inspect, and all
    # single-word locations are assumed to be stops
    if len(parts) <= 1:
        return 'stop'

    last_part = parts[-1]

    # Plain house number ("14") or one with a door number ("12/34")
    if all(x.isdigit() or x == '/' for x in last_part):
        return 'address'

    # Sane default - assume it's a stop/station name
    return 'stop'
93
def search(origin_tuple, destination_tuple, dtime=None):
    """ build route request
    returns html result (as urllib response)

    origin_tuple / destination_tuple are (name, type) pairs. The name may
    carry a ', city' suffix (see split_station); the type is one of
    POSITION_TYPES, or None to have it guessed from the name.
    dtime is the datetime sent with the request and defaults to now.

    Raises ParserError if a type is not one of POSITION_TYPES.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    origin, origin_city = split_station(origin)

    destination, destination_type = destination_tuple
    destination, destination_city = split_station(destination)

    # Single pre-formatted argument: prints the same text whether print is
    # a statement or a function
    if origin_type is None:
        origin_type = guess_location_type(origin)
        print('Guessed origin type: %s' % origin_type)

    if destination_type is None:
        destination_type = guess_location_type(destination)
        print('Guessed destination type: %s' % destination_type)

    if (origin_type not in POSITION_TYPES or
            destination_type not in POSITION_TYPES):
        raise ParserError('Invalid position type')

    # Work on a copy: writing into defaults.search_post itself would leak
    # this request's values into the shared defaults
    post = dict(defaults.search_post)
    post.update({
        'name_origin': origin,
        'type_origin': origin_type,
        'name_destination': destination,
        'type_destination': destination_type,
        'itdDateDayMonthYear': dtime.strftime('%d.%m.%Y'),
        'itdTime': dtime.strftime(TIMEFORMAT),
        'place_origin': origin_city,
        'place_destination': destination_city,
    })

    url = '%s?%s' % (defaults.action, urlencode(post))

    return urlopen(url)
134
135
class sParser:
    """ Parser for search response
    """

    # (result key, id of the <select> element holding the suggestions)
    _CORRECTION_SELECTS = (
        ('origin', 'nameList_origin'),
        ('destination', 'nameList_destination'),
        ('place_origin', 'placeList_origin'),
        ('place_destination', 'placeList_destination'),
    )

    def __init__(self, html):
        self.soup = BeautifulSoup(html)

    def check_page(self):
        """ Classify the page, returns a PageType value:
        RESULT if the results form is present, CORRECTION if an error
        box is present, UNKNOWN otherwise
        """
        if self.soup.find('form', {'id': 'form_efaresults'}):
            return PageType.RESULT

        if self.soup.find('div', {'class':'form_error'}):
            return PageType.CORRECTION

        return PageType.UNKNOWN

    state = property(check_page)

    def get_correction(self):
        """ Collect the suggestions offered on a correction page

        returns a dict that maps 'origin', 'destination', 'place_origin'
        and 'place_destination' to the list of option texts of the
        matching select element; keys whose select is missing are left out
        raises ParserError if none of the select elements is present
        """
        corrections = {}

        for key, select_id in self._CORRECTION_SELECTS:
            select = self.soup.find('select', {'id': select_id})
            if select:
                # Each key reads its own select: the place lists must not
                # be filled from the name lists
                corrections[key] = [option.text
                                    for option in select.findAll('option')]

        if not corrections:
            raise ParserError('Unable to parse html')

        return corrections

    def get_result(self):
        """ Hand the page over to an rParser
        """
        return rParser(str(self.soup))
185
186
187
class rParser:
    """ Parser for routing results
    """

    # Detail rows: two clock times, each optionally preceded by a
    # day.month date
    _DETAIL_TIMES = re.compile(
        r"\s*(?P<date1>{date})?\s*(?P<time1>{time})"
        r"\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(
            date=r'\d\d\.\d\d', time=r'\d\d:\d\d'))

    def __init__(self, html):
        self.soup = BeautifulSoup(html)
        self._overview = None
        self._details = None

    @classmethod
    def get_tdtext(cls, x, cl):
        """ Text of the td with class cl inside row x
        raises AttributeError if the row has no such cell
        """
        return x.find('td', {'class': cl}).text

    @classmethod
    def get_change(cls, x):
        """ Number of changes in row x as int, 0 for an empty cell
        """
        y = cls.get_tdtext(x, 'col_change')
        if y:
            return int(y)
        else:
            return 0

    @classmethod
    def get_price(cls, x):
        """ Price in row x as float, decimal comma accepted
        returns 0.0 for an empty cell or the '*' placeholder
        raises ValueError if the cell holds anything else non-numeric
        """
        y = cls.get_tdtext(x, 'col_price').strip()
        # Test for emptiness explicitly: str.find() is truthy when the
        # comma is missing (-1) and falsy when it is the first character
        if not y or y == '*':
            return 0.0
        return float(y.replace(',', '.'))

    @classmethod
    def get_date(cls, x):
        """ Date in row x (dd.mm.yyyy) as datetime.date, None if empty
        """
        y = cls.get_tdtext(x, 'col_date')
        if y:
            return datetime.strptime(y, '%d.%m.%Y').date()
        else:
            return None

    @classmethod
    def _parse_clock(cls, text):
        """ Convert 'HH:MM' to (datetime.time, timedelta)
        Hour 24 is not a valid datetime.time; it is mapped to hour 0 and
        the returned timedelta is one day (zero otherwise)
        """
        fields = [int(field) for field in text.split(':')]
        overflow = timedelta(0)
        if fields[0] == 24:
            # Strange times possible at wienerlinien
            fields[0] = 0
            overflow = timedelta(days=1)
        return time(*fields), overflow

    @classmethod
    def get_datetime(cls, x):
        """ Departure and arrival of row x as [from, to] datetimes

        overview mode ('HH:MM - HH:MM'): the day comes from the row's
        date cell; an arrival earlier than the departure is moved to the
        next day
        detail mode ('[dd.mm] HH:MM [dd.mm] HH:MM'): a missing date means
        today, a given date is placed in the current year
        returns [] if the time cell is empty or cannot be matched
        """
        y = cls.get_tdtext(x, 'col_time')
        if not y:
            return []

        if y.find("-") > 0:
            # overview mode
            clocks = [cls._parse_clock(z) for z in y.split('-')]
            d = cls.get_date(x)
            from_dtime = datetime.combine(d, clocks[0][0]) + clocks[0][1]
            to_dtime = datetime.combine(d, clocks[1][0]) + clocks[1][1]
            if to_dtime < from_dtime:
                # dateline crossing
                to_dtime += timedelta(days=1)

            return [from_dtime, to_dtime]

        # detail mode
        ma = cls._DETAIL_TIMES.match(y)
        if not ma:
            return []

        gr = ma.groupdict()
        today = datetime.today().date()

        def extract_datetime(n):
            clock, overflow = cls._parse_clock(gr['time%d' % n])
            date_text = gr['date%d' % n]
            if date_text:
                # Parse together with the year so that 29.02 is accepted
                # in leap years
                day = datetime.strptime('%d.%s' % (today.year, date_text),
                                        '%Y.%d.%m').date()
            else:
                day = today
            return datetime.combine(day, clock) + overflow

        return [extract_datetime(1), extract_datetime(2)]

    def __iter__(self):
        # details is a property, so it is read, not called
        for detail in self.details:
            yield detail

    def _parse_details(self):
        """ Build the structure described in the details property
        """
        def plain_strings(cell):
            # Exact type match on purpose: instances of NavigableString
            # subclasses are left out
            return [node for node in cell.contents
                    if type(node) == NavigableString]

        trips = []
        for tour in self.soup.findAll('div', {'class': 'data_table tourdetail'}):
            steps = []
            for row in tour.find('tbody').findAll('tr'):
                stations = plain_strings(row.find('td', {'class': 'col_station'}))
                infos = plain_strings(row.find('td', {'class': 'col_info'}))
                steps.append({
                    'timespan': rParser.get_datetime(row),
                    # The first two characters of every station string
                    # are dropped (presumably a leading marker - confirm
                    # against the page markup)
                    'station': [text[2:].strip() for text in stations],
                    'info': [text.strip() for text in infos],
                })
            trips.append(steps) # one list of steps per route
        return trips

    @property
    def details(self):
        """returns list of trip details
        [ [ { 'timespan': [datetime, datetime] if time else [],
              'station': list of station strings of the step,
              'info': list of info strings of the step
            }, ... # next trip step
          ], ... # next trip possibility
        ]
        """
        if self._details is None:
            self._details = self._parse_details()

        return self._details

    def _parse_overview(self):
        """ Build the list described in the overview property
        raises ParserError if the overview table is missing or has no rows
        """
        # get overview table
        table = self.soup.find('table', {'id': 'tbl_fahrten'})

        # check if there is an overview table
        if not (table and table.findAll('tr')):
            raise ParserError('Unable to parse overview')

        rows = table.findAll('tr')[1:] # cut off headline

        return [{
                    'timespan': rParser.get_datetime(row),
                    'change': rParser.get_change(row),
                    'price': rParser.get_price(row),
                }
                for row in rows]

    @property
    def overview(self):
        """list with one dict per route containing
        timespan: [datetime, datetime]
        change: int
        price: float

        If a row lacks an expected cell (AttributeError) the page is
        written to DEBUGLOG and None is returned
        """
        if self._overview is None:
            try:
                self._overview = self._parse_overview()
            except AttributeError:
                # Keep the offending page for later inspection; the with
                # block closes the file even if the write fails
                with open(DEBUGLOG, 'w') as f:
                    f.write(str(self.soup))

        return self._overview
351