combined date and time, renamed to timespan
[pywienerlinien] / gotovienna / routing.py
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time, timedelta
8 from textwrap import wrap
9 import argparse
10 import sys
11 import os.path
12 import re
13
14 from gotovienna import defaults
15
16 POSITION_TYPES = ('stop', 'address', 'poi')  # position types accepted by search() for origin and destination
17 TIMEFORMAT = '%H:%M'  # hour:minute format string; not referenced in the visible part of this file
18 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')  # debug file in the user's home: search() appends request URLs, rParser.overview dumps unparsable pages
19
class ParserError(Exception):
    """Raised when a request is invalid or a response page cannot be parsed.

    The text is available both as ``message`` (kept for existing callers)
    and through the standard exception machinery (``str(exc)``,
    ``exc.args``), so it shows up in tracebacks and logs.
    """

    def __init__(self, msg='Parser error'):
        # Hand the message to Exception as well; without this call
        # str(exc) is empty and exc.args is ().
        super(ParserError, self).__init__(msg)
        self.message = msg
24
class PageType:
    """Integer codes for the kinds of page a search request can return."""

    UNKNOWN = 0
    CORRECTION = 1
    RESULT = 2
27
28
def extract_city(station):
    """ Return the city part of a 'station, city' string.

    The city is whatever follows the last comma, stripped of surrounding
    whitespace; without a comma the default city 'Wien' is returned.

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    """
    _name, comma, city = station.rpartition(',')
    if not comma:
        return 'Wien'
    return city.strip()
40         
def extract_station(station):
    """ Return the station part of a 'station, city' string.

    Everything before the last comma is returned, stripped of surrounding
    whitespace; a string without a comma is returned unchanged.

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    """
    name, comma, _city = station.rpartition(',')
    if not comma:
        return station
    return name.strip()
51     
def split_station(station):
    """ Split a 'station, city' string into a (station, city) tuple.

    The split happens at the last comma and both parts are stripped; a
    string without a comma is paired, unchanged, with the city 'Wien'.

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    name, comma, city = station.rpartition(',')
    if not comma:
        return (station, 'Wien')
    return (name.strip(), city.strip())
62
def search(origin_tuple, destination_tuple, dtime=None):
    """ Build a route request and send it.

    origin_tuple / destination_tuple -- (name, position_type) pairs; the
        name may end in ', City' (see split_station) and position_type
        must be one of POSITION_TYPES.
    dtime -- datetime of the trip; defaults to now.

    Returns the html result (as urllib response).
    Raises ParserError if a position type is not in POSITION_TYPES.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    origin, origin_city = split_station(origin)

    destination, destination_type = destination_tuple
    destination, destination_city = split_station(destination)

    if origin_type not in POSITION_TYPES or \
            destination_type not in POSITION_TYPES:
        raise ParserError('Invalid position type')

    # Work on a copy: filling in defaults.search_post itself would leak
    # the values of this request into every later one.
    post = dict(defaults.search_post)
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    post['place_origin'] = origin_city
    post['place_destination'] = destination_city
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # Logging the URL is best effort only: a failure to write must not
    # prevent the request, but unrelated errors are no longer swallowed.
    try:
        with open(DEBUGLOG, 'a') as f:
            f.write(url + '\n')
    except (IOError, OSError):
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
101
102
class sParser:
    """ Parser for search response

    Classifies the page returned by a search request and gives access to
    either the correction suggestions or the routing results it contains.
    """

    # result key -> id of the <select> element offering the suggestions
    _CORRECTION_SELECTS = (
        ('origin', 'nameList_origin'),
        ('destination', 'nameList_destination'),
        ('place_origin', 'placeList_origin'),
        ('place_destination', 'placeList_destination'),
    )

    def __init__(self, html):
        self.soup = BeautifulSoup(html)

    def check_page(self):
        """ Return the PageType of the parsed page.

        A page with the results form is a RESULT, a page with a form error
        is a CORRECTION, anything else is UNKNOWN.
        """
        if self.soup.find('form', {'id': 'form_efaresults'}):
            return PageType.RESULT

        if self.soup.find('div', {'class': 'form_error'}):
            return PageType.CORRECTION

        return PageType.UNKNOWN

    def get_correction(self):
        """ Return the suggestions offered on a correction page.

        The result is a dict holding, for every suggestion list present on
        the page, the texts of its options under the keys 'origin',
        'destination', 'place_origin' and 'place_destination'.

        Raises ParserError if the page contains none of these lists.
        """
        corrections = {}

        for key, select_id in self._CORRECTION_SELECTS:
            select = self.soup.find('select', {'id': select_id})
            if select:
                # every list is read from its own <select>; the place lists
                # used to be filled from the name lists by mistake
                corrections[key] = [option.text
                                    for option in select.findAll('option')]

        if not corrections:
            raise ParserError('Unable to parse html')

        return corrections

    def get_result(self):
        """ Return an rParser for the routing results of this page. """
        return rParser(str(self.soup))
146
147
148
class rParser:
    """ Parser for routing results

    Wraps the html of a result page and exposes the connections found on
    it through the lazily parsed ``overview`` and ``details`` properties.
    Iterating over the parser yields the entries of ``details``.
    """

    # Time column of the detail view: an optional 'dd.mm' date followed by
    # a 'hh:mm' time, once for the departure and once for the arrival.
    _DETAIL_TIMESPAN = re.compile(
        r'\s*(?P<date1>\d\d\.\d\d)?\s*(?P<time1>\d\d:\d\d)'
        r'\s*(?P<date2>\d\d\.\d\d)?\s*(?P<time2>\d\d:\d\d)\s*')

    def __init__(self, html):
        self.soup = BeautifulSoup(html)
        self._overview = None
        self._details = None

    @classmethod
    def get_tdtext(cls, x, cl):
        """ Return the text of the td with css class `cl` inside row `x`.

        Raises AttributeError if the row has no such cell.
        """
        return x.find('td', {'class': cl}).text

    @classmethod
    def get_change(cls, x):
        """ Return the number of changes of row `x` (0 for an empty cell). """
        y = cls.get_tdtext(x, 'col_change')
        return int(y) if y else 0

    @classmethod
    def get_price(cls, x):
        """ Return the price of row `x` as float.

        An empty cell and the placeholder '*' count as 0.0; a decimal comma
        is accepted.
        """
        y = cls.get_tdtext(x, 'col_price').strip()
        if not y or y == '*':
            return 0.0
        return float(y.replace(',', '.'))

    @classmethod
    def get_date(cls, x):
        """ Return the 'dd.mm.yyyy' date of row `x`, None for an empty cell. """
        y = cls.get_tdtext(x, 'col_date')
        if y:
            return datetime.strptime(y, '%d.%m.%Y').date()
        return None

    @staticmethod
    def _detail_datetime(date_text, time_text):
        """ Combine an optional 'dd.mm' date and a 'hh:mm' time.

        The detail view shows no year, so the current one is used; without
        a date the time is taken to be today.
        """
        day = datetime.today().date()
        if date_text:
            dd, mm = [int(part) for part in date_text.split('.')]
            day = day.replace(month=mm, day=dd)
        return datetime.combine(day,
                                datetime.strptime(time_text, '%H:%M').time())

    @classmethod
    def get_datetime(cls, x):
        """ Return [departure, arrival] of row `x` as datetimes.

        Handles both the overview format ('hh:mm-hh:mm', date in its own
        column) and the detail format ('[dd.mm] hh:mm [dd.mm] hh:mm').
        Returns [] if the time cell is empty or not understood.
        """
        y = cls.get_tdtext(x, 'col_time')
        if not y:
            return []

        if y.find('-') > 0:
            # overview mode
            times = [time(*[int(part) for part in z.split(':')])
                     for z in y.split('-')]
            d = cls.get_date(x)
            from_dtime = datetime.combine(d, times[0])
            if times[0] > times[1]:
                # arrival is earlier on the clock than departure: the
                # connection runs past midnight
                to_dtime = datetime.combine(d + timedelta(1), times[1])
            else:
                to_dtime = datetime.combine(d, times[1])

            return [from_dtime, to_dtime]

        # detail mode
        ma = cls._DETAIL_TIMESPAN.match(y)
        if not ma:
            return []

        gr = ma.groupdict()
        return [cls._detail_datetime(gr['date1'], gr['time1']),
                cls._detail_datetime(gr['date2'], gr['time2'])]

    def __iter__(self):
        # details is a property, it must not be called
        for detail in self.details:
            yield detail

    @staticmethod
    def _plain_strings(cell):
        """ Return the direct NavigableString children of `cell`. """
        # exact type match on purpose: subclasses of NavigableString are
        # left out, as before
        return [child for child in cell.contents
                if type(child) is NavigableString]

    def _parse_details(self):
        """ Parse every 'tourdetail' table into a list of trip steps. """
        tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

        trips = []
        for tour in tours:
            steps = []
            for row in tour.find('tbody').findAll('tr'):
                stations = self._plain_strings(
                    row.find('td', {'class': 'col_station'}))
                infos = self._plain_strings(
                    row.find('td', {'class': 'col_info'}))
                steps.append({
                    'timespan': self.get_datetime(row),
                    # the first two characters of each station text are
                    # dropped before stripping
                    'station': [text[2:].strip() for text in stations],
                    'info': [text.strip() for text in infos],
                })
            trips.append(steps)
        return trips

    @property
    def details(self):
        """returns list of trip details
        [ [ { 'timespan': [datetime, datetime] if time else [],
              'station': [u'start', u'end'] if station else [],
              'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
            }, ... # next trip step
          ], ... # next trip possibility
        ]
        """
        if self._details is None:
            self._details = self._parse_details()

        return self._details

    def _parse_overview(self):
        """ Parse the overview table into a list of dicts.

        Raises ParserError if the page has no overview table.
        """
        # get overview table
        table = self.soup.find('table', {'id': 'tbl_fahrten'})

        # check if there is an overview table
        if not (table and table.findAll('tr')):
            raise ParserError('Unable to parse overview')

        rows = table.findAll('tr')[1:]  # cut off headline

        return [{
                    'timespan': self.get_datetime(row),
                    'change': self.get_change(row),
                    'price': self.get_price(row),
                } for row in rows]

    @property
    def overview(self):
        """list of dicts, one per connection, containing
        timespan: [datetime, datetime] (departure, arrival), [] if unknown
        change: int
        price: float

        If a row lacks an expected cell (AttributeError) the page is
        written to DEBUGLOG and None is returned.
        """
        if self._overview is None:
            try:
                self._overview = self._parse_overview()
            except AttributeError:
                # unexpected markup: keep the page for debugging
                with open(DEBUGLOG, 'w') as f:
                    f.write(str(self.soup))

        return self._overview
305