fixed Departure.departure_time() timedelta bug
[pywienerlinien] / gotovienna / realtime.py
1 # -*- coding: utf-8 -*-
2
3 from gotovienna.BeautifulSoup import BeautifulSoup
4 #from urllib2 import urlopen
5 from urllib import quote_plus
6 from UrlOpener import urlopen
7 from datetime import time, datetime, timedelta
8 import re
9 import collections
10 from errors import LineNotFoundError, StationNotFoundError
11 import cache
12 from cache import Stations
13 from time import sleep
14 from utils import sort_departures
15
16 from gotovienna import defaults
17
class Departure(dict):
    """ A single departure of a line at a station

    Behaves like a dict with the keys 'line', 'station', 'direction',
    'time' and 'lowfloor'. The 'time' value is either an int (minutes
    until departure) or a datetime.time object (clock time of departure).

    Three additional, computed keys are available through item access:
    'ftime'     -- string representation of the time/minutes
    'deltatime' -- minutes until departure as int
    'atime'     -- departure time as datetime.time object
    """

    def __init__(self, line, station, direction, time, lowfloor):
        # NOTE: the parameter ``time`` hides the datetime.time class inside
        # this method only; the name is kept so that keyword callers
        # (Departure(**d)) keep working.
        self['line'] = line
        self['station'] = station
        self['direction'] = direction
        self['time'] = time
        self['lowfloor'] = lowfloor

    def __getitem__(self, key):
        """ Return a stored value or one of the computed pseudo keys
        """
        if key == 'ftime':
            # string representation of time/minutes
            return self.ftime
        elif key == 'deltatime':
            # minutes
            return self.departure_deltatime
        elif key == 'atime':
            # time object
            return self.departure_time
        return dict.__getitem__(self, key)

    @property
    def departure_time(self):
        """ return time object of departure time

        A stored time object is returned unchanged; a number of minutes is
        added to the current local time.
        """
        departure = self['time']
        if isinstance(departure, time):
            return departure
        return (datetime.now() + timedelta(minutes=departure)).time()

    @property
    def departure_deltatime(self):
        """ return int representing minutes until departure

        Raises NotImplementedError if the departure is not stored as a
        number of minutes.
        """
        departure = self['time']
        if isinstance(departure, int):
            return departure
        raise NotImplementedError()

    @property
    def ftime(self):
        """ return the departure as display string

        Minutes are returned as decimal string, time objects as 'HH:MM'.
        Any other value yields None.
        """
        departure = self['time']
        if isinstance(departure, int):
            return str(departure)
        if isinstance(departure, time):
            return departure.strftime('%H:%M')
        return None
62
class ITipParser:
    """ Scraper for line, station and departure pages

    Line names and their urls are kept in ``cache.lines``; station lists
    are kept in ``cache.Stations`` objects. Pages are fetched with
    ``urlopen`` and parsed with BeautifulSoup.
    """

    def __init__(self):
        # mapping of line name -> url, filled lazily by the ``lines`` property
        self._lines = cache.lines

    def get_stations(self, name):
        """ Get station by direction
        {'Directionname': [('Station name', 'url')]}

        The url is None for stations without a link. An empty dict is
        returned if ``name`` is not a known line.
        """
        if name not in self.lines:
            return {}

        st = Stations(name)

        if not st:
            bs = BeautifulSoup(urlopen(self.lines[name]))
            tables = bs.findAll('table', {'class': 'text_10pix'})
            # only the first two tables are used (one per direction)
            for table in tables[:2]:
                # presumably cuts a leading and a trailing '&nbsp;'
                # (6 characters each) off the heading -- TODO confirm
                direction = table.div.contents[-1].strip()[6:-6]

                sta = []
                for tr in table.findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
                    if tr.a:
                        sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
                    else:
                        sta.append((tr.text.strip(' '), None))

                st[direction] = sta

        return st

    @property
    def lines(self):
        """ Dictionary of Line names with url as value

        The overview page is only fetched while the dictionary is empty.
        """
        if not self._lines:
            bs = BeautifulSoup(urlopen(defaults.line_overview))
            # get table cells holding the lines
            cells = bs.findAll('td', {'class': 'linie'})

            for cell in cells:
                if not cell.a:
                    continue
                href = defaults.line_overview + cell.a['href']
                if cell.text:
                    self._lines[cell.text] = href
                elif cell.img:
                    # line is shown as image, use its alt text as name
                    self._lines[cell.img['alt']] = href

        return self._lines

    def get_url_from_direction(self, line, direction, station):
        """ Get the url of a station for the given line and direction

        Returns None if line, direction or station are unknown.
        """
        stations = self.get_stations(line)

        for stationname, url in stations.get(direction, []):
            if stationname == station:
                return url

        return None

    def get_departures_by_station(self, station):
        """ Get list of Departures for one station

        Parsing is best effort: list items which can not be parsed are
        reported with a warning and skipped; any other error while reading
        the page is reported and the departures collected so far are
        returned.
        """

        # TODO 1. Error handling
        # TODO 2. more error handling
        # TODO 3. ultimative error handling

        dep = []
        # the "»" separator in front of the direction name; written as a
        # unicode literal because it is matched against unicode page text
        marker = u'\xbb'
        bs = BeautifulSoup(urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))))
        try:
            li = bs.ul.findAll('li')
            if li and li[0].a:
                # Dirty workaround for ambiguous station
                bs = BeautifulSoup(urlopen(defaults.qando + li[0].a['href']))
                li = bs.ul.findAll('li')

            for l in li:
                try:
                    d = l.div.next
                    if d.find(marker) == -1:
                        d = d.next.next

                    direction = d.replace(marker, '').strip()
                    if direction.startswith('NICHT EINSTEIGEN'):
                        continue

                    line = l.img['alt']
                    for span in l.findAll('span'):
                        text = span.text
                        if text.isdigit():
                            # minutes until departure
                            tim = int(text)
                        elif ':' in text:
                            # clock time of departure
                            tim = time(*map(int, text.split(':')))
                        else:
                            print('Warning: %s' % text)
                            continue

                        lowfloor = span['class'] == 'departureBarrierFree'
                        dep.append(Departure(line, station, direction, tim, lowfloor))

                except Exception:
                    # skip items with unexpected markup, keep the rest
                    print('Warning: %s' % l)
                    continue

        except Exception:
            # best effort: keep what has been parsed so far, but let
            # KeyboardInterrupt / SystemExit pass through
            print('Error while getting station %s' % station)

        return dep

    @staticmethod
    def _parse_time_cell(text):
        """ Parse the text of a departure time cell

        Returns an int (minutes until departure), a time object (clock
        time of departure) or None if the text is not understood.
        """
        parts = text.split(' ')
        # the value is the second word if there is one, else the only word
        token = parts[1] if len(parts) >= 2 else parts[0]

        if 'rze...' in token:
            # presumably the end of an "in Kuerze..." notice -- TODO confirm
            return 0
        if token.isdigit():
            # time to next departure in cell
            return int(token)

        # check if time of next departure in cell
        fields = token.replace('&nbsp;', '').split(':')
        if len(fields) == 2 and all(field.isdigit() for field in fields):
            try:
                return time(*map(int, fields))
            except ValueError:
                # digits which do not form a valid clock time
                return None
        return None

    def get_departures(self, url):
        """ Get list of next departures as Departure object

        Returns an empty list if ``url`` is empty or the page can not be
        read. Table rows which can not be parsed are reported and skipped.
        """

        #TODO parse line name and direction for station site parsing

        if not url:
            # FIXME prevent from calling this method with None
            print("ERROR empty url")
            return []

        # open url for 90 min timeslot / get departure for next 90 min
        retry = 0
        tries = 2 # try a second time before return empty list
        while retry < tries:
            bs = BeautifulSoup(urlopen(url + "&departureSizeTimeSlot=90"))
            try:
                lines = bs.find('form', {'name': 'mainform'}).table.findAll('tr')[1]
                break

            except (AttributeError, IndexError):
                print('FetchError')
                msg = bs.findAll('span', {'class': 'rot fett'})
                if len(msg) > 0 and u'technischen St' in msg[0].text:
                    print('Temporary problem')
                    print('\n'.join(map(lambda x: x.text.replace('&nbsp;', ''), msg)))
                    # FIXME Change to error message after fixing qml gui
                    return []
                # FIXME more testing
                retry += 1
                if retry == tries:
                    return []
            sleep(0.5)

        if len(lines.findAll('td', {'class': 'info'})) > 0:
            station = lines.span.text.replace('&nbsp;', '')
            line = lines.findAll('span')[-1].text.replace('&nbsp;', '')
        else:
            station = lines.td.span.text.replace('&nbsp;', '')
            line = lines.find('td', {'align': 'right'}).span.text.replace('&nbsp;', '')

        result_lines = bs.findAll('table')[-1].findAll('tr')

        dep = []
        for tr in result_lines[1:]:
            d = {'station': station}
            th = tr.findAll('th')

            if len(th) < 2:
                #TODO replace with logger
                print("[DEBUG] Unable to find th in:\n%s" % str(tr))
                # nothing to parse in this row
                continue

            if len(th) == 2:
                # underground site looks different -.-
                d['lowfloor'] = True
                d['line'] = line
                d['direction'] = th[0].text.replace('&nbsp;', '')
                cell = th[-1]
            else:
                # all other lines
                d['lowfloor'] = bool(th[-1].find('img') and th[-1].img.has_key('alt'))
                d['line'] = th[0].text.replace('&nbsp;', '')
                d['direction'] = th[1].text.replace('&nbsp;', '')
                cell = th[-2]

            # parse time
            parsed = self._parse_time_cell(cell.text)
            if parsed is None:
                # Unexpected content
                #TODO replace with logger
                print("[DEBUG] Invalid data:\n%s" % cell.text)
                continue

            d['time'] = parsed
            dep.append(Departure(**d))

        return dep
267
268
# Identifiers for the kinds of lines. Each value is also the index of the
# matching display name in LINE_TYPE_NAMES.
UBAHN = 0
TRAM = 1
BUS = 2
NIGHTLINE = 3
OTHER = 4

# Display names, ordered like the identifiers above.
LINE_TYPE_NAMES = [
    'U-Bahn',
    'Strassenbahn',
    'Bus',
    'Nightline',
    'Andere',
]
271
def get_line_sort_key(name):
    """Build the key used to order line names

    The key is a tuple of all non-digit characters of the name (joined in
    their original order) and the number formed by all of its digits, with
    0 standing in for names that have no digits.

    >>> get_line_sort_key('U6')
    ('U', 6)

    >>> get_line_sort_key('D')
    ('D', 0)

    >>> get_line_sort_key('59A')
    ('A', 59)
    """
    letters = []
    digits = []
    for char in name:
        # route every character into exactly one of the two buckets
        if char.isdigit():
            digits.append(char)
        else:
            letters.append(char)

    number = int(''.join(digits)) if digits else 0
    return (''.join(letters), number)
288
def get_line_type(name):
    """Get the type of line for the given name

    The result is one of the module constants UBAHN, TRAM, BUS, NIGHTLINE
    or OTHER. The checks are applied in this order: purely numeric names
    are trams, names ending in 'A' (or ending in 'B' and containing a
    number) are buses, then the 'U' and 'N' prefixes are checked, then a
    few special tram names. Everything else is OTHER.

    >>> get_line_type('U1') == UBAHN
    True
    >>> get_line_type('59A') == BUS
    True
    >>> get_line_type('5B') == BUS
    True
    >>> get_line_type('WLB') == TRAM
    True
    >>> get_line_type('B') == OTHER
    True
    """
    if name.isdigit():
        return TRAM

    # A trailing 'B' only marks a bus if the name carries a number; this
    # keeps names like 'WLB' (listed as tram below) out of this branch and
    # does not index past the end of one-letter names.
    numbered = any(char.isdigit() for char in name)
    if name.endswith('A') or (name.endswith('B') and numbered):
        return BUS
    elif name.startswith('U'):
        return UBAHN
    elif name.startswith('N'):
        return NIGHTLINE
    elif name in ('D', 'O', 'VRT', 'WLB'):
        return TRAM

    return OTHER
309
def categorize_lines(lines):
    """Group line names by their type

    Returns a list of (type name, line names) tuples ordered by line type;
    the names inside each group are ordered by get_line_sort_key. Types
    without any line are left out.

    >>> categorize_lines(['U4', 'U3', '59A'])
    [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
    """
    by_type = collections.defaultdict(list)

    # walk the names in plain sorted order first, so that names with an
    # equal sort key keep that order after the stable sort below
    for name in sorted(lines):
        by_type[get_line_type(name)].append(name)

    result = []
    for type_id in sorted(by_type):
        members = by_type[type_id]
        members.sort(key=get_line_sort_key)
        result.append((LINE_TYPE_NAMES[type_id], members))

    return result