fixing lowfloor bug
[pywienerlinien] / gotovienna / realtime.py
1 # -*- coding: utf-8 -*-
2
3 from gotovienna.BeautifulSoup import BeautifulSoup
4 #from urllib2 import urlopen
5 from urllib import quote_plus
6 from UrlOpener import urlopen
7 from datetime import time, datetime, timedelta
8 import re
9 import collections
10 from errors import LineNotFoundError, StationNotFoundError
11 import cache
12 from cache import Stations
13 from time import sleep
14
15 from gotovienna import defaults
16
class Departure(dict):
    """A single departure, stored as a dict with a few computed keys.

    Stored keys are 'line', 'station', 'direction', 'time' and 'lowfloor'.
    'time' is either an int (minutes until departure) or a ``datetime.time``
    (absolute departure time).

    Item access additionally understands three computed keys:
    'ftime' (formatted time), 'deltatime' (minutes until departure) and
    'atime' (absolute departure time).  They are resolved in
    ``__getitem__`` only, so ``get()``, ``in`` and iteration do not see them.
    """

    def __init__(self, line, station, direction, time, lowfloor):
        self['line'] = line
        self['station'] = station
        self['direction'] = direction
        self['time'] = time
        self['lowfloor'] = lowfloor

    def __getitem__(self, key):
        """Return a stored value, or one of the computed values."""
        if key == 'ftime':
            return self.ftime
        elif key == 'deltatime':
            return self.departure_deltatime
        elif key == 'atime':
            return self.departure_time
        return dict.__getitem__(self, key)

    @property
    def departure_time(self):
        """Return the departure time as a ``datetime.time`` object.

        A countdown value is converted by adding it, in minutes, to the
        current local time.
        """
        value = self['time']
        if isinstance(value, time):
            return value
        # The countdown is in minutes; timedelta's first positional argument
        # is days, so the unit has to be named explicitly.
        return (datetime.now() + timedelta(minutes=value)).time()

    @property
    def departure_deltatime(self):
        """Return an int representing the minutes until departure.

        Raises NotImplementedError when the stored time is not an int.
        """
        value = self['time']
        if isinstance(value, int):
            return value
        raise NotImplementedError()

    @property
    def ftime(self):
        """Return the time as a string.

        An int is returned as its decimal string, a ``datetime.time`` as
        'HH:MM'.  Any other stored value yields None.
        """
        value = self['time']
        if isinstance(value, int):
            return str(value)
        if isinstance(value, time):
            return value.strftime('%H:%M')
        return None
58
class ITipParser(object):
    """Scraper for line, station and departure pages.

    Page locations come from the project's ``defaults`` module; pages are
    fetched with the project's ``urlopen`` and parsed with BeautifulSoup.
    """

    def __init__(self):
        # Line name -> url mapping, taken from the cache module.
        self._lines = cache.lines

    def get_stations(self, name):
        """Return the stations of line `name`, grouped by direction.

        The result maps each direction name to a list of
        ``('Station name', 'url')`` tuples; the url is None for rows that
        carry no link.  An unknown line yields an empty dict.
        """
        if name not in self.lines:
            return {}

        st = Stations(name)
        if st:
            return st

        bs = BeautifulSoup(urlopen(self.lines[name]))
        tables = bs.findAll('table', {'class': 'text_10pix'})
        # At most the first two tables are read (presumably one per
        # direction); slicing tolerates pages that contain fewer.
        for table in tables[:2]:
            direction = table.div.contents[-1].strip()[6:-6]

            stops = []
            for tr in table.findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
                if tr.a:
                    stops.append((tr.a.text, defaults.line_overview + tr.a['href']))
                else:
                    stops.append((tr.text.strip(' '), None))

            st[direction] = stops

        return st

    @property
    def lines(self):
        """Dictionary of line names with the line's url as value.

        The overview page is only fetched while the mapping is still empty.
        """
        if not self._lines:
            bs = BeautifulSoup(urlopen(defaults.line_overview))
            for cell in bs.findAll('td', {'class': 'linie'}):
                if not cell.a:
                    continue
                href = defaults.line_overview + cell.a['href']
                if cell.text:
                    self._lines[cell.text] = href
                elif cell.img:
                    # Cells without text are named by their image's alt text.
                    self._lines[cell.img['alt']] = href

        return self._lines

    def get_url_from_direction(self, line, direction, station):
        """Return the url of `station` on `line` towards `direction`.

        Returns None when the line, direction or station is not found.
        """
        stations = self.get_stations(line)

        for stationname, url in stations.get(direction, []):
            if stationname == station:
                return url

        return None

    def get_departures_by_station(self, station):
        """Get the list of Departures for one station.

        Entries that cannot be parsed are reported on stdout and skipped.
        If the page lacks the expected list, the departures collected so far
        (possibly none) are returned.
        """
        dep = []
        bs = BeautifulSoup(urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))))
        try:
            li = bs.ul.findAll('li')
            if li and li[0].a:
                # Dirty workaround for ambiguous station: follow the first link
                bs = BeautifulSoup(urlopen(defaults.qando + li[0].a['href']))
                li = bs.ul.findAll('li')

            for l in li:
                try:
                    d = l.div.next
                    # Unicode literal: the parsed text is unicode, and a
                    # non-ASCII byte string cannot be compared against it.
                    if d.find(u'»') == -1:
                        d = d.next.next

                    direction = d.replace(u'»', '').strip()
                    if direction.startswith('NICHT EINSTEIGEN'):
                        continue

                    line = l.img['alt']
                    for span in l.findAll('span'):
                        text = span.text
                        if text.isdigit():
                            # countdown in minutes
                            tim = int(text)
                        elif ':' in text:
                            # absolute time
                            tim = time(*map(int, text.split(':')))
                        else:
                            print('Warning: %s' % text)
                            continue

                        lowfloor = span['class'] == 'departureBarrierFree'
                        dep.append(Departure(line, station, direction, tim, lowfloor))

                except Exception:
                    # Best effort: report the entry and go on with the next.
                    print('Warning: %s' % l)
                    continue

        except AttributeError:
            print('Error while getting station %s' % station)

        return dep

    @staticmethod
    def _parse_departure_time(text):
        """Convert the text of a time cell to an int or a ``datetime.time``.

        Returns the minutes until departure as int, a ``time`` for an
        'HH:MM' value, or None when the text is neither.
        """
        parts = text.split(' ')
        # With more than one space-separated token the value is the second.
        tim = parts[1] if len(parts) >= 2 else parts[0]

        # NOTE(review): 'rze...' is presumably the tail of "in Kuerze..."
        # (departing shortly) - confirm against the live page.
        if tim.find('rze...') >= 0:
            return 0
        if tim.isdigit():
            return int(tim)

        hhmm = tim.replace('&nbsp;', '').split(':')
        if len(hhmm) == 2 and all(part.isdigit() for part in hhmm):
            try:
                return time(*map(int, hhmm))
            except ValueError:
                # digits, but not a valid time of day
                return None
        return None

    def get_departures(self, url):
        """Get the list of next departures as Departure objects.

        Returns an empty list when `url` is empty or the page could not be
        parsed.  Rows with unexpected content are reported on stdout and
        skipped.
        """

        #TODO parse line name and direction for station site parsing

        if not url:
            # FIXME prevent from calling this method with None
            print("ERROR empty url")
            return []

        # open url for 90 min timeslot / get departure for next 90 min
        tries = 2  # try a second time before returning an empty list
        for attempt in range(tries):
            bs = BeautifulSoup(urlopen(url + "&departureSizeTimeSlot=90"))
            try:
                lines = bs.find('form', {'name': 'mainform'}).table.findAll('tr')[1]
                break

            except (AttributeError, IndexError):
                print('FetchError')
                msg = bs.findAll('span', {'class': 'rot fett'})
                if msg and u'technischen St' in msg[0].text:
                    print('Temporary problem')
                    print('\n'.join(m.text.replace('&nbsp;', '') for m in msg))
                    # FIXME Change to error message after fixing qml gui
                    return []
                # FIXME more testing
            if attempt + 1 < tries:
                sleep(0.5)
        else:
            # every attempt failed
            return []

        if lines.findAll('td', {'class': 'info'}):
            station = lines.span.text.replace('&nbsp;', '')
            line = lines.findAll('span')[-1].text.replace('&nbsp;', '')
        else:
            station = lines.td.span.text.replace('&nbsp;', '')
            line = lines.find('td', {'align': 'right'}).span.text.replace('&nbsp;', '')

        result_lines = bs.findAll('table')[-1].findAll('tr')

        dep = []
        for tr in result_lines[1:]:
            th = tr.findAll('th')

            if len(th) < 2:
                #TODO replace with logger
                print("[DEBUG] Unable to find th in:\n%s" % str(tr))
                # Nothing to parse in this row.
                continue

            if len(th) == 2:
                # underground site looks different -.-
                lowfloor = True
                line_name = line
                direction = th[0].text.replace('&nbsp;', '')
                time_cell = th[-1]
            else:
                # all other lines: an image with alt text in the last cell
                # marks the departure as low-floor
                marker = th[-1]
                lowfloor = bool(marker.find('img') and marker.img.has_key('alt'))
                line_name = th[0].text.replace('&nbsp;', '')
                direction = th[1].text.replace('&nbsp;', '')
                time_cell = th[-2]

            tim = self._parse_departure_time(time_cell.text)
            if tim is None:
                # Unexpected content
                #TODO replace with logger
                print("[DEBUG] Invalid data:\n%s" % time_cell.text)
                continue

            dep.append(Departure(line_name, station, direction, tim, lowfloor))

        return dep
263
264
# Line categories.  The values are consecutive ints starting at 0, in the
# same order as the display names listed in LINE_TYPE_NAMES.
(UBAHN, TRAM, BUS, NIGHTLINE, OTHER) = (0, 1, 2, 3, 4)
LINE_TYPE_NAMES = [
    'U-Bahn',
    'Strassenbahn',
    'Bus',
    'Nightline',
    'Andere',
]
267
def get_line_sort_key(name):
    """Build the sort key of a line name: (non-digit part, numeric part)

    The digits of the name are joined and read as one number; a name
    without digits gets the number 0.

    >>> get_line_sort_key('U6')
    ('U', 6)

    >>> get_line_sort_key('D')
    ('D', 0)

    >>> get_line_sort_key('59A')
    ('A', 59)
    """
    letters = []
    digits = []
    for char in name:
        if char.isdigit():
            digits.append(char)
        else:
            letters.append(char)

    number = int(''.join(digits)) if digits else 0
    return (''.join(letters), number)
284
def get_line_type(name):
    """Get the type of line for the given name

    Returns one of the constants UBAHN, TRAM, BUS, NIGHTLINE or OTHER.
    The checks run in order, so e.g. a name ending in 'A' counts as a bus
    even if it starts with 'N'.

    >>> get_line_type('U1') == UBAHN
    True
    >>> get_line_type('59A') == BUS
    True
    >>> get_line_type('B') == OTHER
    True
    """
    if name.isdigit():
        return TRAM

    # A trailing 'A' always means bus.  A trailing 'B' only does so when a
    # digit precedes it, which keeps names such as 'WLB' out and is safe
    # for the one-character name 'B'.
    ends_with_numbered_b = (
        name.endswith('B') and any(char.isdigit() for char in name[:-1]))
    if name.endswith('A') or ends_with_numbered_b:
        return BUS
    elif name.startswith('U'):
        return UBAHN
    elif name.startswith('N'):
        return NIGHTLINE
    elif name in ('D', 'O', 'VRT', 'WLB'):
        return TRAM

    return OTHER
305
def categorize_lines(lines):
    """Group a list of line names by line type

    Returns a list of (type display name, line names) tuples, ordered by
    line type; types without any line are left out.  Within a type the
    names are ordered by their sort key, ties keeping alphabetical order.

    >>> categorize_lines(['U4', 'U3', '59A'])
    [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
    """
    by_type = collections.defaultdict(list)

    # Alphabetical pre-sort; the stable key sort below keeps it for ties.
    for name in sorted(lines):
        by_type[get_line_type(name)].append(name)

    result = []
    for line_type in sorted(by_type):
        names = sorted(by_type[line_type], key=get_line_sort_key)
        result.append((LINE_TYPE_NAMES[line_type], names))

    return result