1 # -*- coding: utf-8 -*-
3 from gotovienna.BeautifulSoup import BeautifulSoup
4 #from urllib2 import urlopen
5 from urllib import quote_plus
6 from UrlOpener import urlopen
7 from datetime import time, datetime, timedelta
10 from errors import LineNotFoundError, StationNotFoundError
12 from cache import Stations
13 from time import sleep
14 from utils import sort_departures
16 from gotovienna import defaults
18 class Departure(dict):
19 def __init__(self, line, station, direction, time, lowfloor):
21 self['station'] = station
22 self['direction'] = direction
24 self['lowfloor'] = lowfloor
26 def __getitem__(self, *args, **kwargs):
27 if args[0] == 'ftime':
28 # string representation of time/minutes
30 elif args[0] == 'deltatime':
32 return self.departure_deltatime
33 elif args[0] == 'atime':
35 return self.departure_time
36 return dict.__getitem__(self, *args, **kwargs)
39 def departure_time(self):
40 """ return time object of departure time
42 if type(self['time']) == time:
45 return (datetime.now() + timedelta(0, self['time']) * 60).time()
48 def departure_deltatime(self):
49 """ return int representing minutes until departure
51 if type(self['time']) == int:
54 raise NotImplementedError()
58 if type(self['time']) == int:
59 return str(self['time'])
60 elif type(self['time']) == time:
61 return self['time'].strftime('%H:%M')
65 self._lines = cache.lines
67 def parse_stations(self, html):
68 bs = BeautifulSoup(html)
69 tables = bs.findAll('table', {'class': 'text_10pix'})
73 dir = tables[i].div.contents[-1].strip()[6:-6]
76 for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
78 sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
80 sta.append((tr.text.strip(' '), None))
85 def get_stations(self, name):
86 """ Get station by direction
87 {'Directionname': [('Station name', 'url')]}
89 if not name in self.lines:
95 st = self.parse_stations(urlopen(self.lines[name]).read())
99 def parse_lines(self, html):
100 """ Parse lines from html
102 bs = BeautifulSoup(html)
104 lines = bs.findAll('td', {'class': 'linie'})
110 href = defaults.line_overview + line.a['href']
114 l[line.img['alt']] = href
120 """ Dictionary of Line names with url as value
123 self._lines = self.parse_lines(urlopen(defaults.line_overview).read())
127 def get_url_from_direction(self, line, direction, station):
128 stations = self.get_stations(line)
130 for stationname, url in stations.get(direction, []):
131 if stationname == station:
136 def parse_departures_by_station(self, html):
137 """ Parse departure page
138 precondition: html is correct departure page
139 handle select station page before calling this method
141 bs = BeautifulSoup(html)
145 li = bs.ul.findAll('li')
147 station = bs.strong.text.split(',')[0]
152 if d.find('»') == -1:
155 direction = d.replace('»', '').strip()
156 if direction.startswith('NICHT EINSTEIGEN'):
160 for span in l.findAll('span'):
161 if span.text.isdigit():
163 elif span.text.find(':') >= 0:
164 tim = time(*map(int, span.text.split(':')))
166 print 'Warning: %s' % span.text
169 if span['class'] == 'departureBarrierFree':
174 dep.append(Departure(line, station, direction, tim, lowfloor))
176 except Exception as e:
177 print 'Warning: %s' % e.message
180 except AttributeError:
181 print 'Error while getting station %s' % station
186 def get_departures_by_station(self, station):
187 """ Get list of Departures for one station
190 # TODO 1. Error handling
191 # TODO 2. more error handling
192 # TODO 3. ultimative error handling
194 html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read()
196 li = BeautifulSoup(html).ul.findAll('li')
199 # Dirty workaround for ambiguous station
200 html = urlopen(defaults.qando + li[0].a['href']).read()
202 dep = self.parse_departures_by_station(html)
204 self.parse_departures_by_station(html)
207 def parse_departures(self, html):
208 bs = BeautifulSoup(html)
210 # Check for error messages
211 msg = bs.findAll('span', {'class': 'rot fett'})
212 if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
213 print '\n'.join(map(lambda x: x.text.replace(' ', ''), msg))
216 lines = bs.find('form', {'name': 'mainform'}).table.findAll('tr')[1]
218 if len(lines.findAll('td', {'class': 'info'})) > 0:
219 station = lines.span.text.replace(' ', '')
220 line = lines.findAll('span')[-1].text.replace(' ', '')
222 station = lines.td.span.text.replace(' ', '')
223 line = lines.find('td', {'align': 'right'}).span.text.replace(' ', '')
225 result_lines = bs.findAll('table')[-1].findAll('tr')
228 for tr in result_lines[1:]:
229 d = {'station': station}
230 th = tr.findAll('th')
233 #TODO replace with logger
234 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
236 # underground site looks different -.-
239 d['direction'] = th[0].text.replace(' ', '')
243 print th[-1].find('img') and th[-1].img.has_key('alt')
244 d['lowfloor'] = th[-1].find('img') and th[-1].img.has_key('alt')
245 d['line'] = th[0].text.replace(' ', '')
246 d['direction'] = th[1].text.replace(' ', '')
249 tim = t.text.split(' ')
251 # print '[WARNING] Invalid time: %s' % time
252 # TODO: Issue a warning OR convert "HH:MM" format to countdown
257 if tim.find('rze...') >= 0:
260 # if time to next departure in cell convert to int
263 # check if time of next departure in cell
264 t = tim.strip(' ').split(':')
265 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
270 #TODO replace with logger
271 print "[DEBUG] Invalid data:\n%s" % time
273 dep.append(Departure(**d))
277 def get_departures(self, url):
278 """ Get list of next departures as Departure object
281 #TODO parse line name and direction for station site parsing
284 # FIXME prevent from calling this method with None
285 print "ERROR empty url"
288 # open url for 90 min timeslot / get departure for next 90 min
290 tries = 2 # try a second time before return empty list
293 html = urlopen(url + "&departureSizeTimeSlot=90").read()
294 dep = self.parse_departures(html)
# Line categories. The numeric values are used as indexes into
# LINE_TYPE_NAMES, so the order of the two definitions must stay in sync.
UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5)
# Human-readable name of each category, indexed by the constants above.
LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere']
def get_line_sort_key(name):
    """Return a sort key for a line name

    The key is a ``(text, number)`` tuple: ``text`` is every non-digit
    character of ``name`` in its original order and ``number`` is the
    integer formed by its digits, or ``0`` if the name has no digits.

    >>> get_line_sort_key('U6')
    ('U', 6)

    >>> get_line_sort_key('D')
    ('D', 0)

    >>> get_line_sort_key('59A')
    ('A', 59)
    """
    txt = ''.join(x for x in name if not x.isdigit())
    # Names without any digit (e.g. 'D') fall back to '0' so int() succeeds.
    num = ''.join(x for x in name if x.isdigit()) or '0'

    return (txt, int(num))
328 def get_line_type(name):
329 """Get the type of line for the given name
331 >>> get_line_type('U1')
333 >>> get_line_type('59A')
338 elif name.endswith('A') or name.endswith('B') and name[1].isdigit():
340 elif name.startswith('U'):
342 elif name.startswith('N'):
344 elif name in ('D', 'O', 'VRT', 'WLB'):
def categorize_lines(lines):
    """Return a categorized version of a list of line names

    Each name is classified with get_line_type(). The names of one category
    are ordered by get_line_sort_key(), and the categories themselves are
    returned in ascending order of their type constant, each labelled with
    its entry from LINE_TYPE_NAMES. Categories without any line are left out.

    >>> categorize_lines(['U4', 'U3', '59A'])
    [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
    """
    categorized_lines = collections.defaultdict(list)

    # Pre-sort lexicographically: the per-category sort below is stable, so
    # names that share a sort key keep this order.
    for line in sorted(lines):
        categorized_lines[get_line_type(line)].append(line)

    # A separate loop name keeps the ``lines`` parameter from being rebound.
    for names in categorized_lines.values():
        names.sort(key=get_line_sort_key)

    return [(LINE_TYPE_NAMES[key], categorized_lines[key])
            for key in sorted(categorized_lines)]