1 # -*- coding: utf-8 -*-
3 from gotovienna.BeautifulSoup import BeautifulSoup
4 #from urllib2 import urlopen
5 from urllib import quote_plus
6 # Use urlopen proxy for fake user agent
7 from UrlOpener import urlopen
8 from datetime import time, datetime, timedelta
9 import datetime as date
12 from errors import LineNotFoundError, StationNotFoundError
14 from cache import Stations
15 from time import sleep
16 from utils import sort_departures
18 from gotovienna import defaults
class Departure(dict):
    """A single departure, stored as a plain dict.

    Keys set by the constructor:
      line, station, direction, lowfloor -- copied from the arguments
      realtime  -- True when ``time`` was given as a minute countdown,
                   False when it was given as a clock time
      time      -- whole minutes from now until the departure
      departure -- ``datetime`` of the departure
      ftime     -- ``str(self['time'])``
    """

    def __init__(self, line, station, direction, time, lowfloor):
        """Build the departure record.

        ``time`` is one of:
          * ``int`` -- minutes until departure (treated as realtime data)
          * ``datetime`` -- moment of departure
          * ``datetime.time`` -- clock time, turned into a ``datetime``
            with ``make_datetime`` relative to the current moment

        Raises ``ValueError`` for any other type.
        """
        self['line'] = line
        self['station'] = station
        self['direction'] = direction

        now = datetime.now()

        # The ``time`` parameter shadows the imported ``time`` class, so the
        # class has to be reached through the ``date`` module alias.
        if isinstance(time, date.time):
            time = make_datetime(now, time)

        if isinstance(time, datetime):
            # FIXME convert in ModelList
            self['realtime'] = False
            # NOTE(review): ``timedelta.seconds`` ignores the ``days`` part,
            # so a departure already in the past yields a large value
            # instead of a negative one -- confirm this is acceptable.
            self['time'] = (time - now).seconds // 60
            self['departure'] = time
        elif isinstance(time, int):
            # FIXME convert in ModelList
            self['realtime'] = True
            self['time'] = time
            self['departure'] = now + timedelta(minutes=self['time'])
        else:
            raise ValueError('Wrong type: time')

        # FIXME convert in ModelList
        self['ftime'] = str(self['time'])
        self['lowfloor'] = lowfloor
47 self._lines = cache.lines
# Parse a line's station page: builds a BeautifulSoup tree from `html`,
# collects every <table class="text_10pix"> and reads a direction label plus
# (station name, url) pairs from those tables.
# NOTE(review): the loop over `i`, the creation of `sta`, the branch
# conditions and the return statement are not visible in this listing --
# confirm against the complete file.
49 def parse_stations(self, html):
50 bs = BeautifulSoup(html)
51 tables = bs.findAll('table', {'class': 'text_10pix'})
# Direction label: last child of the table's first <div>, stripped of
# surrounding whitespace, minus its first six and last six characters.
55 dir = tables[i].div.contents[-1].strip()[6:-6]
# Station rows are the <tr> elements carrying this onmouseout handler.
58 for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
# Row with a link: keep the link text and the url built by prefixing
# defaults.line_overview to the link's href.
60 sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
# Alternative form with no url (presumably for rows without a link -- the
# condition is not visible here).
62 sta.append((tr.text.strip(' '), None))
# Look up the stations of line `name`. The visible code tests whether `name`
# is a key of self.lines and, on a path whose guard is not visible in this
# listing, downloads self.lines[name] with urlopen and hands the page to
# parse_stations.
# NOTE(review): what happens for an unknown name, and the final return, are
# not visible here -- confirm against the complete file.
67 def get_stations(self, name):
68 """ Get station by direction
69 {'Directionname': [('Station name', 'url')]}
71 if not name in self.lines:
77 st = self.parse_stations(urlopen(self.lines[name]).read())
# Parse the line overview page: collects every <td class="linie"> cell and
# stores, under the alt text of a cell's <img>, the url built by prefixing
# defaults.line_overview to the href of the cell's link.
# NOTE(review): the loop header (presumably iterating `lines` as `line`),
# the creation of `l` and the return statement are not visible in this
# listing -- confirm against the complete file.
81 def parse_lines(self, html):
82 """ Parse lines from html
84 bs = BeautifulSoup(html)
86 lines = bs.findAll('td', {'class': 'linie'})
92 href = defaults.line_overview + line.a['href']
96 l[line.img['alt']] = href
102 """ Dictionary of Line names with url as value
105 self._lines = self.parse_lines(urlopen(defaults.line_overview).read())
# Search for `station` on `line` in the given `direction`: fetches the
# per-direction station lists through get_stations and scans the list stored
# under `direction` for an exactly matching station name.
# NOTE(review): the values returned on a match and on no match are not
# visible in this listing -- confirm against the complete file.
109 def get_url_from_direction(self, line, direction, station):
110 stations = self.get_stations(line)
# An unknown direction falls back to an empty list, so the loop body is
# simply skipped.
112 for stationname, url in stations.get(direction, []):
113 if stationname == station:
# Parse a station departure page and collect Departure objects in `dep`.
# Visible steps:
#   * the <li> items of the first <ul> are the entries to process;
#   * the station name is the text of the first <strong>, cut at the first
#     comma;
#   * a direction is obtained from a string `d` by removing the '»' marker
#     and stripping whitespace; directions starting with 'NICHT EINSTEIGEN'
#     are tested for separately;
#   * each <span> of an entry is read either as an all-digit value or as a
#     clock time containing ':' (converted to datetime.time); anything else
#     prints a warning;
#   * a span whose class is 'departureBarrierFree' is tested for separately
#     (presumably to set `lowfloor` -- the assignment is not visible);
#   * the visible except handlers print a message.
# NOTE(review): large parts of the control flow (loop headers, try
# statements, assignments of `d`, `l`, `line`, `lowfloor`, the return) are
# not visible in this listing -- confirm against the complete file.
118 def parse_departures_by_station(self, html):
119 """ Parse departure page
120 precondition: html is correct departure page
121 handle select station page before calling this method
123 bs = BeautifulSoup(html)
127 li = bs.ul.findAll('li')
129 station = bs.strong.text.split(',')[0]
134 if d.find('»') == -1:
137 direction = d.replace('»', '').strip()
138 if direction.startswith('NICHT EINSTEIGEN'):
142 for span in l.findAll('span'):
143 if span.text.isdigit():
145 elif span.text.find(':') >= 0:
146 tim = time(*map(int, span.text.split(':')))
148 print 'Warning: %s' % span.text
151 if span['class'] == 'departureBarrierFree':
156 dep.append(Departure(line, station, direction, tim, lowfloor))
158 except Exception as e:
159 print 'Warning: %s' % e.message
162 except AttributeError:
163 print 'Error while getting station %s' % station
# Fetch and parse the departures of one station: downloads the url obtained
# by filling defaults.departures_by_station with the UTF-8 encoded,
# url-quoted station name, lists the <li> items of the first <ul>, and --
# for the ambiguous-station case named in the inline comment -- replaces the
# page with the one behind the first item's link (defaults.qando + href).
# The page is then parsed with parse_departures_by_station.
# NOTE(review): the condition guarding the workaround and the return
# statement are not visible in this listing.
# NOTE(review): parse_departures_by_station(html) is called a second time
# with its result discarded -- looks redundant; confirm before removing.
168 def get_departures_by_station(self, station):
169 """ Get list of Departures for one station
172 # TODO 1. Error handling
173 # TODO 2. more error handling
174 # TODO 3. ultimative error handling
176 html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read()
178 li = BeautifulSoup(html).ul.findAll('li')
181 # Dirty workaround for ambiguous station
182 html = urlopen(defaults.qando + li[0].a['href']).read()
184 dep = self.parse_departures_by_station(html)
186 self.parse_departures_by_station(html)
# Parse a departure page into Departure objects collected in `dep`.
# Visible steps: report error banners (<span class="rot fett">), read the
# station and line names from the second row of the table inside the form
# named 'mainform', then walk the rows of the last <table> on the page
# (skipping its first row) and build one keyword dict `d` per row that is
# finally passed to Departure(**d).
# NOTE(review): many statements (creation of `dep`, else branches, the loop
# over the time cells, the return) are not visible in this listing --
# confirm against the complete file.
189 def parse_departures(self, html):
190 bs = BeautifulSoup(html)
192 # Check for error messages
193 msg = bs.findAll('span', {'class': 'rot fett'})
# Only banners whose first message contains 'technischen St' at an index
# greater than 0 are printed.
194 if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
195 print '\n'.join(map(lambda x: x.text.replace(' ', ''), msg))
198 mainform = bs.find('form', {'name': 'mainform'})
202 lines = mainform.table.findAll('tr')[1]
# Two page layouts: with a <td class="info"> cell the names come from the
# first and last <span> of the row, otherwise from the first cell and the
# right-aligned cell.
204 if len(lines.findAll('td', {'class': 'info'})) > 0:
205 station = lines.span.text.replace(' ', '')
206 line = lines.findAll('span')[-1].text.replace(' ', '')
208 station = lines.td.span.text.replace(' ', '')
209 line = lines.find('td', {'align': 'right'}).span.text.replace(' ', '')
211 result_lines = bs.findAll('table')[-1].findAll('tr')
214 for tr in result_lines[1:]:
215 d = {'station': station}
216 th = tr.findAll('th')
219 #TODO replace with logger
220 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
222 # underground site looks different -.-
225 d['direction'] = th[0].text.replace(' ', '')
# Low-floor flag: truthy when the last header cell holds an <img> that has
# an 'alt' attribute.
229 d['lowfloor'] = th[-1].find('img') and th[-1].img.has_key('alt')
230 d['line'] = th[0].text.replace(' ', '')
231 d['direction'] = th[1].text.replace(' ', '')
234 tim = t.text.split(' ')
236 # print '[WARNING] Invalid time: %s' % time
237 # TODO: Issue a warning OR convert "HH:MM" format to countdown
242 if tim.find('rze...') >= 0:
245 # if time to next departure in cell convert to int
248 # check if time of next departure in cell
249 t = tim.strip(' ').split(':')
250 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
# NOTE(review): `t` holds strings at this point in the visible code;
# time(*t) needs integers -- presumably converted on a line not visible here.
252 d['time'] = make_datetime(datetime.now(), time(*t))
255 #TODO replace with logger
# NOTE(review): this formats the imported `time` class, not the parsed
# value `tim` -- looks like a typo; confirm.
256 print "[DEBUG] Invalid data:\n%s" % time
258 dep.append(Departure(**d))
# Fetch the departure page behind `url` and parse it with parse_departures.
# The request appends "&departureSizeTimeSlot=90" to the url (a 90 minute
# window according to the inline comment). An empty url is reported with a
# printed error, and `tries = 2` is set up for a second attempt as its
# comment describes.
# NOTE(review): the guard for the empty url, the retry loop and the return
# statements are not visible in this listing -- confirm against the
# complete file.
262 def get_departures(self, url):
263 """ Get list of next departures as Departure objects
266 #TODO parse line name and direction for station site parsing
269 # FIXME prevent from calling this method with None
270 print "ERROR empty url"
273 # open url for 90 min timeslot / get departure for next 90 min
275 tries = 2 # try a second time before return empty list
278 html = urlopen(url + "&departureSizeTimeSlot=90").read()
279 dep = self.parse_departures(html)
def get_departures_test(self, line, station):
    """ replacement for get_departure
        hide url in higher levels :)

        Placeholder: takes a line and a station instead of a url, but is
        not implemented yet and always raises NotImplementedError.
    """
    raise NotImplementedError
# Line type identifiers (0..4); categorize_lines uses them as indexes into
# LINE_TYPE_NAMES, so both sequences must stay in the same order.
UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5)
# Display name for each line type identifier.
LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere']
def get_line_sort_key(name):
    """Return a sort key for a line name

    The key is the tuple ``(letters, number)``: all non-digit characters
    of ``name`` joined in their original order, and all digit characters
    joined and converted to ``int`` (``0`` when the name has no digits).

    >>> get_line_sort_key('U6')
    ('U', 6)

    >>> get_line_sort_key('D')
    ('D', 0)

    >>> get_line_sort_key('59A')
    ('A', 59)
    """
    txt = ''.join(x for x in name if not x.isdigit())
    # Names without any digit sort as number 0.
    num = ''.join(x for x in name if x.isdigit()) or '0'

    return (txt, int(num))
# Classify a line name by testing its prefix/suffix in the order of the
# branches below (presumably returning one of the line type constants --
# the return statements are not visible in this listing).
# NOTE(review): the first branch condition and every `return` are missing
# here -- confirm against the complete file.
# NOTE(review): on the 'A'/'B' branch `and` binds tighter than `or`, so the
# condition reads endswith('A') or (endswith('B') and name[1].isdigit());
# the one-character name 'B' would raise IndexError at name[1]. Confirm
# whether (endswith('A') or endswith('B')) and ... was intended.
317 def get_line_type(name):
318 """Get the type of line for the given name
320 >>> get_line_type('U1')
322 >>> get_line_type('59A')
327 elif name.endswith('A') or name.endswith('B') and name[1].isdigit():
329 elif name.startswith('U'):
331 elif name.startswith('N'):
333 elif name in ('D', 'O', 'VRT', 'WLB'):
def categorize_lines(lines):
    """Return a categorized version of a list of line names

    Names are grouped by ``get_line_type`` and every group is ordered with
    ``get_line_sort_key``. The result is a list of ``(type name, names)``
    tuples in ascending order of the type constant, where the type name
    comes from ``LINE_TYPE_NAMES``. Types without any line are omitted.

    >>> categorize_lines(['U4', 'U3', '59A'])
    [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
    """
    categorized_lines = {}

    # Pre-sorting by name fixes the relative order of names whose sort keys
    # compare equal, because the per-group sort below is stable.
    for line in sorted(lines):
        categorized_lines.setdefault(get_line_type(line), []).append(line)

    for members in categorized_lines.values():
        members.sort(key=get_line_sort_key)

    return [(LINE_TYPE_NAMES[key], categorized_lines[key])
            for key in sorted(categorized_lines)]
356 def make_datetime(date, time):
357 """ Ugly workaround, immutable datetime ftw -.-
360 if date.hour > time.hour:
361 date = date + timedelta(1)
362 return datetime(year=date.year,