vcs.maemo.org Git - pywienerlinien/blob - gotovienna/realtime.py

   1 # -*- coding: utf-8 -*-
   2
   3 from gotovienna.BeautifulSoup import BeautifulSoup
   4 #from urllib2 import urlopen
   5 from urllib import quote_plus
   6 # Use urlopen proxy for fake user agent
   7 from UrlOpener import urlopen
   8 from datetime import time, datetime, timedelta
   9 import datetime as date
  10 import re
  11 import collections
  12 from errors import LineNotFoundError, StationNotFoundError
  13 import cache
  14 from cache import Stations
  15 from time import sleep
  16 from utils import sort_departures
  17
  18 from gotovienna import defaults
  19
  20 class Departure(dict):
  21     def __init__(self, line, station, direction, time, lowfloor):
  22         self['line'] = line
  23         self['station'] = station
  24         self['direction'] = direction
  25         now = datetime.now()
  26         if type(time) == date.time:
  27             time = make_datetime(now, time)
  28         if type(time) == datetime:
  29             # FIXME convert in ModelList
  30             self['realtime'] = False
  31             self['time'] = (time - now).seconds/60
  32             self['departure'] = time
  33         elif type(time) == int:
  34             # FIXME convert in ModelList
  35             self['realtime'] = True
  36             self['time'] = time
  37             self['departure'] = now + timedelta(minutes=self['time'])
  38         else:
  39             raise ValueError('Wrong type: time')
  40
  41         # FIXME convert in ModelList
  42         self['ftime'] = str(self['time'])
  43         self['lowfloor'] = lowfloor
  44
  45 class ITipParser:
    def __init__(self):
        """ Start with the line table provided by the cache module
        """
        # Seeded from cache.lines; the `lines` property downloads and parses
        # the overview page whenever this value is empty.
        self._lines = cache.lines
  48
  49     def parse_stations(self, html):
  50         bs = BeautifulSoup(html)
  51         tables = bs.findAll('table', {'class': 'text_10pix'})
  52         st = {}
  53
  54         for i in range(2):
  55             dir = tables[i].div.contents[-1].strip()[6:-6]
  56
  57             sta = []
  58             for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
  59                 if tr.a:
  60                     sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
  61                 else:
  62                     sta.append((tr.text.strip('&nbsp;'), None))
  63
  64             st[dir] = sta
  65         return st
  66
  67     def get_stations(self, name):
  68         """ Get station by direction
  69         {'Directionname': [('Station name', 'url')]}
  70         """
  71         if not name in self.lines:
  72             return {}
  73
  74         st = Stations(name)
  75
  76         if not st:
  77             st = self.parse_stations(urlopen(self.lines[name]).read())
  78
  79         return st
  80
  81     def parse_lines(self, html):
  82         """ Parse lines from html
  83         """
  84         bs = BeautifulSoup(html)
  85         # get tables
  86         lines = bs.findAll('td', {'class': 'linie'})
  87
  88         l = {}
  89
  90         for line in lines:
  91             if line.a:
  92                 href = defaults.line_overview + line.a['href']
  93                 if line.text:
  94                     l[line.text] = href
  95                 elif line.img:
  96                     l[line.img['alt']] = href
  97
  98         return l
  99
 100     @property
 101     def lines(self):
 102         """ Dictionary of Line names with url as value
 103         """
 104         if not self._lines:
 105             self._lines = self.parse_lines(urlopen(defaults.line_overview).read())
 106
 107         return self._lines
 108
 109     def get_url_from_direction(self, line, direction, station):
 110         stations = self.get_stations(line)
 111
 112         for stationname, url in stations.get(direction, []):
 113             if stationname == station:
 114                 return url
 115
 116         return None
 117
 118     def parse_departures_by_station(self, html):
 119         """ Parse departure page
 120         precondition: html is correct departure page
 121         handle select station page before calling this method
 122         """
 123         bs = BeautifulSoup(html)
 124         dep = []
 125
 126         try:
 127             li = bs.ul.findAll('li')
 128
 129             station = bs.strong.text.split(',')[0]
 130
 131             for l in li:
 132                 try:
 133                     d = l.div.next
 134                     if d.find('&raquo;') == -1:
 135                         d = d.next.next
 136
 137                     direction = d.replace('&raquo;', '').strip()
 138                     if direction.startswith('NICHT EINSTEIGEN'):
 139                         continue
 140
 141                     line = l.img['alt']
 142                     for span in l.findAll('span'):
 143                         if span.text.isdigit():
 144                             tim = int(span.text)
 145                         elif span.text.find(':') >= 0:
 146                             tim = time(*map(int, span.text.split(':')))
 147                         else:
 148                             print 'Warning: %s' % span.text
 149                             continue
 150
 151                         if span['class'] == 'departureBarrierFree':
 152                             lowfloor = True
 153                         else:
 154                             lowfloor = False
 155
 156                         dep.append(Departure(line, station, direction, tim, lowfloor))
 157
 158                 except Exception as e:
 159                     print 'Warning: %s' % e.message
 160                     continue
 161
 162         except AttributeError:
 163             print 'Error while getting station %s' % station
 164
 165         finally:
 166             return dep
 167
 168     def get_departures_by_station(self, station):
 169         """ Get list of Departures for one station
 170         """
 171
 172         # TODO 1. Error handling
 173         # TODO 2. more error handling
 174         # TODO 3. ultimative error handling
 175
 176         html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read()
 177
 178         li = BeautifulSoup(html).ul.findAll('li')
 179
 180         if li[0].a:
 181             # Dirty workaround for ambiguous station
 182             html = urlopen(defaults.qando + li[0].a['href']).read()
 183
 184         dep = self.parse_departures_by_station(html)
 185
 186         self.parse_departures_by_station(html)
 187         return dep
 188
 189     def parse_departures(self, html):
 190         bs = BeautifulSoup(html)
 191
 192         # Check for error messages
 193         msg = bs.findAll('span', {'class': 'rot fett'})
 194         if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
 195             print '\n'.join(map(lambda x: x.text.replace('&nbsp;', ''), msg))
 196             return []
 197
 198         mainform = bs.find('form', {'name': 'mainform'})
 199         if not mainform:
 200             return []
 201
 202         lines = mainform.table.findAll('tr')[1]
 203
 204         if len(lines.findAll('td', {'class': 'info'})) > 0:
 205             station = lines.span.text.replace('&nbsp;', '')
 206             line = lines.findAll('span')[-1].text.replace('&nbsp;', '')
 207         else:
 208             station = lines.td.span.text.replace('&nbsp;', '')
 209             line = lines.find('td', {'align': 'right'}).span.text.replace('&nbsp;', '')
 210
 211         result_lines = bs.findAll('table')[-1].findAll('tr')
 212
 213         dep = []
 214         for tr in result_lines[1:]:
 215             d = {'station': station}
 216             th = tr.findAll('th')
 217
 218             if len(th) < 2:
 219                 #TODO replace with logger
 220                 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
 221             elif len(th) == 2:
 222                 # underground site looks different -.-
 223                 d['lowfloor'] = True
 224                 d['line'] = line
 225                 d['direction'] = th[0].text.replace('&nbsp;', '')
 226                 t = th[-1]
 227             else:
 228                 # all other lines
 229                 d['lowfloor'] = th[-1].find('img') and th[-1].img.has_key('alt')
 230                 d['line'] = th[0].text.replace('&nbsp;', '')
 231                 d['direction'] = th[1].text.replace('&nbsp;', '')
 232                 t = th[-2]
 233             # parse time
 234             tim = t.text.split(' ')
 235             if len(tim) < 2:
 236                 # print '[WARNING] Invalid time: %s' % time
 237                 # TODO: Issue a warning OR convert "HH:MM" format to countdown
 238                 tim = tim[0]
 239             else:
 240                 tim = tim[1]
 241
 242             if tim.find('rze...') >= 0:
 243                     d['time'] = 0
 244             elif tim.isdigit():
 245                 # if time to next departure in cell convert to int
 246                 d['time'] = int(tim)
 247             else:
 248                 # check if time of next departue in cell
 249                 t = tim.strip('&nbsp;').split(':')
 250                 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
 251                     t = map(int, t)
 252                     d['time'] = make_datetime(datetime.now(), time(*t))
 253                 else:
 254                     # Unexpected content
 255                     #TODO replace with logger
 256                     print "[DEBUG] Invalid data:\n%s" % time
 257
 258             dep.append(Departure(**d))
 259
 260         return dep
 261
 262     def get_departures(self, url):
 263         """ Get list of next departures as Departure objects
 264         """
 265
 266         #TODO parse line name and direction for station site parsing
 267
 268         if not url:
 269             # FIXME prevent from calling this method with None
 270             print "ERROR empty url"
 271             return []
 272
 273         # open url for 90 min timeslot / get departure for next 90 min
 274         retry = 0
 275         tries = 2 # try a second time before return empty list
 276
 277         while retry < tries:
 278             html = urlopen(url + "&departureSizeTimeSlot=90").read()
 279             dep = self.parse_departures(html)
 280
 281             if dep:
 282                 return dep
 283
 284             retry += 1
 285             if retry == tries:
 286                 return []
 287
 288             sleep(0.5)
 289
 290     def get_departures_test(self, line, station):
 291         """ replacement for get_departure
 292             hide url in higher levels :)
 293         """
 294         raise NotImplementedError
 295
 296
 297 UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5)
 298 LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere']
 299
 300 def get_line_sort_key(name):
 301     """Return a sort key for a line name
 302
 303     >>> get_line_sort_key('U6')
 304     ('U', 6)
 305
 306     >>> get_line_sort_key('D')
 307     ('D', 0)
 308
 309     >>> get_line_sort_key('59A')
 310     ('A', 59)
 311     """
 312     txt = ''.join(x for x in name if not x.isdigit())
 313     num = ''.join(x for x in name if x.isdigit()) or '0'
 314
 315     return (txt, int(num))
 316
 317 def get_line_type(name):
 318     """Get the type of line for the given name
 319
 320     >>> get_line_type('U1')
 321     UBAHN
 322     >>> get_line_type('59A')
 323     BUS
 324     """
 325     if name.isdigit():
 326         return TRAM
 327     elif name.endswith('A') or name.endswith('B') and name[1].isdigit():
 328         return BUS
 329     elif name.startswith('U'):
 330         return UBAHN
 331     elif name.startswith('N'):
 332         return NIGHTLINE
 333     elif name in ('D', 'O', 'VRT', 'WLB'):
 334         return TRAM
 335
 336     return OTHER
 337
 338 def categorize_lines(lines):
 339     """Return a categorized version of a list of line names
 340
 341     >>> categorize_lines(['U4', 'U3', '59A'])
 342     [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
 343     """
 344     categorized_lines = collections.defaultdict(list)
 345
 346     for line in sorted(lines):
 347         line_type = get_line_type(line)
 348         categorized_lines[line_type].append(line)
 349
 350     for lines in categorized_lines.values():
 351         lines.sort(key=get_line_sort_key)
 352
 353     return [(LINE_TYPE_NAMES[key], categorized_lines[key])
 354         for key in sorted(categorized_lines)]
 355
 356 def make_datetime(date, time):
 357     """ Ugly workaround, immutable datetime ftw -.-
 358         If
 359     """
 360     if date.hour > time.hour:
 361         date = date + timedelta(1)
 362     return datetime(year=date.year,
 363                     month=date.month,
 364                     day=date.day,
 365                     hour=time.hour,
 366                     minute=time.minute,
 367                     second=time.second)