vcs.maemo.org Git - pywienerlinien/blob - gotovienna/realtime.py

   1 # -*- coding: utf-8 -*-
   2
   3 from gotovienna.BeautifulSoup import BeautifulSoup
   4 #from urllib2 import urlopen
   5 from urllib import quote_plus
   6 from UrlOpener import urlopen
   7 from datetime import time, datetime, timedelta
   8 import re
   9 import collections
  10 from errors import LineNotFoundError, StationNotFoundError
  11 import cache
  12 from cache import Stations
  13 from time import sleep
  14 from utils import sort_departures
  15
  16 from gotovienna import defaults
  17
  18 class Departure(dict):
  19     def __init__(self, line, station, direction, time, lowfloor):
  20         self['line'] = line
  21         self['station'] = station
  22         self['direction'] = direction
  23         self['time'] = time
  24         self['lowfloor'] = lowfloor
  25
  26     def __getitem__(self, *args, **kwargs):
  27         if args[0] == 'ftime':
  28             # string representation of time/minutes
  29             return self.ftime
  30         elif args[0] == 'deltatime':
  31             # minutes
  32             return self.departure_deltatime
  33         elif args[0] == 'atime':
  34             # time object
  35             return self.departure_time
  36         return dict.__getitem__(self, *args, **kwargs)
  37
  38     @property
  39     def departure_time(self):
  40         """ return time object of departure time
  41         """
  42         if type(self['time']) == time:
  43             return self['time']
  44         else:
  45             return (datetime.now() + timedelta(0, self['time']) * 60).time()
  46
  47     @property
  48     def departure_deltatime(self):
  49         """ return int representing minutes until departure
  50         """
  51         if type(self['time']) == int:
  52             return self['time']
  53         else:
  54             raise NotImplementedError()
  55
  56     @property
  57     def ftime(self):
  58         if type(self['time']) == int:
  59             return str(self['time'])
  60         elif type(self['time']) == time:
  61             return self['time'].strftime('%H:%M')
  62
  63 class ITipParser:
    def __init__(self):
        """ Start with the line table held by the cache module
        """
        # if this is empty, the `lines` property fetches the table on first access
        self._lines = cache.lines
  66
  67     def parse_stations(self, html):
  68         bs = BeautifulSoup(html)
  69         tables = bs.findAll('table', {'class': 'text_10pix'})
  70         st = {}
  71
  72         for i in range(2):
  73             dir = tables[i].div.contents[-1].strip()[6:-6]
  74
  75             sta = []
  76             for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
  77                 if tr.a:
  78                     sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
  79                 else:
  80                     sta.append((tr.text.strip('&nbsp;'), None))
  81
  82             st[dir] = sta
  83         return st
  84
  85     def get_stations(self, name):
  86         """ Get station by direction
  87         {'Directionname': [('Station name', 'url')]}
  88         """
  89         if not name in self.lines:
  90             return {}
  91
  92         st = Stations(name)
  93
  94         if not st:
  95             st = self.parse_stations(urlopen(self.lines[name]).read())
  96
  97         return st
  98
  99     def parse_lines(self, html):
 100         """ Parse lines from html
 101         """
 102         bs = BeautifulSoup(html)
 103         # get tables
 104         lines = bs.findAll('td', {'class': 'linie'})
 105
 106         l = {}
 107
 108         for line in lines:
 109             if line.a:
 110                 href = defaults.line_overview + line.a['href']
 111                 if line.text:
 112                     l[line.text] = href
 113                 elif line.img:
 114                     l[line.img['alt']] = href
 115
 116         return l
 117
 118     @property
 119     def lines(self):
 120         """ Dictionary of Line names with url as value
 121         """
 122         if not self._lines:
 123             self._lines = self.parse_lines(urlopen(defaults.line_overview).read())
 124
 125         return self._lines
 126
 127     def get_url_from_direction(self, line, direction, station):
 128         stations = self.get_stations(line)
 129
 130         for stationname, url in stations.get(direction, []):
 131             if stationname == station:
 132                 return url
 133
 134         return None
 135
 136     def parse_departures_by_station(self, html):
 137         """ Parse departure page
 138         precondition: html is correct departure page
 139         handle select station page before calling this method
 140         """
 141         bs = BeautifulSoup(html)
 142         dep = []
 143
 144         try:
 145             li = bs.ul.findAll('li')
 146
 147             station = bs.strong.text.split(',')[0]
 148
 149             for l in li:
 150                 try:
 151                     d = l.div.next
 152                     if d.find('&raquo;') == -1:
 153                         d = d.next.next
 154
 155                     direction = d.replace('&raquo;', '').strip()
 156                     if direction.startswith('NICHT EINSTEIGEN'):
 157                         continue
 158
 159                     line = l.img['alt']
 160                     for span in l.findAll('span'):
 161                         if span.text.isdigit():
 162                             tim = int(span.text)
 163                         elif span.text.find(':') >= 0:
 164                             tim = time(*map(int, span.text.split(':')))
 165                         else:
 166                             print 'Warning: %s' % span.text
 167                             continue
 168
 169                         if span['class'] == 'departureBarrierFree':
 170                             lowfloor = True
 171                         else:
 172                             lowfloor = False
 173
 174                         dep.append(Departure(line, station, direction, tim, lowfloor))
 175
 176                 except Exception as e:
 177                     print 'Warning: %s' % e.message
 178                     continue
 179
 180         except AttributeError:
 181             print 'Error while getting station %s' % station
 182
 183         finally:
 184             return dep
 185
 186     def get_departures_by_station(self, station):
 187         """ Get list of Departures for one station
 188         """
 189
 190         # TODO 1. Error handling
 191         # TODO 2. more error handling
 192         # TODO 3. ultimative error handling
 193
 194         html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read()
 195
 196         li = BeautifulSoup(html).ul.findAll('li')
 197
 198         if li[0].a:
 199             # Dirty workaround for ambiguous station
 200             html = urlopen(defaults.qando + li[0].a['href']).read()
 201
 202         dep = self.parse_departures_by_station(html)
 203
 204         self.parse_departures_by_station(html)
 205         return dep
 206
 207     def parse_departures(self, html):
 208         bs = BeautifulSoup(html)
 209
 210         # Check for error messages
 211         msg = bs.findAll('span', {'class': 'rot fett'})
 212         if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
 213             print '\n'.join(map(lambda x: x.text.replace('&nbsp;', ''), msg))
 214             return []
 215
 216         lines = bs.find('form', {'name': 'mainform'}).table.findAll('tr')[1]
 217
 218         if len(lines.findAll('td', {'class': 'info'})) > 0:
 219             station = lines.span.text.replace('&nbsp;', '')
 220             line = lines.findAll('span')[-1].text.replace('&nbsp;', '')
 221         else:
 222             station = lines.td.span.text.replace('&nbsp;', '')
 223             line = lines.find('td', {'align': 'right'}).span.text.replace('&nbsp;', '')
 224
 225         result_lines = bs.findAll('table')[-1].findAll('tr')
 226
 227         dep = []
 228         for tr in result_lines[1:]:
 229             d = {'station': station}
 230             th = tr.findAll('th')
 231
 232             if len(th) < 2:
 233                 #TODO replace with logger
 234                 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
 235             elif len(th) == 2:
 236                 # underground site looks different -.-
 237                 d['lowfloor'] = True
 238                 d['line'] = line
 239                 d['direction'] = th[0].text.replace('&nbsp;', '')
 240                 t = th[-1]
 241             else:
 242                 # all other lines
 243                 print th[-1].find('img') and th[-1].img.has_key('alt')
 244                 d['lowfloor'] = th[-1].find('img') and th[-1].img.has_key('alt')
 245                 d['line'] = th[0].text.replace('&nbsp;', '')
 246                 d['direction'] = th[1].text.replace('&nbsp;', '')
 247                 t = th[-2]
 248             # parse time
 249             tim = t.text.split(' ')
 250             if len(tim) < 2:
 251                 # print '[WARNING] Invalid time: %s' % time
 252                 # TODO: Issue a warning OR convert "HH:MM" format to countdown
 253                 tim = tim[0]
 254             else:
 255                 tim = tim[1]
 256
 257             if tim.find('rze...') >= 0:
 258                     d['time'] = 0
 259             elif tim.isdigit():
 260                 # if time to next departure in cell convert to int
 261                 d['time'] = int(tim)
 262             else:
 263                 # check if time of next departue in cell
 264                 t = tim.strip('&nbsp;').split(':')
 265                 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
 266                     t = map(int, t)
 267                     d['time'] = time(*t)
 268                 else:
 269                     # Unexpected content
 270                     #TODO replace with logger
 271                     print "[DEBUG] Invalid data:\n%s" % time
 272
 273             dep.append(Departure(**d))
 274
 275         return dep
 276
 277     def get_departures(self, url):
 278         """ Get list of next departures as Departure object
 279         """
 280
 281         #TODO parse line name and direction for station site parsing
 282
 283         if not url:
 284             # FIXME prevent from calling this method with None
 285             print "ERROR empty url"
 286             return []
 287
 288         # open url for 90 min timeslot / get departure for next 90 min
 289         retry = 0
 290         tries = 2 # try a second time before return empty list
 291
 292         while retry < tries:
 293             html = urlopen(url + "&departureSizeTimeSlot=90").read()
 294             dep = self.parse_departures(html)
 295
 296             if dep:
 297                 return dep
 298
 299             retry += 1
 300             if retry == tries:
 301                 return []
 302
 303             sleep(0.5)
 304
 305
 306
 307
 308 UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5)
 309 LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere']
 310
 311 def get_line_sort_key(name):
 312     """Return a sort key for a line name
 313
 314     >>> get_line_sort_key('U6')
 315     ('U', 6)
 316
 317     >>> get_line_sort_key('D')
 318     ('D', 0)
 319
 320     >>> get_line_sort_key('59A')
 321     ('A', 59)
 322     """
 323     txt = ''.join(x for x in name if not x.isdigit())
 324     num = ''.join(x for x in name if x.isdigit()) or '0'
 325
 326     return (txt, int(num))
 327
 328 def get_line_type(name):
 329     """Get the type of line for the given name
 330
 331     >>> get_line_type('U1')
 332     UBAHN
 333     >>> get_line_type('59A')
 334     BUS
 335     """
 336     if name.isdigit():
 337         return TRAM
 338     elif name.endswith('A') or name.endswith('B') and name[1].isdigit():
 339         return BUS
 340     elif name.startswith('U'):
 341         return UBAHN
 342     elif name.startswith('N'):
 343         return NIGHTLINE
 344     elif name in ('D', 'O', 'VRT', 'WLB'):
 345         return TRAM
 346
 347     return OTHER
 348
 349 def categorize_lines(lines):
 350     """Return a categorized version of a list of line names
 351
 352     >>> categorize_lines(['U4', 'U3', '59A'])
 353     [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
 354     """
 355     categorized_lines = collections.defaultdict(list)
 356
 357     for line in sorted(lines):
 358         line_type = get_line_type(line)
 359         categorized_lines[line_type].append(line)
 360
 361     for lines in categorized_lines.values():
 362         lines.sort(key=get_line_sort_key)
 363
 364     return [(LINE_TYPE_NAMES[key], categorized_lines[key])
 365             for key in sorted(categorized_lines)]