vcs.maemo.org Git - pywienerlinien/blob - gotovienna/realtime.py

   1 # -*- coding: utf-8 -*-
   2
   3 from gotovienna.BeautifulSoup import BeautifulSoup
   4 from urllib2 import urlopen
   5 from datetime import time
   6 import re
   7 import collections
   8
   9 from gotovienna import defaults
  10
  11 class ITipParser:
  12     def __init__(self):
  13         self._stations = {}
  14         self._lines = {}
  15
  16     def get_stations(self, name):
  17         """ Get station by direction
  18         {'Directionname': [('Station name', 'url')]}
  19         """
  20         if not self._stations.has_key(name):
  21             st = {}
  22
  23             if not self.lines.has_key(name):
  24                 return None
  25
  26             bs = BeautifulSoup(urlopen(self.lines[name]))
  27             tables = bs.findAll('table', {'class': 'text_10pix'})
  28             for i in range(2):
  29                 dir = tables[i].div.contents[-1].strip('&nbsp;')
  30
  31                 sta = []
  32                 for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
  33                     if tr.a:
  34                         sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
  35                     else:
  36                         sta.append((tr.text.strip('&nbsp;'), None))
  37
  38                 st[dir] = sta
  39             self._stations[name] = st
  40
  41         return self._stations[name]
  42
  43     @property
  44     def lines(self):
  45         """ Dictionary of Line names with url as value
  46         """
  47         if not self._lines:
  48             bs = BeautifulSoup(urlopen(defaults.line_overview))
  49             # get tables
  50             lines = bs.findAll('td', {'class': 'linie'})
  51
  52             for line in lines:
  53                 if line.a:
  54                     href = defaults.line_overview + line.a['href']
  55                     if line.text:
  56                         self._lines[line.text] = href
  57                     elif line.img:
  58                         self._lines[line.img['alt']] = href
  59
  60         return self._lines
  61
  62     def get_url_from_direction(self, line, direction, station):
  63         stations = self.get_stations(line)
  64
  65         for stationname, url in stations.get(direction, []):
  66             if stationname == station:
  67                 return url
  68
  69         return None
  70
  71     def get_departures(self, url):
  72         """ Get list of next departures
  73         integer if time until next departure
  74         time if time of next departure
  75         """
  76
  77         #TODO parse line name and direction for station site parsing
  78
  79         if not url:
  80             # FIXME prevent from calling this method with None
  81             return []
  82
  83         bs = BeautifulSoup(urlopen(url))
  84         result_lines = bs.findAll('table')[-1].findAll('tr')
  85
  86         dep = []
  87         for tr in result_lines[1:]:
  88             th = tr.findAll('th')
  89             if len(th) < 2:
  90                 #TODO replace with logger
  91                 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
  92                 continue
  93
  94             # parse time
  95             time = th[-2].text.split(' ')
  96             if len(time) < 2:
  97                 #print 'Invalid time: %s' % time
  98                 # TODO: Issue a warning OR convert "HH:MM" format to countdown
  99                 continue
 100
 101             time = time[1]
 102
 103             if time.find('rze...') >= 0:
 104                     dep.append(0)
 105             elif time.isdigit():
 106                 # if time to next departure in cell convert to int
 107                 dep.append(int(time))
 108             else:
 109                 # check if time of next departue in cell
 110                 t = time.strip('&nbsp;').split(':')
 111                 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
 112                     t = map(int, t)
 113                     dep.append(time(*t))
 114                 else:
 115                     # Unexpected content
 116                     #TODO replace with logger
 117                     print "[DEBUG] Invalid data:\n%s" % time
 118
 119         return dep
 120
 121
 122 UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5)
 123 LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere']
 124
 125 def get_line_sort_key(name):
 126     """Return a sort key for a line name
 127
 128     >>> get_line_sort_key('U6')
 129     ('U', 6)
 130
 131     >>> get_line_sort_key('D')
 132     ('D', 0)
 133
 134     >>> get_line_sort_key('59A')
 135     ('A', 59)
 136     """
 137     txt = ''.join(x for x in name if not x.isdigit())
 138     num = ''.join(x for x in name if x.isdigit()) or '0'
 139
 140     return (txt, int(num))
 141
 142 def get_line_type(name):
 143     """Get the type of line for the given name
 144
 145     >>> get_line_type('U1')
 146     UBAHN
 147     >>> get_line_type('59A')
 148     BUS
 149     """
 150     if name.isdigit():
 151         return TRAM
 152     elif name.endswith('A') or name.endswith('B') and name[1].isdigit():
 153         return BUS
 154     elif name.startswith('U'):
 155         return UBAHN
 156     elif name.startswith('N'):
 157         return NIGHTLINE
 158     elif name in ('D', 'O', 'VRT', 'WLB'):
 159         return TRAM
 160
 161     return OTHER
 162
 163 def categorize_lines(lines):
 164     """Return a categorized version of a list of line names
 165
 166     >>> categorize_lines(['U4', 'U3', '59A'])
 167     [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
 168     """
 169     categorized_lines = collections.defaultdict(list)
 170
 171     for line in sorted(lines):
 172         line_type = get_line_type(line)
 173         categorized_lines[line_type].append(line)
 174
 175     for lines in categorized_lines.values():
 176         lines.sort(key=get_line_sort_key)
 177
 178     return [(LINE_TYPE_NAMES[key], categorized_lines[key])
 179             for key in sorted(categorized_lines)]
 180