vcs.maemo.org Git - pywienerlinien/blob - gotovienna/routing.py

   1 #!/usr/bin/env python
   2 # -*- coding: UTF-8 -*-
   3
   4 from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
   5 #from urllib2 import urlopen
   6 from UrlOpener import urlopen
   7 from urllib import urlencode
   8 from datetime import datetime, time, timedelta
   9 from textwrap import wrap
  10 import sys
  11 import os.path
  12 import re
  13
  14 from gotovienna import defaults
  15
  16 POSITION_TYPES = ('stop', 'address', 'poi')
  17 TIMEFORMAT = '%H:%M'
  18 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
  19
  20 class ParserError(Exception):
  21
  22     def __init__(self, msg='Parser error'):
  23         self.message = msg
  24
  25 class PageType:
  26     UNKNOWN, CORRECTION, RESULT = range(3)
  27
  28
  29 def extract_city(station):
  30     """ Extract city from string if present,
  31     else return default city
  32
  33     >>> extract_city('Karlsplatz, Wien')
  34     'Wien'
  35     """
  36     if len(station.split(',')) > 1:
  37         return station.split(',')[-1].strip()
  38     else:
  39         return 'Wien'
  40
  41 def extract_station(station):
  42     """ Remove city from string
  43
  44     >>> extract_station('Karlsplatz, Wien')
  45     'Karlsplatz'
  46     """
  47     if len(station.split(',')) > 1:
  48         return station[:station.rindex(',')].strip()
  49     else:
  50         return station
  51
  52 def split_station(station):
  53     """ >>> split_station('Karlsplatz, Wien')
  54     ('Karlsplatz', 'Wien')
  55     >>> split_station('Karlsplatz')
  56     ('Karlsplatz', 'Wien')
  57     """
  58     if len(station.split(',')) > 1:
  59         return (station[:station.rindex(',')].strip(), station.split(',')[-1].strip())
  60     else:
  61         return (station, 'Wien')
  62
  63 def guess_location_type(location):
  64     """Guess type (stop, address, poi) of a location
  65
  66     >>> guess_location_type('pilgramgasse')
  67     'stop'
  68
  69     >>> guess_location_type('karlsplatz 14')
  70     'address'
  71
  72     >>> guess_location_type('reumannplatz 12/34')
  73     'address'
  74     """
  75     parts = location.split()
  76     first_part = parts[0]
  77     last_part = parts[-1]
  78
  79     # Assume all single-word locations are stops
  80     if len(parts) == 1:
  81         return 'stop'
  82
  83     # If the last part is numeric, assume address
  84     if last_part.isdigit() and len(parts) > 1:
  85         return 'address'
  86
  87     # Addresses with door number (e.g. "12/34")
  88     if all(x.isdigit() or x == '/' for x in last_part):
  89         return 'address'
  90
  91     # Sane default - assume it's a stop/station name
  92     return 'stop'
  93
  94 def search(origin_tuple, destination_tuple, dtime=None):
  95     """ build route request
  96     returns html result (as urllib response)
  97     """
  98     if not dtime:
  99         dtime = datetime.now()
 100
 101     origin, origin_type = origin_tuple
 102     origin, origin_city = split_station(origin)
 103
 104     destination, destination_type = destination_tuple
 105     destination, destination_city = split_station(destination)
 106
 107
 108     if origin_type is None:
 109         origin_type = guess_location_type(origin)
 110         print 'Guessed origin type:', origin_type
 111
 112     if destination_type is None:
 113         destination_type = guess_location_type(destination)
 114         print 'Guessed destination type:', destination_type
 115
 116     if (origin_type not in POSITION_TYPES or
 117             destination_type not in POSITION_TYPES):
 118         raise ParserError('Invalid position type')
 119
 120     post = defaults.search_post
 121     post['name_origin'] = origin
 122     post['type_origin'] = origin_type
 123     post['name_destination'] = destination
 124     post['type_destination'] = destination_type
 125     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
 126     post['itdTime'] = dtime.strftime('%H:%M')
 127     post['place_origin'] = origin_city
 128     post['place_destination'] = destination_city
 129     params = urlencode(post)
 130     url = '%s?%s' % (defaults.action, params)
 131     #print url
 132
 133     return urlopen(url)
 134
 135
 136 class sParser:
 137     """ Parser for search response
 138     """
 139
 140     def __init__(self, html):
 141         self.soup = BeautifulSoup(html)
 142
 143     def check_page(self):
 144         if self.soup.find('form', {'id': 'form_efaresults'}):
 145             return PageType.RESULT
 146
 147         if self.soup.find('div', {'class':'form_error'}):
 148             return PageType.CORRECTION
 149
 150         return PageType.UNKNOWN
 151
 152     state = property(check_page)
 153
 154     def get_correction(self):
 155         names_origin = self.soup.find('select', {'id': 'nameList_origin'})
 156         names_destination = self.soup.find('select', {'id': 'nameList_destination'})
 157         places_origin = self.soup.find('select', {'id': 'placeList_origin'})
 158         places_destination = self.soup.find('select', {'id': 'placeList_destination'})
 159
 160
 161         if any([names_origin, names_destination, places_origin, places_destination]):
 162             dict = {}
 163
 164             if names_origin:
 165                 dict['origin'] = map(lambda x: x.text,
 166                                      names_origin.findAll('option'))
 167             if names_destination:
 168                 dict['destination'] = map(lambda x: x.text,
 169                                           names_destination.findAll('option'))
 170
 171             if places_origin:
 172                 dict['place_origin'] = map(lambda x: x.text,
 173                                            names_origin.findAll('option'))
 174             if names_destination:
 175                 dict['place_destination'] = map(lambda x: x.text,
 176                                                 names_destination.findAll('option'))
 177
 178             return dict
 179
 180         else:
 181             raise ParserError('Unable to parse html')
 182
 183     def get_result(self):
 184         return rParser(str(self.soup))
 185
 186
 187
 188 class rParser:
 189     """ Parser for routing results
 190     """
 191
 192     def __init__(self, html):
 193         self.soup = BeautifulSoup(html)
 194         self._overview = None
 195         self._details = None
 196
 197     @classmethod
 198     def get_tdtext(cls, x, cl):
 199             return x.find('td', {'class': cl}).text
 200
 201     @classmethod
 202     def get_change(cls, x):
 203         y = rParser.get_tdtext(x, 'col_change')
 204         if y:
 205             return int(y)
 206         else:
 207             return 0
 208
 209     @classmethod
 210     def get_price(cls, x):
 211         y = rParser.get_tdtext(x, 'col_price')
 212         if y == '*':
 213             return 0.0
 214         if y.find(','):
 215             return float(y.replace(',', '.'))
 216         else:
 217             return 0.0
 218
 219     @classmethod
 220     def get_date(cls, x):
 221         y = rParser.get_tdtext(x, 'col_date')
 222         if y:
 223             return datetime.strptime(y, '%d.%m.%Y').date()
 224         else:
 225             return None
 226
 227     @classmethod
 228     def get_datetime(cls, x):
 229         y = rParser.get_tdtext(x, 'col_time')
 230         if y:
 231             if (y.find("-") > 0):
 232                 # overview mode
 233                 times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 234                 d = rParser.get_date(x)
 235                 from_dtime = datetime.combine(d, times[0])
 236                 if times[0] > times[1]:
 237                     # dateline crossing
 238                     to_dtime = datetime.combine(d + timedelta(1), times[1])
 239                 else:
 240                     to_dtime = datetime.combine(d, times[1])
 241
 242                 return [from_dtime, to_dtime]
 243
 244             else:
 245                 dtregex = {'date' : '\d\d\.\d\d',
 246                            'time': '\d\d:\d\d'}
 247
 248                 regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
 249                 ma = re.match(regex, y)
 250
 251                 if not ma:
 252                     return []
 253
 254                 gr = ma.groupdict()
 255
 256                 def extract_datetime(gr, n):
 257                     if 'date%d' % n in gr and gr['date%d' % n]:
 258                         if gr['time%d' % n] == '24:00':
 259                             gr['time%d' % n] = '0:00'
 260                         from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
 261                     else:
 262                         d = datetime.today().date()
 263                         # Strange times possible at wienerlinien
 264                         if gr['time%d' % n] == '24:00':
 265                             gr['time%d' % n] = '0:00'
 266                             d += timedelta(days=1)
 267                         t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
 268
 269                         return datetime.combine(d, t)
 270
 271                 # detail mode
 272                 from_dtime = extract_datetime(gr, 1)
 273                 to_dtime = extract_datetime(gr, 2)
 274
 275                 return [from_dtime, to_dtime]
 276
 277         else:
 278             return []
 279
 280     def __iter__(self):
 281         for detail in self.details():
 282             yield detail
 283
 284     def _parse_details(self):
 285         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 286
 287         trips = map(lambda x: map(lambda y: {
 288                         'timespan': rParser.get_datetime(y),
 289                         'station': map(lambda z: z[2:].strip(),
 290                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 291                         'info': map(lambda x: x.strip(),
 292                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 293                     }, x.find('tbody').findAll('tr')),
 294                     tours) # all routes
 295         return trips
 296
 297     @property
 298     def details(self):
 299         """returns list of trip details
 300         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 301               'station': [u'start', u'end'] if station else [],
 302               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 303             }, ... # next trip step
 304           ], ... # next trip possibility
 305         ]
 306         """
 307         if not self._details:
 308             self._details = self._parse_details()
 309
 310         return self._details
 311
 312     def _parse_overview(self):
 313
 314         # get overview table
 315         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 316
 317         # check if there is an overview table
 318         if table and table.findAll('tr'):
 319             # get rows
 320             rows = table.findAll('tr')[1:] # cut off headline
 321
 322             overview = map(lambda x: {
 323                                'timespan': rParser.get_datetime(x),
 324                                'change': rParser.get_change(x),
 325                                'price': rParser.get_price(x),
 326                            },
 327                            rows)
 328         else:
 329             raise ParserError('Unable to parse overview')
 330
 331         return overview
 332
 333     @property
 334     def overview(self):
 335         """dict containing
 336         date: datetime
 337         time: [time, time]
 338         duration: time
 339         change: int
 340         price: float
 341         """
 342         if not self._overview:
 343             try:
 344                 self._overview = self._parse_overview()
 345             except AttributeError:
 346                 f = open(DEBUGLOG, 'w')
 347                 f.write(str(self.soup))
 348                 f.close()
 349
 350         return self._overview
 351