vcs.maemo.org Git - pywienerlinien/blob - gotovienna/routing.py

   1 #!/usr/bin/env python
   2 # -*- coding: UTF-8 -*-
   3
   4 from BeautifulSoup import BeautifulSoup, NavigableString
   5 from urllib2 import urlopen
   6 from urllib import urlencode
   7 from datetime import datetime, time, timedelta
   8 from textwrap import wrap
   9 import argparse
  10 import sys
  11 import os.path
  12 import re
  13
  14 from gotovienna import defaults
  15
# Location kinds accepted as the second element of the origin/destination
# tuples passed to search(); any other value is rejected with ParserError.
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime pattern for hours and minutes (not referenced in the visible part
# of this module).
TIMEFORMAT = '%H:%M'
# File in the user's home directory that receives the request URLs built by
# search() and the dump of pages whose overview could not be parsed.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
  19
  20 class ParserError(Exception):
  21
  22     def __init__(self, msg='Parser error'):
  23         self.message = msg
  24
  25 class PageType:
  26     UNKNOWN, CORRECTION, RESULT = range(3)
  27
  28
  29 def extract_city(station):
  30     """ Extract city from string if present,
  31     else return default city
  32
  33     >>> extract_city('Karlsplatz, Wien')
  34     'Wien'
  35     """
  36     if len(station.split(',')) > 1:
  37         return station.split(',')[-1].strip()
  38     else:
  39         return 'Wien'
  40
  41 def extract_station(station):
  42     """ Remove city from string
  43
  44     >>> extract_station('Karlsplatz, Wien')
  45     'Karlsplatz'
  46     """
  47     if len(station.split(',')) > 1:
  48         return station[:station.rindex(',')].strip()
  49     else:
  50         return station
  51
  52 def split_station(station):
  53     """ >>> split_station('Karlsplatz, Wien')
  54     ('Karlsplatz', 'Wien')
  55     >>> split_station('Karlsplatz')
  56     ('Karlsplatz', 'Wien')
  57     """
  58     if len(station.split(',')) > 1:
  59         return (station[:station.rindex(',')].strip(), station.split(',')[-1].strip())
  60     else:
  61         return (station, 'Wien')
  62
  63 def search(origin_tuple, destination_tuple, dtime=None):
  64     """ build route request
  65     returns html result (as urllib response)
  66     """
  67     if not dtime:
  68         dtime = datetime.now()
  69
  70     origin, origin_type = origin_tuple
  71     origin, origin_city = split_station(origin)
  72
  73     destination, destination_type = destination_tuple
  74     destination, destination_city = split_station(destination)
  75
  76
  77     if not origin_type in POSITION_TYPES or\
  78         not destination_type in POSITION_TYPES:
  79         raise ParserError('Invalid position type')
  80
  81     post = defaults.search_post
  82     post['name_origin'] = origin
  83     post['type_origin'] = origin_type
  84     post['name_destination'] = destination
  85     post['type_destination'] = destination_type
  86     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
  87     post['itdTime'] = dtime.strftime('%H:%M')
  88     post['place_origin'] = origin_city
  89     post['place_destination'] = destination_city
  90     params = urlencode(post)
  91     url = '%s?%s' % (defaults.action, params)
  92
  93     try:
  94         f = open(DEBUGLOG, 'a')
  95         f.write(url + '\n')
  96         f.close()
  97     except:
  98         print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
  99
 100     return urlopen(url)
 101
 102
 103 class sParser:
 104     """ Parser for search response
 105     """
 106
 107     def __init__(self, html):
 108         self.soup = BeautifulSoup(html)
 109
 110     def check_page(self):
 111         if self.soup.find('form', {'id': 'form_efaresults'}):
 112             return PageType.RESULT
 113
 114         if self.soup.find('div', {'class':'form_error'}):
 115             return PageType.CORRECTION
 116
 117         return PageType.UNKNOWN
 118
 119     def get_correction(self):
 120         names_origin = self.soup.find('select', {'id': 'nameList_origin'})
 121         names_destination = self.soup.find('select', {'id': 'nameList_destination'})
 122         places_origin = self.soup.find('select', {'id': 'placeList_origin'})
 123         places_destination = self.soup.find('select', {'id': 'placeList_destination'})
 124
 125
 126         if names_origin or names_destination or places_origin or places_destination:
 127             dict = {}
 128
 129             if names_origin:
 130                 dict['origin'] = map(lambda x: x.text, names_origin.findAll('option'))
 131             if names_destination:
 132                 dict['destination'] = map(lambda x: x.text, names_destination.findAll('option'))
 133
 134             if places_origin:
 135                 dict['place_origin'] = map(lambda x: x.text, names_origin.findAll('option'))
 136             if names_destination:
 137                 dict['place_destination'] = map(lambda x: x.text, names_destination.findAll('option'))
 138
 139             return dict
 140
 141         else:
 142             raise ParserError('Unable to parse html')
 143
 144     def get_result(self):
 145         return rParser(str(self.soup))
 146
 147
 148
 149 class rParser:
 150     """ Parser for routing results
 151     """
 152
 153     def __init__(self, html):
 154         self.soup = BeautifulSoup(html)
 155         self._overview = None
 156         self._details = None
 157
 158     @classmethod
 159     def get_tdtext(cls, x, cl):
 160             return x.find('td', {'class': cl}).text
 161
 162     @classmethod
 163     def get_change(cls, x):
 164         y = rParser.get_tdtext(x, 'col_change')
 165         if y:
 166             return int(y)
 167         else:
 168             return 0
 169
 170     @classmethod
 171     def get_price(cls, x):
 172         y = rParser.get_tdtext(x, 'col_price')
 173         if y == '*':
 174             return 0.0
 175         if y.find(','):
 176             return float(y.replace(',', '.'))
 177         else:
 178             return 0.0
 179
 180     @classmethod
 181     def get_date(cls, x):
 182         y = rParser.get_tdtext(x, 'col_date')
 183         if y:
 184             return datetime.strptime(y, '%d.%m.%Y').date()
 185         else:
 186             return None
 187
 188     @classmethod
 189     def get_datetime(cls, x):
 190         y = rParser.get_tdtext(x, 'col_time')
 191         if y:
 192             if (y.find("-") > 0):
 193                 # overview mode
 194                 times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 195                 d = rParser.get_date(x)
 196                 from_dtime = datetime.combine(d, times[0])
 197                 if times[0] > times[1]:
 198                     # dateline crossing
 199                     to_dtime = datetime.combine(d + timedelta(1), times[1])
 200                 else:
 201                     to_dtime = datetime.combine(d, times[1])
 202
 203                 return [from_dtime, to_dtime]
 204
 205             else:
 206                 dtregex = {'date' : '\d\d\.\d\d',
 207                            'time': '\d\d:\d\d'}
 208
 209                 regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
 210                 ma = re.match(regex, y)
 211
 212                 if not ma:
 213                     return []
 214
 215                 gr = ma.groupdict()
 216
 217                 def extract_datetime(gr, n):
 218                     if 'date%d' % n in gr and gr['date%d' % n]:
 219                         from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
 220                     else:
 221                         t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
 222                         d = datetime.today().date()
 223                         return datetime.combine(d, t)
 224
 225                 # detail mode
 226                 from_dtime = extract_datetime(gr, 1)
 227                 to_dtime = extract_datetime(gr, 2)
 228
 229                 return [from_dtime, to_dtime]
 230
 231         else:
 232             return []
 233
 234     def __iter__(self):
 235         for detail in self.details():
 236             yield detail
 237
 238     def _parse_details(self):
 239         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 240
 241         trips = map(lambda x: map(lambda y: {
 242                         'timespan': rParser.get_datetime(y),
 243                         'station': map(lambda z: z[2:].strip(),
 244                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 245                         'info': map(lambda x: x.strip(),
 246                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 247                     }, x.find('tbody').findAll('tr')),
 248                     tours) # all routes
 249         return trips
 250
 251     @property
 252     def details(self):
 253         """returns list of trip details
 254         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 255               'station': [u'start', u'end'] if station else [],
 256               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 257             }, ... # next trip step
 258           ], ... # next trip possibility
 259         ]
 260         """
 261         if not self._details:
 262             self._details = self._parse_details()
 263
 264         return self._details
 265
 266     def _parse_overview(self):
 267
 268         # get overview table
 269         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 270
 271         # check if there is an overview table
 272         if table and table.findAll('tr'):
 273             # get rows
 274             rows = table.findAll('tr')[1:] # cut off headline
 275
 276             overview = map(lambda x: {
 277                                'timespan': rParser.get_datetime(x),
 278                                'change': rParser.get_change(x),
 279                                'price': rParser.get_price(x),
 280                            },
 281                            rows)
 282         else:
 283             raise ParserError('Unable to parse overview')
 284
 285         return overview
 286
 287     @property
 288     def overview(self):
 289         """dict containing
 290         date: datetime
 291         time: [time, time]
 292         duration: time
 293         change: int
 294         price: float
 295         """
 296         if not self._overview:
 297             try:
 298                 self._overview = self._parse_overview()
 299             except AttributeError:
 300                 f = open(DEBUGLOG, 'w')
 301                 f.write(str(self.soup))
 302                 f.close()
 303
 304         return self._overview
 305