vcs.maemo.org Git - pywienerlinien/blob - scotty.py

   1 from BeautifulSoup import BeautifulSoup, NavigableString
   2 from urllib2 import urlopen
   3 from urllib import urlencode
   4 import settings
   5 from datetime import datetime, time
   6 from textwrap import wrap
   7 import argparse
   8 import sys
   9
# Location kinds accepted for an origin or destination; validated in search()
# and offered as the choices of the -ot / -dt command line options.
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime pattern used when printing the times of a trip step on the console.
TIMEFORMAT = '%H:%M'
  12
  13 class ParserError(Exception):
  14
  15     def __init__(self, msg='Parser error'):
  16         self.message = msg
  17
  18 class PageType:
  19     UNKNOWN, CORRECTION, RESULT = range(3)
  20
  21
  22 def search(origin_tuple, destination_tuple, dtime=None):
  23     """ build route request
  24     returns html result (as urllib response)
  25     """
  26     if not dtime:
  27         dtime = datetime.now()
  28
  29     origin, origin_type = origin_tuple
  30     destination, destination_type = destination_tuple
  31     if not origin_type in POSITION_TYPES or\
  32         not destination_type in POSITION_TYPES:
  33         raise ParserError('Invalid position type')
  34
  35     post = settings.search_post
  36     post['name_origin'] = origin
  37     post['type_origin'] = origin_type
  38     post['name_destination'] = destination
  39     post['type_destination'] = destination_type
  40     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
  41     post['itdTime'] = dtime.strftime('%H:%M')
  42     params = urlencode(post)
  43     url = '%s?%s' % (settings.action, params)
  44
  45     print "\nurl %s url\n\n%s\n\nurl %s url\n" % ('~' * 100, url, '~' * 100)
  46
  47     return urlopen(url)
  48
  49
  50 class sParser:
  51     """ Parser for search response
  52     """
  53
  54     def __init__(self, html):
  55         self.soup = BeautifulSoup(html)
  56
  57     def check_page(self):
  58         if self.soup.find('form', {'id': 'form_efaresults'}):
  59             return PageType.RESULT
  60
  61         if self.soup.find('div', {'class':'form_error'}):
  62             return PageType.CORRECTION
  63
  64         return PageType.UNKNOWN
  65
  66     def get_correction(self):
  67         nlo = self.soup.find('select', {'id': 'nameList_origin'})
  68         nld = self.soup.find('select', {'id': 'nameList_destination'})
  69
  70         if not nlo and not nld:
  71             raise ParserError('Unable to parse html')
  72
  73         if nlo:
  74             origin = map(lambda x: x.text, nlo.findAll('option'))
  75         else:
  76             origin = []
  77         if nld:
  78             destination = map(lambda x: x.text, nld.findAll('option'))
  79         else:
  80             destination = []
  81
  82         return (origin, destination)
  83
  84     def get_result(self):
  85         return rParser(str(self.soup))
  86
  87
  88
  89 class rParser:
  90     """ Parser for routing results
  91     """
  92
  93     def __init__(self, html):
  94         self.soup = BeautifulSoup(html)
  95         self._overview = None
  96         self._details = None
  97
  98     @classmethod
  99     def get_tdtext(cls, x, cl):
 100             return x.find('td', {'class': cl}).text
 101
 102     @classmethod
 103     def get_change(cls, x):
 104         y = rParser.get_tdtext(x, 'col_change')
 105         if y:
 106             return int(y)
 107         else:
 108             return 0
 109
 110     @classmethod
 111     def get_price(cls, x):
 112         y = rParser.get_tdtext(x, 'col_price')
 113         if y.find(','):
 114             return float(y.replace(',', '.'))
 115         else:
 116             return 0.0
 117
 118     @classmethod
 119     def get_date(cls, x):
 120         y = rParser.get_tdtext(x, 'col_date')
 121         if y:
 122             return datetime.strptime(y, '%d.%m.%Y').date()
 123         else:
 124             return None
 125
 126     @classmethod
 127     def get_time(cls, x):
 128         y = rParser.get_tdtext(x, 'col_time')
 129         if y:
 130             if (y.find("-") > 0):
 131                 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 132             else:
 133                 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
 134         else:
 135             return []
 136
 137     @classmethod
 138     def get_duration(cls, x):
 139         y = rParser.get_tdtext(x, 'col_duration')
 140         if y:
 141             return time(*map(int, y.split(":")))
 142         else:
 143             return None
 144
 145     def __iter__(self):
 146         for detail in self.details():
 147             yield detail
 148
 149     def _parse_details(self):
 150         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 151
 152         trips = map(lambda x: map(lambda y: {
 153                         'time': rParser.get_time(y),
 154                         'station': map(lambda z: z[2:].strip(),
 155                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 156                         'info': map(lambda x: x.strip(),
 157                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 158                     }, x.find('tbody').findAll('tr')),
 159                     tours) # all routes
 160         return trips
 161
 162     @property
 163     def details(self):
 164         """returns list of trip details
 165         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 166               'station': [u'start', u'end'] if station else [],
 167               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 168             }, ... # next trip step
 169           ], ... # next trip possibility
 170         ]
 171         """
 172         if not self._details:
 173             self._details = self._parse_details()
 174
 175         return self._details
 176
 177     def _parse_overview(self):
 178
 179         # get overview table
 180         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 181
 182         # check if there is an overview table
 183         if table and table.findAll('tr'):
 184             # get rows
 185             rows = table.findAll('tr')[1:] # cut off headline
 186
 187             overview = map(lambda x: {
 188                                'date': rParser.get_date(x),
 189                                'time': rParser.get_time(x),
 190                                'duration': rParser.get_duration(x), # grab duration
 191                                'change': rParser.get_change(x),
 192                                'price': rParser.get_price(x),
 193                            },
 194                            rows)
 195         else:
 196             raise ParserError('Unable to parse overview')
 197
 198         return overview
 199
 200     @property
 201     def overview(self):
 202         """dict containing
 203         date: datetime
 204         time: [time, time]
 205         duration: time
 206         change: int
 207         price: float
 208         """
 209         if not self._overview:
 210             try:
 211                 self._overview = self._parse_overview()
 212             except AttributeError:
 213                 f = open('DEBUG', 'w')
 214                 f.write(str(self.soup))
 215                 f.close()
 216
 217         return self._overview
 218
 219 if __name__ == '__main__':
 220     parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
 221     parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
 222     parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
 223     parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 224     parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 225
 226     args = parser.parse_args()
 227     print args.o
 228     html = search((args.o, args.ot), (args.d, args.dt)).read()
 229
 230     parser = sParser(html)
 231     state = parser.check_page()
 232
 233     if state == PageType.CORRECTION:
 234         try:
 235             cor = parser.get_correction()
 236             if cor[0]:
 237                 print
 238                 print '* Origin ambiguous:'
 239                 lo = None
 240                 while not lo or not lo.isdigit() or int(lo) > len(cor[0]):
 241                     i = 1
 242                     for c in cor[0]:
 243                         print '%d. %s' % (i, c)
 244                         i += 1
 245                     lo = sys.stdin.readline().strip()
 246
 247                 args.o = cor[0][int(lo) - 1]
 248
 249             if cor[1]:
 250                 print
 251                 print '* Destination ambiguous:'
 252                 ld = None
 253                 while not ld or not ld.isdigit() or int(ld) > len(cor[1]):
 254                     j = 1
 255                     for c in cor[1]:
 256                         print '%d. %s' % (j, c)
 257                         j += 1
 258                     ld = sys.stdin.readline().strip()
 259
 260                 args.d = cor[1][int(ld) - 1]
 261
 262             html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
 263
 264             parser = sParser(html)
 265             state = parser.check_page()
 266
 267         except ParserError:
 268             print 'PANIC at correction page'
 269
 270     if state == PageType.RESULT:
 271         parser = rParser(html)
 272         try:
 273             overviews = parser.overview
 274             details = parser.details
 275             l = ''
 276             while not l == 'q':
 277                 for r in range(len(overviews)):
 278                     print '%d. [%s] %s-%s (%s)' % (r + 1, overviews[r]['date'], overviews[r]['time'][0], overviews[r]['time'][1], overviews[r]['duration'])
 279                 print 'q. Quit'
 280                 l = sys.stdin.readline().strip()
 281                 print
 282                 print '~' * 100
 283
 284                 if l.isdigit() and int(l) <= len(details):
 285                     for detail in details[int(l) - 1]:
 286                         if detail['time'] and detail['station']:
 287                             time = '%s - %s' % (detail['time'][0].strftime(TIMEFORMAT), detail['time'][1].strftime(TIMEFORMAT))
 288                             print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
 289                         else:
 290                             print '\n'.join(detail['info'])
 291                         print '-' * 100
 292                 print
 293
 294         except ParserError:
 295             print 'parsererror'
 296
 297     elif state == PageType.UNKNOWN:
 298         print 'PANIC unknown result'