Merge branch 'master' into experimental

author Florian Schweikert <kelvan@logic.at>

Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)

committer Florian Schweikert <kelvan@logic.at>

Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
author Florian Schweikert <kelvan@logic.at>
Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
committer Florian Schweikert <kelvan@logic.at>
Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
diff --combined gotovienna/routing.py

index 279f70a,f60ae84..31cf95c
--- 1/gotovienna/routing.py
--- 2/gotovienna/routing.py
+++ b/gotovienna/routing.py
@@@ -4,12 -4,11 +4,12 @@@
   from BeautifulSoup import BeautifulSoup, NavigableString
   from urllib2 import urlopen
   from urllib import urlencode
- -from datetime import datetime, time
+ +from datetime import datetime, time, timedelta
   from textwrap import wrap
   import argparse
   import sys
   import os.path
+ +import re
   
   from gotovienna import defaults
   
@@@ -26,40 -25,38 +26,71 @@@ class PageType
       UNKNOWN, CORRECTION, RESULT = range(3)
   
   
+ +def extract_city(station):
+ +    """ Extract city from string if present,
+ +    else return default city
+ +    
+ +    >>> extract_city('Karlsplatz, Wien')
+ +    'Wien'
+ +    """
+ +    if len(station.split(',')) > 1:
+ +        return station.split(',')[-1].strip()
+ +    else:
+ +        return 'Wien'
+ +        
+ +def extract_station(station):
+ +    """ Remove city from string
+ +    
+ +    >>> extract_station('Karlsplatz, Wien')
+ +    'Karlsplatz'
+ +    """
+ +    if len(station.split(',')) > 1:
+ +        return station[:station.rindex(',')].strip()
+ +    else:
+ +        return station
+ +    
+ +def split_station(station):
+ +    """ >>> split_station('Karlsplatz, Wien')
+ +    ('Karlsplatz', 'Wien')
+ +    >>> split_station('Karlsplatz')
+ +    ('Karlsplatz', 'Wien')
+ +    """
+ +    if len(station.split(',')) > 1:
+ +        return (station[:station.rindex(',')].strip(), station.split(',')[-1].strip())
+ +    else:
+ +        return (station, 'Wien')
+ +
+ def guess_location_type(location):
+     """Guess type (stop, address, poi) of a location
+ 
+     >>> guess_location_type('pilgramgasse')
+     'stop'
+ 
+     >>> guess_location_type('karlsplatz 14')
+     'address'
+ 
+     >>> guess_location_type('reumannplatz 12/34')
+     'address'
+     """
+     parts = location.split()
+     first_part = parts[0]
+     last_part = parts[-1]
+ 
+     # Assume all single-word locations are stops
+     if len(parts) == 1:
+         return 'stop'
+ 
+     # If the last part is numeric, assume address
+     if last_part.isdigit() and len(parts) > 1:
+         return 'address'
+ 
+     # Addresses with door number (e.g. "12/34")
+     if all(x.isdigit() or x == '/' for x in last_part):
+         return 'address'
+ 
+     # Sane default - assume it's a stop/station name
+     return 'stop'
+ 
- -
   def search(origin_tuple, destination_tuple, dtime=None):
       """ build route request
       returns html result (as urllib response)
@@@ -68,14 -65,18 +99,25 @@@
           dtime = datetime.now()
   
       origin, origin_type = origin_tuple
+ +    origin, origin_city = split_station(origin)
+ +    
       destination, destination_type = destination_tuple
+ +    destination, destination_city = split_station(destination)
+ +
+ +
+ +    if not origin_type in POSITION_TYPES or\
+ +        not destination_type in POSITION_TYPES:
+ 
+     if origin_type is None:
+         origin_type = guess_location_type(origin)
+         print 'Guessed origin type:', origin_type
+ 
+     if destination_type is None:
+         destination_type = guess_location_type(destination)
+         print 'Guessed destination type:', destination_type
+ 
+     if (origin_type not in POSITION_TYPES or
+             destination_type not in POSITION_TYPES):
           raise ParserError('Invalid position type')
   
       post = defaults.search_post
@@@ -85,8 -86,6 +127,8 @@@
       post['type_destination'] = destination_type
       post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
       post['itdTime'] = dtime.strftime('%H:%M')
+ +    post['place_origin'] = origin_city
+ +    post['place_destination'] = destination_city
       params = urlencode(post)
       url = '%s?%s' % (defaults.action, params)
   
@@@ -116,30 -115,25 +158,32 @@@ class sParser
   
           return PageType.UNKNOWN
   
+     state = property(check_page)
+ 
       def get_correction(self):
- -        nlo = self.soup.find('select', {'id': 'nameList_origin'})
- -        nld = self.soup.find('select', {'id': 'nameList_destination'})
- -
- -        if not nlo and not nld:
- -            raise ParserError('Unable to parse html')
- -
- -        if nlo:
- -            origin = map(lambda x: x.text, nlo.findAll('option'))
- -        else:
- -            origin = []
- -        if nld:
- -            destination = map(lambda x: x.text, nld.findAll('option'))
+ +        names_origin = self.soup.find('select', {'id': 'nameList_origin'})
+ +        names_destination = self.soup.find('select', {'id': 'nameList_destination'})
+ +        places_origin = self.soup.find('select', {'id': 'placeList_origin'})
+ +        places_destination = self.soup.find('select', {'id': 'placeList_destination'})
+ +        
+ +
+ +        if names_origin or names_destination or places_origin or places_destination:
+ +            dict = {}
+ +            
+ +            if names_origin:
+ +                dict['origin'] = map(lambda x: x.text, names_origin.findAll('option'))
+ +            if names_destination:
+ +                dict['destination'] = map(lambda x: x.text, names_destination.findAll('option'))
+ +                
+ +            if places_origin:
+ +                dict['place_origin'] = map(lambda x: x.text, names_origin.findAll('option'))
+ +            if names_destination:
+ +                dict['place_destination'] = map(lambda x: x.text, names_destination.findAll('option'))
+ +    
+ +            return dict
+ +        
           else:
- -            destination = []
- -
- -        return (origin, destination)
+ +            raise ParserError('Unable to parse html')
   
       def get_result(self):
           return rParser(str(self.soup))
@@@ -186,51 -180,24 +230,51 @@@ class rParser
               return None
   
       @classmethod
- -    def get_time(cls, x):
+ +    def get_datetime(cls, x):
           y = rParser.get_tdtext(x, 'col_time')
           if y:
               if (y.find("-") > 0):
- -                return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+ +                # overview mode
+ +                times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+ +                d = rParser.get_date(x)
+ +                from_dtime = datetime.combine(d, times[0])
+ +                if times[0] > times[1]:
+ +                    # dateline crossing
+ +                    to_dtime = datetime.combine(d + timedelta(1), times[1])
+ +                else:
+ +                    to_dtime = datetime.combine(d, times[1])
+ +                    
+ +                return [from_dtime, to_dtime]
+ +            
               else:
- -                return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
+ +                dtregex = {'date' : '\d\d\.\d\d',
+ +                           'time': '\d\d:\d\d'}
+ +                
+ +                regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
+ +                ma = re.match(regex, y)
+ +                
+ +                if not ma:
+ +                    return []
+ +                
+ +                gr = ma.groupdict()
+ +                
+ +                def extract_datetime(gr, n):
+ +                    if 'date%d' % n in gr and gr['date%d' % n]:
+ +                        from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
+ +                    else:
+ +                        t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
+ +                        d = datetime.today().date()
+ +                        return datetime.combine(d, t)
+ +                
+ +                # detail mode
+ +                from_dtime = extract_datetime(gr, 1)
+ +                to_dtime = extract_datetime(gr, 2)
+ +                
+ +                return [from_dtime, to_dtime]
+ +                
           else:
               return []
   
- -    @classmethod
- -    def get_duration(cls, x):
- -        y = rParser.get_tdtext(x, 'col_duration')
- -        if y:
- -            return time(*map(int, y.split(":")))
- -        else:
- -            return None
- -
       def __iter__(self):
           for detail in self.details():
               yield detail
@@@ -239,7 -206,7 +283,7 @@@
           tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
   
           trips = map(lambda x: map(lambda y: {
- -                        'time': rParser.get_time(y),
+ +                        'timespan': rParser.get_datetime(y),
                           'station': map(lambda z: z[2:].strip(),
                                          filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
                           'info': map(lambda x: x.strip(),
@@@ -274,7 -241,9 +318,7 @@@
               rows = table.findAll('tr')[1:] # cut off headline
   
               overview = map(lambda x: {
- -                               'date': rParser.get_date(x),
- -                               'time': rParser.get_time(x),
- -                               'duration': rParser.get_duration(x), # grab duration
+ +                               'timespan': rParser.get_datetime(x),
                                  'change': rParser.get_change(x),
                                  'price': rParser.get_price(x),
                              },
diff --combined itip

index 049a17d,3f04e1d..4af78b1
--- 1/itip
--- 2/itip
+++ b/itip
@@@ -3,51 -3,102 +3,120 @@@
   
   import argparse
   
+ from gotovienna.utils import *
   from gotovienna.realtime import *
   
- 
   parser = argparse.ArgumentParser(description='Get realtime public transport information for Vienna')
- parser.add_argument('-l', metavar='name', type=str, help='line name')
- parser.add_argument('-s', metavar='name', type=str, help='station name')
+ parser.add_argument('line', nargs='?', help='line name (e.g. 59A)')
+ parser.add_argument('station', nargs='?', help='station name (e.g. Karlsplatz)')
   
   args = parser.parse_args()
   
   itip = ITipParser()
- lines = itip.lines
- if args.l:
-     l = args.l.upper()
- else:
-     l = None
- if args.s:
-     s = args.s.decode('UTF-8')
+ 
+ if args.line:
+     # Convert line name to uppercase (e.g. 'u4' -> 'U4')
+     args.line = args.line.upper()
+ 
+ if args.station:
+     args.station = args.station.decode('utf-8')
+ 
+ if args.line in itip.lines:
+     ITEM_WIDTH = 33
+     ITEM_SPACING = 4
+ 
+     # FIXME: change get_stations() to return (headers, stations) directly
+     stations = sorted(itip.get_stations(args.line).items())
+     headers, stations = zip(*stations)
+ 
+     maxlength = max(len(stops) for stops in stations)
+     for stops in stations:
+         # Pad station list with empty items for printing, so that
+         # different-sized lists aren't truncated (with zip below)
+         stops.extend([('', '')]*(maxlength-len(stops)))
+ 
+     stations_table = zip(*stations)
+     fmt = '%%-%ds' % ITEM_WIDTH
+     spacer = ' ' * ITEM_SPACING
+ 
+     print
+     print spacer, spacer.join(inblue(fmt % ('Richtung %s' % name))
+             for name in headers)
+     print spacer, spacer.join('-'*ITEM_WIDTH for name in headers)
+ 
+     def match_station(query, station):
+         return query and station and (query.lower() in station.lower())
+ 
+     for row in stations_table:
+         print spacer, spacer.join(ingreen(fmt%name)
+                 if match_station(args.station, name)
+                 else fmt%name
+                 for name, url in row)
+     print
+ 
+     # Get matching stations
+     stations = zip(headers, stations)
+     details = [(direction, name, url) for direction, stops in stations
+             for name, url in stops if match_station(args.station, name)]
+ 
+     # User entered a station, but no matches were found
+     if args.station and not details:
+         print inred('No station matched your query.')
+         print
+ 
+     # Format a departure time (in minutes from now) for display
+     def format_departure(minutes):
+         if minutes == 0:
+             return inred('now')
+         elif minutes == 1:
+             return inblue('1') + ' min'
+         else:
+             return inblue('%d' % minutes) + ' mins'
+ 
+     # Print the departure times for all matched stations
+     for direction, name, url in details:
+         print ingreen(name), '->', inblue(direction)
+ 
+         departures = itip.get_departures(url)
+         if departures:
+             print '  Next departures:', ', '.join(format_departure(x)
+                     for x in departures)
+         else:
+             print '  No departure information.'
+         print
   else:
+ +    s = ''
+ +
+ +if l and l in lines:
+ +    stations = itip.get_stations(l)
+ +    for key in stations.keys():
+ +        if not s:
+ +            print '* %s:' % key
+ +        for station in stations[key]:
+ +            if s:
+ +                if s.startswith(station[0]) or station[0].startswith(s):
+ +                    if station[0] == key:
+ +                        # skip station if destination
+ +                        continue
+ +                    # FIXME
+ +                    print '* %s\n  %s .....' % (key, station[0]), itip.get_departures(station[1])
+ +            else:
+ +                print '    %s' % station[0]
+ +elif not l:
       ITEMS_PER_LINE = 12
       ITEM_WIDTH = 5
       LINE_WIDTH = (ITEMS_PER_LINE*ITEM_WIDTH + ITEMS_PER_LINE)
   
+     if args.line:
+         print
+         print inred('The given line was not found. Valid lines:')
+ 
       print
-     for label, remaining in categorize_lines(lines.keys()):
+     for label, remaining in categorize_lines(itip.lines):
           prefix, fill, postfix = '|== ', '=', '==- -'
           before, after = prefix+label+' ', postfix
           padding = LINE_WIDTH - len(before+after)
+         before = before.replace(label, inblue(label))
           print ''.join((before, fill*padding, after))
   
           while remaining:
diff --combined scotty

index cf5086b,51dcfd6..c768149
--- 1/scotty
--- 2/scotty
+++ b/scotty
@@@ -4,13 -4,14 +4,14 @@@
   import argparse
   import sys
   
+ from gotovienna.utils import *
   from gotovienna.routing import *
   
   parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
- parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
- parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
- parser.add_argument('origin', nargs='?')
- parser.add_argument('destination', nargs='?')
+ parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default=None, choices=POSITION_TYPES)
+ parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default=None, choices=POSITION_TYPES)
+ parser.add_argument('origin', nargs='?', help='origin station name')
+ parser.add_argument('destination', nargs='?', help='destination station name')
   
   args = parser.parse_args()
   
@@@ -20,123 -21,78 +21,195 @@@ if not args.origin
   if not args.destination:
       args.destination = raw_input('Destination: ')
   
- print >>sys.stderr, 'Searching...',
- html = search((args.origin, args.ot), (args.destination, args.dt)).read()
+ def do_search(args):
+     if isinstance(args.origin, unicode):
+         args.origin = args.origin.encode('utf-8', 'ignore')
+     elif isinstance(args.destination, unicode):
+         args.destination = args.destination.encode('utf-8', 'ignore')
+ 
+     result = search((args.origin, args.ot),
+             (args.destination, args.dt))
+ 
+     return sParser(result.read())
+ 
+ print >>sys.stderr, 'Searching...\n',
+ parser = do_search(args)
   print >>sys.stderr, 'done.'
   
+ +finished = False
+ +while not finished:
+ +
+ +    html = search((args.origin, args.ot), (args.destination, args.dt)).read()
+ +    
+ +    parser = sParser(html)
+ +    state = parser.check_page()
+ +
+ +    if state == PageType.CORRECTION:
+ +        try:
+ +            cor = parser.get_correction()
+ +            origin, origin_place = split_station(args.origin)
+ +            destination, destination_place = split_station(args.destination)
+ +            
+ +            # FIXME refactoring
+ +            
+ +            if cor.has_key('origin'):
+ +                print
+ +                print '* Origin ambiguous:'
+ +                l = None
+ +                while not l or not l.isdigit() or int(l) > len(cor['origin']):
+ +                    i = 1
+ +                    for c in cor['origin']:
+ +                        print '%d. %s' % (i, c)
+ +                        i += 1
+ +                    l = sys.stdin.readline().strip()
+ +    
+ +                origin = cor['origin'][int(l) - 1]
+ +    
+ +            if cor.has_key('destination'):
+ +                print
+ +                print '* Destination ambiguous:'
+ +                l = None
+ +                while not l or not l.isdigit() or int(l) > len(cor['destination']):
+ +                    i = 1
+ +                    for c in cor['destination']:
+ +                        print '%d. %s' % (i, c)
+ +                        i += 1
+ +                    l = sys.stdin.readline().strip()
+ +    
+ +                destination = cor['destination'][int(l) - 1]
+ +                
+ +            if cor.has_key('origin_place'):
+ +                print
+ +                print '* Origin place ambiguous:'
+ +                l = None
+ +                while not l or not l.isdigit() or int(l) > len(cor['origin_place']):
+ +                    i = 1
+ +                    for c in cor['origin_place']:
+ +                        print '%d. %s' % (i, c)
+ +                        i += 1
+ +                    l = sys.stdin.readline().strip()
+ +    
+ +                origin_place = cor['origin_place'][int(l) - 1]
+ +    
+ +            if cor.has_key('destination_place'):
+ +                print
+ +                print '* Destination place ambiguous:'
+ +                l = None
+ +                while not l or not l.isdigit() or int(l) > len(cor['destination_place']):
+ +                    i = 1
+ +                    for c in cor['destination_place']:
+ +                        print '%d. %s' % (i, c)
+ +                        i += 1
+ +                    l = sys.stdin.readline().strip()
+ +    
+ +                destination_place = cor['destination_place'][int(l) - 1]
+ +                
+ +            args.origin = '%s, %s' % (origin, origin_place)
+ +            args.destination = '%s, %s' %(destination, destination_place)
+ +            
+ +        except ParserError:
+ +            print 'PANIC at correction page'
+ +            finished = True
+ +    
+ +    elif state == PageType.RESULT:
+ +        parser = rParser(html)
+ +        try:
+ +            overviews = parser.overview
+ +            details = parser.details
+ +            l = ''
+ +            while not l == 'q':
+ +                for idx, overview in enumerate(overviews):
+ +                    timespan = overview['timespan']
+ +                    if not timespan:
+ +                        # XXX: Bogus data for e.g. Pilgramgasse->Karlsplatz?!
+ +                        continue
+ +                    
+ +                    str_timespan = timespan[0].strftime('[%y-%d-%m] %H:%M')
+ +                    str_timespan += '-' + timespan[1].strftime('%H:%M')
+ +                    timedelta = timespan[1] - timespan[0]
+ +                    print '%d. %s (%s)' % (idx + 1,
+ +                            str_timespan,
+ +                            timedelta)
+ +                print 'q. Quit'
+ +                l = sys.stdin.readline().strip()
+ +                print
+ +                print '~' * 80
+ +    
+ +                if l.isdigit() and int(l) <= len(details):
+ +                    for detail in details[int(l) - 1]:
+ +                        if detail['timespan'] and detail['station']:
+ +                            time = '%s - %s' % (detail['timespan'][0].strftime(TIMEFORMAT), detail['timespan'][1].strftime(TIMEFORMAT))
+ +                            print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
+ +                        else:
+ +                            print '\n'.join(detail['info'])
+ +                        print '-' * 80
+ +                print
+ +        
+ +            finished = True
+ +        
+ +        except ParserError:
+ +            print 'parsererror'
+ +    
+ +    elif state == PageType.UNKNOWN:
+ +        print 'PANIC unknown result'
++
+ while parser.state == PageType.CORRECTION:
+     origin_corr, destination_corr = parser.get_correction()
+ 
+     if origin_corr:
+         print
+         print '* Origin ambiguous:'
+         lo = None
+         while not lo or not lo.isdigit() or int(lo) > len(origin_corr):
+             for idx, correction in enumerate(origin_corr):
+                 print '%3d. %s' % (idx+1, correction)
+             lo = sys.stdin.readline().strip()
+ 
+         args.origin = origin_corr[int(lo) - 1]
+ 
+     if destination_corr:
+         print
+         print '* Destination ambiguous:'
+         ld = None
+         while not ld or not ld.isdigit() or int(ld) > len(destination_corr):
+             for idx, correction in enumerate(destination_corr):
+                 print '%3d. %s' % (idx+1, correction)
+             ld = sys.stdin.readline().strip()
+ 
+         args.destination = destination_corr[int(ld) - 1]
+ 
+     parser = do_search(args)
+ 
+ if parser.state == PageType.RESULT:
+     parser = parser.get_result()
+     overviews = parser.overview
+     details = parser.details
+     l = ''
+     while not l == 'q':
+         for idx, overview in enumerate(overviews):
+             if not overview['date'] or not overview['time']:
+                 # XXX: Bogus data for e.g. Pilgramgasse->Karlsplatz?!
+                 continue
+ 
+             print '%d. [%s] %s-%s (%s)' % (idx + 1,
+                     overview['date'],
+                     overview['time'][0],
+                     overview['time'][1],
+                     overview['duration'])
+         print 'q. Quit'
+         l = sys.stdin.readline().strip()
+         print
+         print '~' * 79
+ 
+         if l.isdigit() and int(l) <= len(details):
+             for detail in details[int(l) - 1]:
+                 if detail['time'] and detail['station']:
+                     time = '%s - %s' % (detail['time'][0].strftime(TIMEFORMAT), detail['time'][1].strftime(TIMEFORMAT))
+                     print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
+                 else:
+                     print '\n'.join(detail['info'])
+                 print '-' * 79
+         print
+ else:
+     print 'Error - unknown page returned.'
+
author	Florian Schweikert <kelvan@logic.at>
	Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
committer	Florian Schweikert <kelvan@logic.at>
	Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
		1	2
gotovienna/routing.py	patch |	diff1 |	diff2 |	blob | history
itip	patch |	diff1 |	diff2 |	blob | history
scotty	patch |	diff1 |	diff2 |	blob | history