Merge branch 'master' into experimental

author     Florian Schweikert <kelvan@logic.at>
           Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
committer  Florian Schweikert <kelvan@logic.at>
           Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)

diff --git a/gotovienna/routing.py b/gotovienna/routing.py

index f60ae84..31cf95c 100644 (file)
--- a/gotovienna/routing.py
+++ b/gotovienna/routing.py
@@ -4,11 +4,12 @@
  from BeautifulSoup import BeautifulSoup, NavigableString
  from urllib2 import urlopen
  from urllib import urlencode
-from datetime import datetime, time
+from datetime import datetime, time, timedelta
  from textwrap import wrap
  import argparse
  import sys
  import os.path
+import re
  
  from gotovienna import defaults
  
@@ -25,6 +26,40 @@ class PageType:
      UNKNOWN, CORRECTION, RESULT = range(3)
  
  
+def extract_city(station):
+    """ Extract city from string if present,
+    else return default city
+    
+    >>> extract_city('Karlsplatz, Wien')
+    'Wien'
+    """
+    if len(station.split(',')) > 1:
+        return station.split(',')[-1].strip()
+    else:
+        return 'Wien'
+        
+def extract_station(station):
+    """ Remove city from string
+    
+    >>> extract_station('Karlsplatz, Wien')
+    'Karlsplatz'
+    """
+    if len(station.split(',')) > 1:
+        return station[:station.rindex(',')].strip()
+    else:
+        return station
+    
+def split_station(station):
+    """ >>> split_station('Karlsplatz, Wien')
+    ('Karlsplatz', 'Wien')
+    >>> split_station('Karlsplatz')
+    ('Karlsplatz', 'Wien')
+    """
+    if len(station.split(',')) > 1:
+        return (station[:station.rindex(',')].strip(), station.split(',')[-1].strip())
+    else:
+        return (station, 'Wien')
+
  def guess_location_type(location):
      """Guess type (stop, address, poi) of a location
  
@@ -56,7 +91,6 @@ def guess_location_type(location):
      # Sane default - assume it's a stop/station name
      return 'stop'
  
-
  def search(origin_tuple, destination_tuple, dtime=None):
      """ build route request
      returns html result (as urllib response)
@@ -65,7 +99,14 @@ def search(origin_tuple, destination_tuple, dtime=None):
          dtime = datetime.now()
  
      origin, origin_type = origin_tuple
+    origin, origin_city = split_station(origin)
+    
      destination, destination_type = destination_tuple
+    destination, destination_city = split_station(destination)
+
+
+    if not origin_type in POSITION_TYPES or\
+        not destination_type in POSITION_TYPES:
  
      if origin_type is None:
          origin_type = guess_location_type(origin)
@@ -86,6 +127,8 @@ def search(origin_tuple, destination_tuple, dtime=None):
      post['type_destination'] = destination_type
      post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
      post['itdTime'] = dtime.strftime('%H:%M')
+    post['place_origin'] = origin_city
+    post['place_destination'] = destination_city
      params = urlencode(post)
      url = '%s?%s' % (defaults.action, params)
  
@@ -118,22 +161,29 @@ class sParser:
      state = property(check_page)
  
      def get_correction(self):
-        nlo = self.soup.find('select', {'id': 'nameList_origin'})
-        nld = self.soup.find('select', {'id': 'nameList_destination'})
-
-        if not nlo and not nld:
-            raise ParserError('Unable to parse html')
-
-        if nlo:
-            origin = map(lambda x: x.text, nlo.findAll('option'))
-        else:
-            origin = []
-        if nld:
-            destination = map(lambda x: x.text, nld.findAll('option'))
+        names_origin = self.soup.find('select', {'id': 'nameList_origin'})
+        names_destination = self.soup.find('select', {'id': 'nameList_destination'})
+        places_origin = self.soup.find('select', {'id': 'placeList_origin'})
+        places_destination = self.soup.find('select', {'id': 'placeList_destination'})
+        
+
+        if names_origin or names_destination or places_origin or places_destination:
+            dict = {}
+            
+            if names_origin:
+                dict['origin'] = map(lambda x: x.text, names_origin.findAll('option'))
+            if names_destination:
+                dict['destination'] = map(lambda x: x.text, names_destination.findAll('option'))
+                
+            if places_origin:
+                dict['place_origin'] = map(lambda x: x.text, names_origin.findAll('option'))
+            if names_destination:
+                dict['place_destination'] = map(lambda x: x.text, names_destination.findAll('option'))
+    
+            return dict
+        
          else:
-            destination = []
-
-        return (origin, destination)
+            raise ParserError('Unable to parse html')
  
      def get_result(self):
          return rParser(str(self.soup))
@@ -180,24 +230,51 @@ class rParser:
              return None
  
      @classmethod
-    def get_time(cls, x):
+    def get_datetime(cls, x):
          y = rParser.get_tdtext(x, 'col_time')
          if y:
              if (y.find("-") > 0):
-                return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+                # overview mode
+                times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+                d = rParser.get_date(x)
+                from_dtime = datetime.combine(d, times[0])
+                if times[0] > times[1]:
+                    # dateline crossing
+                    to_dtime = datetime.combine(d + timedelta(1), times[1])
+                else:
+                    to_dtime = datetime.combine(d, times[1])
+                    
+                return [from_dtime, to_dtime]
+            
              else:
-                return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
+                dtregex = {'date' : '\d\d\.\d\d',
+                           'time': '\d\d:\d\d'}
+                
+                regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
+                ma = re.match(regex, y)
+                
+                if not ma:
+                    return []
+                
+                gr = ma.groupdict()
+                
+                def extract_datetime(gr, n):
+                    if 'date%d' % n in gr and gr['date%d' % n]:
+                        from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
+                    else:
+                        t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
+                        d = datetime.today().date()
+                        return datetime.combine(d, t)
+                
+                # detail mode
+                from_dtime = extract_datetime(gr, 1)
+                to_dtime = extract_datetime(gr, 2)
+                
+                return [from_dtime, to_dtime]
+                
          else:
              return []
  
-    @classmethod
-    def get_duration(cls, x):
-        y = rParser.get_tdtext(x, 'col_duration')
-        if y:
-            return time(*map(int, y.split(":")))
-        else:
-            return None
-
      def __iter__(self):
          for detail in self.details():
              yield detail
@@ -206,7 +283,7 @@ class rParser:
          tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
  
          trips = map(lambda x: map(lambda y: {
-                        'time': rParser.get_time(y),
+                        'timespan': rParser.get_datetime(y),
                          'station': map(lambda z: z[2:].strip(),
                                         filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
                          'info': map(lambda x: x.strip(),
@@ -241,9 +318,7 @@ class rParser:
              rows = table.findAll('tr')[1:] # cut off headline
  
              overview = map(lambda x: {
-                               'date': rParser.get_date(x),
-                               'time': rParser.get_time(x),
-                               'duration': rParser.get_duration(x), # grab duration
+                               'timespan': rParser.get_datetime(x),
                                 'change': rParser.get_change(x),
                                 'price': rParser.get_price(x),
                             },
diff --git a/itip b/itip

index 3f04e1d..4af78b1 100755 (executable)
--- a/itip
+++ b/itip
@@ -85,6 +85,24 @@ if args.line in itip.lines:
              print '  No departure information.'
          print
  else:
+    s = ''
+
+if l and l in lines:
+    stations = itip.get_stations(l)
+    for key in stations.keys():
+        if not s:
+            print '* %s:' % key
+        for station in stations[key]:
+            if s:
+                if s.startswith(station[0]) or station[0].startswith(s):
+                    if station[0] == key:
+                        # skip station if destination
+                        continue
+                    # FIXME
+                    print '* %s\n  %s .....' % (key, station[0]), itip.get_departures(station[1])
+            else:
+                print '    %s' % station[0]
+elif not l:
      ITEMS_PER_LINE = 12
      ITEM_WIDTH = 5
      LINE_WIDTH = (ITEMS_PER_LINE*ITEM_WIDTH + ITEMS_PER_LINE)
diff --git a/scotty b/scotty

index 51dcfd6..c768149 100755 (executable)
--- a/scotty
+++ b/scotty
@@ -36,6 +36,123 @@ print >>sys.stderr, 'Searching...\n',
  parser = do_search(args)
  print >>sys.stderr, 'done.'
  
+finished = False
+while not finished:
+
+    html = search((args.origin, args.ot), (args.destination, args.dt)).read()
+    
+    parser = sParser(html)
+    state = parser.check_page()
+
+    if state == PageType.CORRECTION:
+        try:
+            cor = parser.get_correction()
+            origin, origin_place = split_station(args.origin)
+            destination, destination_place = split_station(args.destination)
+            
+            # FIXME refactoring
+            
+            if cor.has_key('origin'):
+                print
+                print '* Origin ambiguous:'
+                l = None
+                while not l or not l.isdigit() or int(l) > len(cor['origin']):
+                    i = 1
+                    for c in cor['origin']:
+                        print '%d. %s' % (i, c)
+                        i += 1
+                    l = sys.stdin.readline().strip()
+    
+                origin = cor['origin'][int(l) - 1]
+    
+            if cor.has_key('destination'):
+                print
+                print '* Destination ambiguous:'
+                l = None
+                while not l or not l.isdigit() or int(l) > len(cor['destination']):
+                    i = 1
+                    for c in cor['destination']:
+                        print '%d. %s' % (i, c)
+                        i += 1
+                    l = sys.stdin.readline().strip()
+    
+                destination = cor['destination'][int(l) - 1]
+                
+            if cor.has_key('origin_place'):
+                print
+                print '* Origin place ambiguous:'
+                l = None
+                while not l or not l.isdigit() or int(l) > len(cor['origin_place']):
+                    i = 1
+                    for c in cor['origin_place']:
+                        print '%d. %s' % (i, c)
+                        i += 1
+                    l = sys.stdin.readline().strip()
+    
+                origin_place = cor['origin_place'][int(l) - 1]
+    
+            if cor.has_key('destination_place'):
+                print
+                print '* Destination place ambiguous:'
+                l = None
+                while not l or not l.isdigit() or int(l) > len(cor['destination_place']):
+                    i = 1
+                    for c in cor['destination_place']:
+                        print '%d. %s' % (i, c)
+                        i += 1
+                    l = sys.stdin.readline().strip()
+    
+                destination_place = cor['destination_place'][int(l) - 1]
+                
+            args.origin = '%s, %s' % (origin, origin_place)
+            args.destination = '%s, %s' %(destination, destination_place)
+            
+        except ParserError:
+            print 'PANIC at correction page'
+            finished = True
+    
+    elif state == PageType.RESULT:
+        parser = rParser(html)
+        try:
+            overviews = parser.overview
+            details = parser.details
+            l = ''
+            while not l == 'q':
+                for idx, overview in enumerate(overviews):
+                    timespan = overview['timespan']
+                    if not timespan:
+                        # XXX: Bogus data for e.g. Pilgramgasse->Karlsplatz?!
+                        continue
+                    
+                    str_timespan = timespan[0].strftime('[%y-%d-%m] %H:%M')
+                    str_timespan += '-' + timespan[1].strftime('%H:%M')
+                    timedelta = timespan[1] - timespan[0]
+                    print '%d. %s (%s)' % (idx + 1,
+                            str_timespan,
+                            timedelta)
+                print 'q. Quit'
+                l = sys.stdin.readline().strip()
+                print
+                print '~' * 80
+    
+                if l.isdigit() and int(l) <= len(details):
+                    for detail in details[int(l) - 1]:
+                        if detail['timespan'] and detail['station']:
+                            time = '%s - %s' % (detail['timespan'][0].strftime(TIMEFORMAT), detail['timespan'][1].strftime(TIMEFORMAT))
+                            print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
+                        else:
+                            print '\n'.join(detail['info'])
+                        print '-' * 80
+                print
+        
+            finished = True
+        
+        except ParserError:
+            print 'parsererror'
+    
+    elif state == PageType.UNKNOWN:
+        print 'PANIC unknown result'
+
  while parser.state == PageType.CORRECTION:
      origin_corr, destination_corr = parser.get_correction()
author	Florian Schweikert <kelvan@logic.at>
	Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
committer	Florian Schweikert <kelvan@logic.at>
	Fri, 7 Oct 2011 00:41:01 +0000 (02:41 +0200)
gotovienna/routing.py		patch | blob | history
itip		patch | blob | history
scotty		patch | blob | history