added parser for routing
authorFlorian Schweikert <kelvan@logic.at>
Mon, 5 Sep 2011 14:01:19 +0000 (16:01 +0200)
committerFlorian Schweikert <kelvan@logic.at>
Mon, 5 Sep 2011 14:01:19 +0000 (16:01 +0200)
README
scotty.py [new file with mode: 0644]
settings.py
tests/routing.py [new file with mode: 0644]
tests/scotty.py [deleted file]

diff --git a/README b/README
index d1f6483..e225d65 100644 (file)
--- a/README
+++ b/README
@@ -1,2 +1,6 @@
 Search for connection on wienerlinien.at
 Using qml gui to show results
+
+Show next bus/tram/underground departures.
+
+This program is unofficial.
diff --git a/scotty.py b/scotty.py
new file mode 100644 (file)
index 0000000..6a6f564
--- /dev/null
+++ b/scotty.py
@@ -0,0 +1,209 @@
+from BeautifulSoup import BeautifulSoup, NavigableString
+from urllib2 import urlopen
+from urllib import urlencode
+import settings
+from datetime import datetime, time
+from textwrap import wrap
+
+POSITION_TYPES = ('stop', 'address', 'poi')
+
+class ParserException(Exception):
+    
+    def __init__(self, msg = 'Parser error'):
+        self.message = msg
+
+class PageType:
+    UNKNOWN, CORRECTION, RESULT = range(3)
+    
+
+def search(origin_tuple, destination_tuple, dtime=None):
+    """ build route request
+    returns html result (as urllib response)
+    """
+    if not dtime:
+        dtime = datetime.now()
+    
+    origin, origin_type = origin_tuple
+    destination, destination_type = destination_tuple
+    if not origin_type in POSITION_TYPES or\
+        not destination_type in POSITION_TYPES:
+        raise ParserException('Invalid position type')
+        
+    post = settings.search_post
+    post['name_origin'] = origin
+    post['type_origin'] = origin_type
+    post['name_destination'] = destination
+    post['type_destination'] = destination_type
+    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
+    post['itdTime'] = dtime.strftime('%H:%M')
+    params = urlencode(post)
+    return urlopen('%s?%s' % (settings.action, params))
+
+
+class sParser:
+    """ Parser for search response
+    """
+
+    def __init__(self, html):
+        self.bs = BeautifulSoup(html)
+    
+    def check_page(self):
+        if self.bs.find('form', {'id': 'form_efaresults'}):
+            return PageType.RESULT
+        
+        if self.bs.find('div', {'class':'form_error'}):
+            return PageType.CORRECTION
+        
+        return PageType.UNKNOWN
+    
+    def get_correction(self):
+        nlo = self.bs.find('select', {'id': 'nameList_origin'})
+        nld = self.bs.find('select', {'id': 'nameList_destination'})
+        
+        if not nlo or not nld:
+            raise ParserError('Unable to parse html')
+        
+        origin = nlo.findAll('option')
+        destination = nld.findAll('option')
+        
+        if not origin:
+            origin = []
+        if not destination:
+            destination = []
+        
+        return (origin, destination)
+    
+    def get_result(self):
+        return rParser(str(self.bs))
+        
+        
+        
+class rParser:
+    """ Parser for routing results
+    """
+
+    def __init__(self, html):
+        self.soup = BeautifulSoup(html)
+        self._overview = None
+        self._details = None
+
+    @classmethod
+    def get_tdtext(cls, x, cl):
+            return x.find('td', {'class': cl}).text
+    
+    @classmethod
+    def get_change(cls, x):
+        y = rParser.get_tdtext(x, 'col_change')
+        if y:
+            return int(y)
+        else:
+            return 0
+
+    @classmethod
+    def get_price(cls, x):
+        y = rParser.get_tdtext(x, 'col_price')
+        if y.find(','):
+            return float(y.replace(',', '.'))
+        else:
+            return 0.0
+
+    @classmethod
+    def get_date(cls, x):
+        y = rParser.get_tdtext(x, 'col_date')
+        if y:
+            return datetime.strptime(y, '%d.%m.%Y').date()
+        else:
+            return None
+        
+    @classmethod
+    def get_time(cls, x):
+        y = rParser.get_tdtext(x, 'col_time')
+        if y:
+            if (y.find("-") > 0):
+                return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+            else:
+                return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
+        else:
+            return []
+        
+    @classmethod
+    def get_duration(cls, x):
+        y = rParser.get_tdtext(x, 'col_duration')
+        if y:
+            return time(*map(int, y.split(":")))
+        else:
+            return None
+
+    def __iter__(self):
+        for detail in self.details():
+            yield detail
+
+    def _parse_details(self):
+        tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
+
+        trips = map(lambda x: map(lambda y: {
+                        'time': rParser.get_time(y),
+                        'station': map(lambda z: z[2:].strip(),
+                                       filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
+                        'info': map(lambda x: x.strip(),
+                                    filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
+                    }, x.find('tbody').findAll('tr')),
+                    tours) # all routes
+        return trips
+
+    @property
+    def details(self):
+        """returns list of trip details
+        [ [ { 'time': [datetime.time, datetime.time] if time else [],
+              'station': [u'start', u'end'] if station else [],
+              'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
+            }, ... # next trip step 
+          ], ... # next trip possibility
+        ]
+        """
+        if not self._details:
+            self._details = self._parse_details()
+
+        return self._details
+
+    def _parse_overview(self):
+
+        # get overview table
+        table = self.soup.find('table', {'id': 'tbl_fahrten'})
+
+        # check if there is an overview table
+        if table and table.findAll('tr'):
+            # get rows
+            rows = table.findAll('tr')[1:] # cut off headline
+            
+            overview = map(lambda x: {
+                               'date': rParser.get_date(x),
+                               'time': rParser.get_time(x),
+                               'duration': rParser.get_duration(x), # grab duration
+                               'change': rParser.get_change(x), 
+                               'price': rParser.get_price(x),
+                           },
+                           rows)
+        else:
+            raise ParserError('Unable to parse details')
+
+        return overview
+
+    @property
+    def overview(self):
+        """dict containing
+        date: datetime
+        time: [time, time]
+        duration: time
+        change: int
+        price: float
+        """
+        if not self._overview:
+            try:
+                self._overview = self._parse_overview()
+            except AttributeError:
+                f = open('DEBUG', 'w')
+                f.write(str(self.soup))
+                f.close()
+
+        return self._overview
index 2b86135..81b0b96 100644 (file)
@@ -9,3 +9,35 @@ hist_file = path.join(folder, '.wl_history')
 
 line_overview = 'http://www.wienerlinien.at/itip/linienwahl/'
 stations = 'http://www.wienerlinien.at/itip/haltestelle?letter=%s'
+
+# Template of the request parameters for a route search.
+# The entries set to None (name/type of origin and destination,
+# itdDateDayMonthYear, itdTime) are filled in by scotty.search().
+search_post = {'language': 'de',
+            'sessionID': 0,
+            'requestID': 0,
+            'execInst': 'normal',
+            'command': '',
+            'anySigWhenPerfectNoOtherMatches': 1,
+            'itdLPxx_locationServerActive': '',
+            'locationServerActive': 0,
+            'typeInfo_origin': 'invalid',
+            'placeState_origin': 'empty',
+            'placeInfo_origin': 'invalid',
+            'place_origin': 'Wien', # overwrite if necessary
+            'type_origin': None, # stop/address/poi
+            'nameState_origin': 'empty',
+            'nameInfo_origin': 'invalid',
+            'anyType_origin': '',
+            'name_origin': None,
+            'typeInfo_destination': 'invalid',
+            'placeState_destination': 'empty',
+            'placeInfo_destination': 'invalid',
+            'place_destination': 'Wien', # overwrite if necessary
+            'type_destination': None, # stop/address/poi
+            'nameState_destination': 'empty',
+            'nameInfo_destination': 'invalid',
+            'anyType_destination': '', # maybe nice
+            'name_destination': None,
+            'itdTripDateTimeDepArr': 'dep',
+            'itdDateDayMonthYear': None, # DD.MM.YYYY
+            'itdTime': None, # HH:MM
+            'submitbutton': 'SUCHEN'
+        }
\ No newline at end of file
diff --git a/tests/routing.py b/tests/routing.py
new file mode 100644 (file)
index 0000000..c382338
--- /dev/null
@@ -0,0 +1,66 @@
+import unittest
+from wlSearch import Search
+from datetime import datetime
+from parseHtml import Parser, iParser, ParserError
+from BeautifulSoup import BeautifulSoup
+
+# Shared fixture for FetchTest: one result page is requested when the
+# module is imported and parsed into `bs`.
+# NOTE(review): Search.get_html presumably performs a network request,
+# confirm against wlSearch.
+origin = 'Karlsplatz'
+destination = 'Handelskai'
+dtime = datetime.now()
+# fixed time of day (15:00), the date stays today
+dtime = dtime.replace(hour=15, minute=0)
+search = Search(origin, destination)
+bs = BeautifulSoup(search.get_html(dtime))
+
+class FetchTest(unittest.TestCase):
+
+    def test_overview(self):
+        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
+
+    def test_details(self):
+        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
+
+# Shared fixtures for ParseTest, created when the module is imported.
+# `p` parses the page for a request between two addresses ...
+origin = 'Zwicklgasse 1'
+destination = 'Himmelstrasse 1'
+ot = dt = 'address'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+p = Parser(s.get_html(dtime))
+
+# ... `invalid_parser` the page for a request that test_parser_shouldRaiseError
+# expects to fail with ParserError. origin, destination and s are rebound here.
+origin = 'Foobar Strasse 123'
+destination = 'Bazgasse 321'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+invalid_parser = Parser(s.get_html(dtime))
+
+
+class ParseTest(unittest.TestCase):
+
+    def test_overview_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.overview) > 1)
+
+    def test_detail_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details) > 1)
+
+    def test_detail_shouldFindMultipleStations(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details[0]) > 1)
+
+    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
+        self.assertEqual(len(p.details), len(p.overview))
+
+    def test_parser_shouldRaiseError(self):
+        # TODO Replace with expectedFailure decorator in new python version
+        self.assertRaises(ParserError, invalid_parser._parse_overview)
+
+    def test_parser_shouldFindMoreThanOneChange(self):
+        self.assertTrue(p.overview[0]['change'] > 0)
+
+    def test_parser_shouldFindPriceGreaterZero(self):
+        self.assertTrue(p.overview[0]['price'] > 0.0)
+
+    def test_parser_shouldFindDate(self):
+        self.assertTrue(p.overview[0]['date'] == dtime.date())
+
+
+# run all test cases of this module when it is executed as a script
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/scotty.py b/tests/scotty.py
deleted file mode 100644 (file)
index c382338..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-import unittest
-from wlSearch import Search
-from datetime import datetime
-from parseHtml import Parser, iParser, ParserError
-from BeautifulSoup import BeautifulSoup
-
-origin = 'Karlsplatz'
-destination = 'Handelskai'
-dtime = datetime.now()
-dtime = dtime.replace(hour=15, minute=0)
-search = Search(origin, destination)
-bs = BeautifulSoup(search.get_html(dtime))
-
-class FetchTest(unittest.TestCase):
-
-    def test_overview(self):
-        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
-
-    def test_details(self):
-        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
-
-origin = 'Zwicklgasse 1'
-destination = 'Himmelstrasse 1'
-ot = dt = 'address'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-p = Parser(s.get_html(dtime))
-
-origin = 'Foobar Strasse 123'
-destination = 'Bazgasse 321'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-invalid_parser = Parser(s.get_html(dtime))
-
-
-class ParseTest(unittest.TestCase):
-
-    def test_overview_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.overview) > 1)
-
-    def test_detail_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details) > 1)
-
-    def test_detail_shouldFindMultipleStations(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details[0]) > 1)
-
-    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
-        self.assertEqual(len(p.details), len(p.overview))
-
-    def test_parser_shouldRaiseError(self):
-        # TODO Replace with expectedFailure decorator in new python version
-        self.assertRaises(ParserError, invalid_parser._parse_overview)
-
-    def test_parser_shouldFindMoreThanOneChange(self):
-        self.assertTrue(p.overview[0]['change'] > 0)
-
-    def test_parser_shouldFindPriceGreaterZero(self):
-        self.assertTrue(p.overview[0]['price'] > 0.0)
-
-    def test_parser_shouldFindDate(self):
-        self.assertTrue(p.overview[0]['date'] == dtime.date())
-
-
-if __name__ == '__main__':
-    unittest.main()