added line parser, station parser and next departure parser to iParser
author Florian Schweikert <kelvan@logic.at>
Sun, 4 Sep 2011 01:39:26 +0000 (03:39 +0200)
committer Florian Schweikert <kelvan@logic.at>
Sun, 4 Sep 2011 01:39:26 +0000 (03:39 +0200)
class
added two testcases

iTip.py
parseCorrection.py
parseHtml.py
tests.py [deleted file]
tests/__init__.py [new file with mode: 0644]
tests/realtime.py [new file with mode: 0644]
tests/scotty.py [new file with mode: 0644]
wlSearch.py

diff --git a/iTip.py b/iTip.py
index 1b82e83..cbae266 100644 (file)
--- a/iTip.py
+++ b/iTip.py
@@ -1,33 +1,99 @@
 from BeautifulSoup import BeautifulSoup
-import urllib2
+from urllib2 import urlopen
 import settings
+from datetime import time
 
 class iParser:
 
     def __init__(self):
         self._stations = {}
-        self._lines = []
+        self._lines = {}
 
-    def get_stations(self, letter):
-        if not self._stations.has_key(letter):
-            bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
-            self._stations[letter] = map(lambda x: x['value'], bs.find('select', {'id': 'letter'}).findAll('option'))
+    def get_stations(self, name):
+        """ Get station by direction
+        {'Directionname': [('Station name', 'url')]}
+        """
+        if not self._stations.has_key(name):
+            st = {}
+            
+            if not self.lines.has_key(name):
+                return None
+            
+            bs = BeautifulSoup(urlopen(self.lines[name]))
+            tables = bs.findAll('table', {'class': 'text_10pix'})
+            for i in range (2):
+                dir = tables[i].div.contents[-1].strip('&nbsp;')
+                
+                sta = []
+                for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
+                    if tr.a:
+                        sta.append((tr.a.text, settings.line_overview + tr.a['href']))
+                    else:
+                        sta.append((tr.text.strip('&nbsp;'), None))
+                    
+                st[dir] = sta
+            self._stations[name] = st
 
-        return self._stations[letter]
+        return self._stations[name]
 
-    def get_lines(self):
+    @property
+    def lines(self):
+        """ Dictionary of Line names with url as value
+        """
         if not self._lines:
-            bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
+            bs = BeautifulSoup(urlopen(settings.line_overview))
             # get tables
             lines = bs.findAll('td', {'class': 'linie'})
-            self._lines = []
             
             for line in lines:
                 if line.a:
-                    print line.text
+                    href = settings.line_overview + line.a['href']
                     if line.text:
-                        self._lines.append((line.text, line.a['href']))
+                        self._lines[line.text] = href
                     elif line.img:
-                        self._lines.append((line.img['alt'], line.a['href']))
+                        self._lines[line.img['alt']] = href
                         
-        return self._lines
\ No newline at end of file
+        return self._lines
+
+    def get_departures(self, url):
+        """ Get list of next departures
+        integer if time until next departure
+        time if time of next departure
+        """
+        
+        #TODO parse line name and direction for station site parsing
+        
+        bs = BeautifulSoup(urlopen(url))
+        result_lines = bs.findAll('table')[-1].findAll('tr')
+        
+        dep = []
+        for tr in result_lines[1:]:
+            th = tr.findAll('th')
+            if len(th) < 2:
+                #TODO replace with logger
+                print "[DEBUG] Unable to find th in:\n%s" % str(tr)
+                continue
+            
+            # parse time
+            tokens = th[-2].text.split(' ')
+            if len(tokens) < 2:
+                print 'Invalid time: %s' % tokens
+                continue
+            
+            tstr = tokens[1]
+            
+            if tstr.isdigit():
+                # if time to next departure in cell convert to int
+                dep.append(int(tstr))
+            else:
+                # check if time of next departure in cell
+                t = tstr.strip('&nbsp;').split(':')
+                if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
+                    t = map(int, t)
+                    dep.append(time(*t))
+                else:
+                    # Unexpected content
+                    #TODO replace with logger
+                    print "[DEBUG] Invalid data:\n%s" % tstr
+                
+        return dep
\ No newline at end of file
index be37f5d..7d8756f 100644 (file)
@@ -8,7 +8,7 @@ class ParserError(Exception):
      def __str__(self):
          return repr(self.value)
 
-class Parser:
+class cParser:
 
     def __init__(self, html):
         self.soup = BeautifulSoup(html)
index 132f84c..7875490 100644 (file)
@@ -3,7 +3,6 @@ import urllib2
 from datetime import time, datetime
 from textwrap import wrap
 import settings
-import wlSearch
 
 class ParserError(Exception):
      def __init__(self, value='', code=0):
@@ -14,14 +13,11 @@ class ParserError(Exception):
          return repr(self.value)
 
 class Parser:
-    STATE_ERROR = -1
-    STATE_START, STATE_SEARCH, STATE_RESULT = range(3)
 
     def __init__(self, html):
         self.soup = BeautifulSoup(html)
         self._overview = None
         self._details = None
-        self._current_state = 0
 
     @classmethod
     def get_tdtext(cls, x, cl):
@@ -153,28 +149,4 @@ class Parser:
 
     @property
     def request_state(self):
-        return self._current_state
-
-
-class iParser:
-
-    def __init__(self):
-        self._stations = {}
-        self._lines = []
-
-    def get_stations(self, letter):
-        if not self._stations.has_key(letter):
-            bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
-            self._stations[letter] = map(lambda x: x['value'], bs.find('select', {'id': 'letter'}).findAll('option'))
-
-        return self._stations[letter]
-
-    def get_lines(self):
-        if not self._lines:
-            bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
-            # get tables
-            lines = bs.findAll('table', {'class': 'linie'})
-            # cut line parameter out of href
-            self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)
-
-        return self._lines
+        return self._current_state
\ No newline at end of file
diff --git a/tests.py b/tests.py
deleted file mode 100644 (file)
index c382338..0000000
--- a/tests.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import unittest
-from wlSearch import Search
-from datetime import datetime
-from parseHtml import Parser, iParser, ParserError
-from BeautifulSoup import BeautifulSoup
-
-origin = 'Karlsplatz'
-destination = 'Handelskai'
-dtime = datetime.now()
-dtime = dtime.replace(hour=15, minute=0)
-search = Search(origin, destination)
-bs = BeautifulSoup(search.get_html(dtime))
-
-class FetchTest(unittest.TestCase):
-
-    def test_overview(self):
-        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
-
-    def test_details(self):
-        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
-
-origin = 'Zwicklgasse 1'
-destination = 'Himmelstrasse 1'
-ot = dt = 'address'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-p = Parser(s.get_html(dtime))
-
-origin = 'Foobar Strasse 123'
-destination = 'Bazgasse 321'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-invalid_parser = Parser(s.get_html(dtime))
-
-
-class ParseTest(unittest.TestCase):
-
-    def test_overview_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.overview) > 1)
-
-    def test_detail_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details) > 1)
-
-    def test_detail_shouldFindMultipleStations(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details[0]) > 1)
-
-    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
-        self.assertEqual(len(p.details), len(p.overview))
-
-    def test_parser_shouldRaiseError(self):
-        # TODO Replace with expectedFailure decorator in new python version
-        self.assertRaises(ParserError, invalid_parser._parse_overview)
-
-    def test_parser_shouldFindMoreThanOneChange(self):
-        self.assertTrue(p.overview[0]['change'] > 0)
-
-    def test_parser_shouldFindPriceGreaterZero(self):
-        self.assertTrue(p.overview[0]['price'] > 0.0)
-
-    def test_parser_shouldFindDate(self):
-        self.assertTrue(p.overview[0]['date'] == dtime.date())
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tests/realtime.py b/tests/realtime.py
new file mode 100644 (file)
index 0000000..c6a07aa
--- /dev/null
@@ -0,0 +1,22 @@
+import unittest
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from iTip import iParser
+
+parser = iParser()
+
+class ParseTest(unittest.TestCase):
+    
+    def test_lines(self):
+        lines = parser.lines
+        self.assertTrue(type(lines) == dict)
+        self.assertTrue(lines)
+    
+    def test_stations(self):
+        lines = parser.lines
+        
+        s = []
+        for line in lines:
+            s.append(parser.get_stations(line))
+        self.assertTrue(s)
\ No newline at end of file
diff --git a/tests/scotty.py b/tests/scotty.py
new file mode 100644 (file)
index 0000000..c382338
--- /dev/null
@@ -0,0 +1,66 @@
+import unittest
+from wlSearch import Search
+from datetime import datetime
+from parseHtml import Parser, iParser, ParserError
+from BeautifulSoup import BeautifulSoup
+
+origin = 'Karlsplatz'
+destination = 'Handelskai'
+dtime = datetime.now()
+dtime = dtime.replace(hour=15, minute=0)
+search = Search(origin, destination)
+bs = BeautifulSoup(search.get_html(dtime))
+
+class FetchTest(unittest.TestCase):
+
+    def test_overview(self):
+        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
+
+    def test_details(self):
+        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
+
+origin = 'Zwicklgasse 1'
+destination = 'Himmelstrasse 1'
+ot = dt = 'address'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+p = Parser(s.get_html(dtime))
+
+origin = 'Foobar Strasse 123'
+destination = 'Bazgasse 321'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+invalid_parser = Parser(s.get_html(dtime))
+
+
+class ParseTest(unittest.TestCase):
+
+    def test_overview_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.overview) > 1)
+
+    def test_detail_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details) > 1)
+
+    def test_detail_shouldFindMultipleStations(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details[0]) > 1)
+
+    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
+        self.assertEqual(len(p.details), len(p.overview))
+
+    def test_parser_shouldRaiseError(self):
+        # TODO Replace with expectedFailure decorator in new python version
+        self.assertRaises(ParserError, invalid_parser._parse_overview)
+
+    def test_parser_shouldFindMoreThanOneChange(self):
+        self.assertTrue(p.overview[0]['change'] > 0)
+
+    def test_parser_shouldFindPriceGreaterZero(self):
+        self.assertTrue(p.overview[0]['price'] > 0.0)
+
+    def test_parser_shouldFindDate(self):
+        self.assertTrue(p.overview[0]['date'] == dtime.date())
+
+
+if __name__ == '__main__':
+    unittest.main()
index dfc5aa0..ac051b4 100644 (file)
@@ -8,9 +8,11 @@ import webbrowser
 import urllib2
 
 from parseHtml import Parser
+import parseCorrection
 
 from PySide.QtCore import Qt
 from PySide.QtDeclarative import QDeclarativeView
+from BeautifulSoup import BeautifulSoup
 
 def QMLModel(overview, details):
     # Mapping from the "overview" data structure to a "plain" data
@@ -65,7 +67,16 @@ class Search:
             dtime = datetime.now()
         #FIXME replace with logger
         print "open_qml (%s:%s:%s)" % tuple(dtime.timetuple())[3:6]
-        p = Parser(self.get_html(dtime))
+        html = self.get_html(dtime)
+        if BeautifulSoup(html).find('form', {'id': 'form_fahrplanauskunft'}):
+            cor = parseCorrection.Parser(html)
+            if cor.origins:
+                # TODO: Show selection
+                pass
+            if cor.destinations:
+                # TODO: Show selection
+                pass
+        p = Parser(html)
         self.qml_model = QMLModel(p.overview, p.details)
         self.view = QDeclarativeView(self.parent)
         self.view.setWindowTitle('Search results')