added line parser, station parser and next departure parser to iParser
author Florian Schweikert <kelvan@logic.at>
Sun, 4 Sep 2011 01:39:26 +0000 (03:39 +0200)
committer Florian Schweikert <kelvan@logic.at>
Sun, 4 Sep 2011 01:39:26 +0000 (03:39 +0200)
class
added two testcases

iTip.py
parseCorrection.py
parseHtml.py
tests.py [deleted file]
tests/__init__.py [new file with mode: 0644]
tests/realtime.py [new file with mode: 0644]
tests/scotty.py [new file with mode: 0644]
wlSearch.py

diff --git a/iTip.py b/iTip.py
index 1b82e83..cbae266 100644 (file)
--- a/iTip.py
+++ b/iTip.py
@@ -1,33 +1,99 @@
 from BeautifulSoup import BeautifulSoup
-import urllib2
+from urllib2 import urlopen
 import settings
+from datetime import time
 
 class iParser:
 
     def __init__(self):
         self._stations = {}
-        self._lines = []
+        self._lines = {}
 
-    def get_stations(self, letter):
-        if not self._stations.has_key(letter):
-            bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
-            self._stations[letter] = map(lambda x: x['value'], bs.find('select', {'id': 'letter'}).findAll('option'))
+    def get_stations(self, name):
+        """ Get station by direction
+        {'Directionname': [('Station name', 'url')]}
+        """
+        if not self._stations.has_key(name):
+            st = {}
+            
+            if not self.lines.has_key(name):
+                return None
+            
+            bs = BeautifulSoup(urlopen(self.lines[name]))
+            tables = bs.findAll('table', {'class': 'text_10pix'})
+            for i in range (2):
+                dir = tables[i].div.contents[-1].strip('&nbsp;')
+                
+                sta = []
+                for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
+                    if tr.a:
+                        sta.append((tr.a.text, settings.line_overview + tr.a['href']))
+                    else:
+                        sta.append((tr.text.strip('&nbsp;'), None))
+                    
+                st[dir] = sta
+            self._stations[name] = st
 
-        return self._stations[letter]
+        return self._stations[name]
 
-    def get_lines(self):
+    @property
+    def lines(self):
+        """ Dictionary of Line names with url as value
+        """
         if not self._lines:
-            bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
+            bs = BeautifulSoup(urlopen(settings.line_overview))
             # get tables
             lines = bs.findAll('td', {'class': 'linie'})
-            self._lines = []
             
             for line in lines:
                 if line.a:
-                    print line.text
+                    href = settings.line_overview + line.a['href']
                     if line.text:
-                        self._lines.append((line.text, line.a['href']))
+                        self._lines[line.text] = href
                     elif line.img:
-                        self._lines.append((line.img['alt'], line.a['href']))
+                        self._lines[line.img['alt']] = href
                         
-        return self._lines
\ No newline at end of file
+        return self._lines
+
+    def get_departures(self, url):
+        """ Get list of next departures
+        integer if time until next departure
+        time if time of next departure
+        """
+        
+        #TODO parse line name and direction for station site parsing
+        
+        bs = BeautifulSoup(urlopen(url))
+        result_lines = bs.findAll('table')[-1].findAll('tr')
+        
+        dep = []
+        for tr in result_lines[1:]:
+            th = tr.findAll('th')
+            if len(th) < 2:
+                #TODO replace with logger
+                print "[DEBUG] Unable to find th in:\n%s" % str(tr)
+                continue
+            
+            # parse time
+            tokens = th[-2].text.split(' ')
+            if len(tokens) < 2:
+                print 'Invalid time: %s' % tokens
+                continue
+            
+            tstr = tokens[1]
+            
+            if tstr.isdigit():
+                # if time to next departure in cell convert to int
+                dep.append(int(tstr))
+            else:
+                # check if time of next departure in cell
+                t = tstr.strip('&nbsp;').split(':')
+                if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
+                    t = map(int, t)
+                    dep.append(time(*t))
+                else:
+                    # Unexpected content
+                    #TODO replace with logger
+                    print "[DEBUG] Invalid data:\n%s" % tstr
+                
+        return dep
\ No newline at end of file
index be37f5d..7d8756f 100644 (file)
@@ -8,7 +8,7 @@ class ParserError(Exception):
      def __str__(self):
          return repr(self.value)
 
-class Parser:
+class cParser:
 
     def __init__(self, html):
         self.soup = BeautifulSoup(html)
index 132f84c..7875490 100644 (file)
@@ -3,7 +3,6 @@ import urllib2
 from datetime import time, datetime
 from textwrap import wrap
 import settings
-import wlSearch
 
 class ParserError(Exception):
      def __init__(self, value='', code=0):
@@ -14,14 +13,11 @@ class ParserError(Exception):
          return repr(self.value)
 
 class Parser:
-    STATE_ERROR = -1
-    STATE_START, STATE_SEARCH, STATE_RESULT = range(3)
 
     def __init__(self, html):
         self.soup = BeautifulSoup(html)
         self._overview = None
         self._details = None
-        self._current_state = 0
 
     @classmethod
     def get_tdtext(cls, x, cl):
@@ -153,28 +149,4 @@ class Parser:
 
     @property
     def request_state(self):
-        return self._current_state
-
-
-class iParser:
-
-    def __init__(self):
-        self._stations = {}
-        self._lines = []
-
-    def get_stations(self, letter):
-        if not self._stations.has_key(letter):
-            bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
-            self._stations[letter] = map(lambda x: x['value'], bs.find('select', {'id': 'letter'}).findAll('option'))
-
-        return self._stations[letter]
-
-    def get_lines(self):
-        if not self._lines:
-            bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
-            # get tables
-            lines = bs.findAll('table', {'class': 'linie'})
-            # cut line parameter out of href
-            self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)
-
-        return self._lines
+        return self._current_state
\ No newline at end of file
diff --git a/tests.py b/tests.py
deleted file mode 100644 (file)
index c382338..0000000
--- a/tests.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import unittest
-from wlSearch import Search
-from datetime import datetime
-from parseHtml import Parser, iParser, ParserError
-from BeautifulSoup import BeautifulSoup
-
-origin = 'Karlsplatz'
-destination = 'Handelskai'
-dtime = datetime.now()
-dtime = dtime.replace(hour=15, minute=0)
-search = Search(origin, destination)
-bs = BeautifulSoup(search.get_html(dtime))
-
-class FetchTest(unittest.TestCase):
-
-    def test_overview(self):
-        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
-
-    def test_details(self):
-        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
-
-origin = 'Zwicklgasse 1'
-destination = 'Himmelstrasse 1'
-ot = dt = 'address'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-p = Parser(s.get_html(dtime))
-
-origin = 'Foobar Strasse 123'
-destination = 'Bazgasse 321'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-invalid_parser = Parser(s.get_html(dtime))
-
-
-class ParseTest(unittest.TestCase):
-
-    def test_overview_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.overview) > 1)
-
-    def test_detail_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details) > 1)
-
-    def test_detail_shouldFindMultipleStations(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details[0]) > 1)
-
-    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
-        self.assertEqual(len(p.details), len(p.overview))
-
-    def test_parser_shouldRaiseError(self):
-        # TODO Replace with expectedFailure decorator in new python version
-        self.assertRaises(ParserError, invalid_parser._parse_overview)
-
-    def test_parser_shouldFindMoreThanOneChange(self):
-        self.assertTrue(p.overview[0]['change'] > 0)
-
-    def test_parser_shouldFindPriceGreaterZero(self):
-        self.assertTrue(p.overview[0]['price'] > 0.0)
-
-    def test_parser_shouldFindDate(self):
-        self.assertTrue(p.overview[0]['date'] == dtime.date())
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tests/realtime.py b/tests/realtime.py
new file mode 100644 (file)
index 0000000..c6a07aa
--- /dev/null
@@ -0,0 +1,22 @@
+import unittest
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from iTip import iParser
+
+parser = iParser()
+
+class ParseTest(unittest.TestCase):
+    
+    def test_lines(self):
+        lines = parser.lines
+        self.assertTrue(type(lines) == dict)
+        self.assertTrue(lines)
+    
+    def test_stations(self):
+        lines = parser.lines
+        
+        s = []
+        for line in lines:
+            s.append(parser.get_stations(line))
+        self.assertTrue(s)
\ No newline at end of file
diff --git a/tests/scotty.py b/tests/scotty.py
new file mode 100644 (file)
index 0000000..c382338
--- /dev/null
@@ -0,0 +1,66 @@
+import unittest
+from wlSearch import Search
+from datetime import datetime
+from parseHtml import Parser, iParser, ParserError
+from BeautifulSoup import BeautifulSoup
+
+origin = 'Karlsplatz'
+destination = 'Handelskai'
+dtime = datetime.now()
+dtime = dtime.replace(hour=15, minute=0)
+search = Search(origin, destination)
+bs = BeautifulSoup(search.get_html(dtime))
+
+class FetchTest(unittest.TestCase):
+
+    def test_overview(self):
+        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
+
+    def test_details(self):
+        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
+
+origin = 'Zwicklgasse 1'
+destination = 'Himmelstrasse 1'
+ot = dt = 'address'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+p = Parser(s.get_html(dtime))
+
+origin = 'Foobar Strasse 123'
+destination = 'Bazgasse 321'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+invalid_parser = Parser(s.get_html(dtime))
+
+
+class ParseTest(unittest.TestCase):
+
+    def test_overview_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.overview) > 1)
+
+    def test_detail_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details) > 1)
+
+    def test_detail_shouldFindMultipleStations(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details[0]) > 1)
+
+    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
+        self.assertEqual(len(p.details), len(p.overview))
+
+    def test_parser_shouldRaiseError(self):
+        # TODO Replace with expectedFailure decorator in new python version
+        self.assertRaises(ParserError, invalid_parser._parse_overview)
+
+    def test_parser_shouldFindMoreThanOneChange(self):
+        self.assertTrue(p.overview[0]['change'] > 0)
+
+    def test_parser_shouldFindPriceGreaterZero(self):
+        self.assertTrue(p.overview[0]['price'] > 0.0)
+
+    def test_parser_shouldFindDate(self):
+        self.assertTrue(p.overview[0]['date'] == dtime.date())
+
+
+if __name__ == '__main__':
+    unittest.main()
index dfc5aa0..ac051b4 100644 (file)
@@ -8,9 +8,11 @@ import webbrowser
 import urllib2
 
 from parseHtml import Parser
+import parseCorrection
 
 from PySide.QtCore import Qt
 from PySide.QtDeclarative import QDeclarativeView
+from BeautifulSoup import BeautifulSoup
 
 def QMLModel(overview, details):
     # Mapping from the "overview" data structure to a "plain" data
@@ -65,7 +67,16 @@ class Search:
             dtime = datetime.now()
         #FIXME replace with logger
         print "open_qml (%s:%s:%s)" % tuple(dtime.timetuple())[3:6]
-        p = Parser(self.get_html(dtime))
+        html = self.get_html(dtime)
+        if BeautifulSoup(html).find('form', {'id': 'form_fahrplanauskunft'}):
+            cor = parseCorrection.Parser(html)
+            if cor.origins:
+                # TODO: Show selection
+                pass
+            if cor.destinations:
+                # TODO: Show selection
+                pass
+        p = Parser(html)
         self.qml_model = QMLModel(p.overview, p.details)
         self.view = QDeclarativeView(self.parent)
         self.view.setWindowTitle('Search results')