from BeautifulSoup import BeautifulSoup
-import urllib2
+from urllib2 import urlopen
import settings
+from datetime import time
 class iParser:
     def __init__(self):
+        # cache of parsed station listings: line name -> {direction: [...]}
         self._stations = {}
-        self._lines = []
+        # line name -> url mapping, filled lazily by the 'lines' property
+        self._lines = {}
-    def get_stations(self, letter):
-        if not self._stations.has_key(letter):
-            bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
-            self._stations[letter] = map(lambda x: x['value'], bs.find('select', {'id': 'letter'}).findAll('option'))
+    def get_stations(self, name):
+        """ Get stations of a line grouped by direction
+        Returns {'Direction name': [('Station name', 'url'), ...]}
+        or None if *name* is not a known line. Results are cached.
+        """
+        if not self._stations.has_key(name):
+            st = {}
+
+            if not self.lines.has_key(name):
+                return None
+
+            bs = BeautifulSoup(urlopen(self.lines[name]))
+            tables = bs.findAll('table', {'class': 'text_10pix'})
+            # one table per direction of travel
+            for i in range(2):
+                # last text node of the div holds the direction name
+                dir = tables[i].div.contents[-1].strip(' ')
+
+                sta = []
+                # BUG FIX: iterate the table of the current direction
+                # (tables[i]); the original always read tables[0], so both
+                # directions got the first direction's station list
+                for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
+                    if tr.a:
+                        sta.append((tr.a.text, settings.line_overview + tr.a['href']))
+                    else:
+                        # station without a detail link
+                        sta.append((tr.text.strip(' '), None))
+
+                st[dir] = sta
+            self._stations[name] = st
-
-        return self._stations[letter]
+        return self._stations[name]
-    def get_lines(self):
+    @property
+    def lines(self):
+        """ Dictionary of Line names with url as value
+        """
+        # fetched once on first access, then served from self._lines
         if not self._lines:
-            bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
+            bs = BeautifulSoup(urlopen(settings.line_overview))
             # get tables
             lines = bs.findAll('td', {'class': 'linie'})
             for line in lines:
                 if line.a:
-                    print line.text
+                    href = settings.line_overview + line.a['href']
+                    # the line label is either plain text or an image's alt text
                     if line.text:
-                        self._lines.append((line.text, line.a['href']))
+                        self._lines[line.text] = href
                     elif line.img:
-                        self._lines.append((line.img['alt'], line.a['href']))
+                        self._lines[line.img['alt']] = href
-        return self._lines
\ No newline at end of file
+        return self._lines
+
+    def get_departures(self, url):
+        """ Get list of next departures
+        integer if time until next departure (minutes)
+        time if time of next departure
+        """
+
+        #TODO parse line name and direction for station site parsing
+
+        bs = BeautifulSoup(urlopen(url))
+        result_lines = bs.findAll('table')[-1].findAll('tr')
+
+        dep = []
+        # skip the header row
+        for tr in result_lines[1:]:
+            th = tr.findAll('th')
+            if len(th) < 2:
+                #TODO replace with logger
+                print "[DEBUG] Unable to find th in:\n%s" % str(tr)
+                continue
+
+            # parse time; BUG FIX: named 'cell' instead of 'time', which
+            # shadowed the datetime.time class imported at module level and
+            # made the time(*t) call below raise TypeError
+            cell = th[-2].text.split(' ')
+            if len(cell) < 2:
+                print 'Invalid time: %s' % cell
+                continue
+
+            cell = cell[1]
+
+            if cell.isdigit():
+                # if time to next departure in cell convert to int
+                dep.append(int(cell))
+            else:
+                # check if time of next departure in cell
+                t = cell.strip(' ').split(':')
+                if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
+                    t = map(int, t)
+                    dep.append(time(*t))
+                else:
+                    # Unexpected content
+                    #TODO replace with logger
+                    print "[DEBUG] Invalid data:\n%s" % cell
+
+        return dep
\ No newline at end of file
def __str__(self):
return repr(self.value)
-class Parser:
+class cParser:
def __init__(self, html):
self.soup = BeautifulSoup(html)
from datetime import time, datetime
from textwrap import wrap
import settings
-import wlSearch
class ParserError(Exception):
def __init__(self, value='', code=0):
return repr(self.value)
class Parser:
- STATE_ERROR = -1
- STATE_START, STATE_SEARCH, STATE_RESULT = range(3)
def __init__(self, html):
self.soup = BeautifulSoup(html)
self._overview = None
self._details = None
- self._current_state = 0
@classmethod
def get_tdtext(cls, x, cl):
@property
def request_state(self):
- return self._current_state
-
-
-class iParser:
-
- def __init__(self):
- self._stations = {}
- self._lines = []
-
- def get_stations(self, letter):
- if not self._stations.has_key(letter):
- bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
- self._stations[letter] = map(lambda x: x['value'], bs.find('select', {'id': 'letter'}).findAll('option'))
-
- return self._stations[letter]
-
- def get_lines(self):
- if not self._lines:
- bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
- # get tables
- lines = bs.findAll('table', {'class': 'linie'})
- # cut line parameter out of href
- self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)
-
- return self._lines
+ return self._current_state
\ No newline at end of file
+++ /dev/null
-import unittest
-from wlSearch import Search
-from datetime import datetime
-from parseHtml import Parser, iParser, ParserError
-from BeautifulSoup import BeautifulSoup
-
-origin = 'Karlsplatz'
-destination = 'Handelskai'
-dtime = datetime.now()
-dtime = dtime.replace(hour=15, minute=0)
-search = Search(origin, destination)
-bs = BeautifulSoup(search.get_html(dtime))
-
-class FetchTest(unittest.TestCase):
-
- def test_overview(self):
- self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
-
- def test_details(self):
- self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
-
-origin = 'Zwicklgasse 1'
-destination = 'Himmelstrasse 1'
-ot = dt = 'address'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-p = Parser(s.get_html(dtime))
-
-origin = 'Foobar Strasse 123'
-destination = 'Bazgasse 321'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-invalid_parser = Parser(s.get_html(dtime))
-
-
-class ParseTest(unittest.TestCase):
-
- def test_overview_shouldFindMultipleItems(self):
- # TODO Replace with assertGreater in new python version
- self.assertTrue(len(p.overview) > 1)
-
- def test_detail_shouldFindMultipleItems(self):
- # TODO Replace with assertGreater in new python version
- self.assertTrue(len(p.details) > 1)
-
- def test_detail_shouldFindMultipleStations(self):
- # TODO Replace with assertGreater in new python version
- self.assertTrue(len(p.details[0]) > 1)
-
- def test_parser_overviewAndDetailsShouldHaveSameLength(self):
- self.assertEqual(len(p.details), len(p.overview))
-
- def test_parser_shouldRaiseError(self):
- # TODO Replace with expectedFailure decorator in new python version
- self.assertRaises(ParserError, invalid_parser._parse_overview)
-
- def test_parser_shouldFindMoreThanOneChange(self):
- self.assertTrue(p.overview[0]['change'] > 0)
-
- def test_parser_shouldFindPriceGreaterZero(self):
- self.assertTrue(p.overview[0]['price'] > 0.0)
-
- def test_parser_shouldFindDate(self):
- self.assertTrue(p.overview[0]['date'] == dtime.date())
-
-
-if __name__ == '__main__':
- unittest.main()
--- /dev/null
+import unittest
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from iTip import iParser
+
+parser = iParser()
+
+class ParseTest(unittest.TestCase):
+
+    def test_lines(self):
+        lines = parser.lines
+        self.assertTrue(type(lines) == dict)
+        self.assertTrue(lines)
+
+    def test_stations(self):
+        lines = parser.lines
+
+        s = []
+        for line in lines:
+            # BUG FIX: iterating a dict yields the key (the full line name);
+            # line[0] only passed its first character to get_stations
+            s.append(parser.get_stations(line))
+        self.assertTrue(s)
\ No newline at end of file
--- /dev/null
+import unittest
+from wlSearch import Search
+from datetime import datetime
+from parseHtml import Parser, iParser, ParserError
+from BeautifulSoup import BeautifulSoup
+
+# NOTE(review): the module-level statements below perform live HTTP requests
+# at import time (Search/get_html) -- the tests need network access to run
+origin = 'Karlsplatz'
+destination = 'Handelskai'
+dtime = datetime.now()
+dtime = dtime.replace(hour=15, minute=0)
+search = Search(origin, destination)
+bs = BeautifulSoup(search.get_html(dtime))
+
+class FetchTest(unittest.TestCase):
+
+    def test_overview(self):
+        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
+
+    def test_details(self):
+        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
+
+# address-based search with valid addresses
+origin = 'Zwicklgasse 1'
+destination = 'Himmelstrasse 1'
+ot = dt = 'address'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+p = Parser(s.get_html(dtime))
+
+# nonsense addresses: used to assert that parsing raises ParserError
+origin = 'Foobar Strasse 123'
+destination = 'Bazgasse 321'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+invalid_parser = Parser(s.get_html(dtime))
+
+
+class ParseTest(unittest.TestCase):
+
+    def test_overview_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.overview) > 1)
+
+    def test_detail_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details) > 1)
+
+    def test_detail_shouldFindMultipleStations(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details[0]) > 1)
+
+    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
+        self.assertEqual(len(p.details), len(p.overview))
+
+    def test_parser_shouldRaiseError(self):
+        # TODO Replace with expectedFailure decorator in new python version
+        self.assertRaises(ParserError, invalid_parser._parse_overview)
+
+    def test_parser_shouldFindMoreThanOneChange(self):
+        self.assertTrue(p.overview[0]['change'] > 0)
+
+    def test_parser_shouldFindPriceGreaterZero(self):
+        self.assertTrue(p.overview[0]['price'] > 0.0)
+
+    def test_parser_shouldFindDate(self):
+        self.assertTrue(p.overview[0]['date'] == dtime.date())
+
+
+if __name__ == '__main__':
+    unittest.main()
import urllib2
from parseHtml import Parser
+import parseCorrection
from PySide.QtCore import Qt
from PySide.QtDeclarative import QDeclarativeView
+from BeautifulSoup import BeautifulSoup
def QMLModel(overview, details):
# Mapping from the "overview" data structure to a "plain" data
dtime = datetime.now()
#FIXME replace with logger
print "open_qml (%s:%s:%s)" % tuple(dtime.timetuple())[3:6]
-        p = Parser(self.get_html(dtime))
+        html = self.get_html(dtime)
+        # a correction form in the reply means the input was ambiguous
+        if BeautifulSoup(html).find('form', {'id': 'form_fahrplanauskunft'}):
+            cor = parseCorrection.Parser(html)
+            # BUG FIX: the correction parser is bound to 'cor'; 'p' is not
+            # defined until after this block, so p.origins raised NameError
+            if cor.origins:
+                # TODO: Show selection
+                pass
+            if cor.destinations:
+                # TODO: Show selection
+                pass
+        p = Parser(html)
self.qml_model = QMLModel(p.overview, p.details)
self.view = QDeclarativeView(self.parent)
self.view.setWindowTitle('Search results')