From: Florian Schweikert
Date: Mon, 5 Sep 2011 14:01:19 +0000 (+0200)
Subject: added parser for routing
X-Git-Url: https://vcs.maemo.org/git/?a=commitdiff_plain;h=9e1e29119cc2eec1a5695e4d46db0ef1c8a63b13;p=pywienerlinien

added parser for routing
---

Review notes (patch edited after export):
 * scotty.py: raise the defined ParserException instead of the undefined
   ParserError; iterate over the 'details' property instead of calling it;
   get_price tests for an empty cell instead of the result of str.find;
   search() fills a copy of settings.search_post; the DEBUG dump is always
   closed; docstrings added.
 * README: typo fixed.
 * The 'index' lines of README and scotty.py still name the blobs of the
   original commit.

diff --git a/README b/README
index d1f6483..e225d65 100644
--- a/README
+++ b/README
@@ -1,2 +1,6 @@
 Search for connection on wienerlinien.at
 Using qml gui to show results
+
+Show next bus/tram/underground departures.
+
+This program is unofficial.
diff --git a/scotty.py b/scotty.py
new file mode 100644
index 0000000..6a6f564
--- /dev/null
+++ b/scotty.py
@@ -0,0 +1,251 @@
+from BeautifulSoup import BeautifulSoup, NavigableString
+from urllib2 import urlopen
+from urllib import urlencode
+import settings
+from datetime import datetime, time
+from textwrap import wrap
+
+POSITION_TYPES = ('stop', 'address', 'poi')
+
+class ParserException(Exception):
+    """ raised on invalid requests and unparsable pages
+    """
+
+    def __init__(self, msg = 'Parser error'):
+        # init the base class too, so str(exception) shows the message
+        Exception.__init__(self, msg)
+        self.message = msg
+
+class PageType:
+    """ page kinds returned by sParser.check_page
+    """
+    UNKNOWN, CORRECTION, RESULT = range(3)
+
+
+def search(origin_tuple, destination_tuple, dtime=None):
+    """ build route request
+    origin_tuple, destination_tuple: (name, type), type in POSITION_TYPES
+    dtime: datetime used for the request, now if not given
+    returns html result (as urllib response)
+    raises ParserException on invalid position type
+    """
+    if not dtime:
+        dtime = datetime.now()
+
+    origin, origin_type = origin_tuple
+    destination, destination_type = destination_tuple
+    if not origin_type in POSITION_TYPES or\
+        not destination_type in POSITION_TYPES:
+        raise ParserException('Invalid position type')
+
+    # copy the template, settings.search_post is shared by all requests
+    post = dict(settings.search_post)
+    post['name_origin'] = origin
+    post['type_origin'] = origin_type
+    post['name_destination'] = destination
+    post['type_destination'] = destination_type
+    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
+    post['itdTime'] = dtime.strftime('%H:%M')
+    params = urlencode(post)
+    return urlopen('%s?%s' % (settings.action, params))
+
+
+class sParser:
+    """ Parser for search response
+    """
+
+    def __init__(self, html):
+        self.bs = BeautifulSoup(html)
+
+    def check_page(self):
+        """ returns the PageType of the response
+        """
+        if self.bs.find('form', {'id': 'form_efaresults'}):
+            return PageType.RESULT
+
+        if self.bs.find('div', {'class':'form_error'}):
+            return PageType.CORRECTION
+
+        return PageType.UNKNOWN
+
+    def get_correction(self):
+        """ returns (origin options, destination options)
+        raises ParserException if a name list is missing
+        """
+        nlo = self.bs.find('select', {'id': 'nameList_origin'})
+        nld = self.bs.find('select', {'id': 'nameList_destination'})
+
+        if not nlo or not nld:
+            raise ParserException('Unable to parse html')
+
+        origin = nlo.findAll('option')
+        destination = nld.findAll('option')
+
+        if not origin:
+            origin = []
+        if not destination:
+            destination = []
+
+        return (origin, destination)
+
+    def get_result(self):
+        """ returns rParser for the response
+        """
+        return rParser(str(self.bs))
+
+
+
+class rParser:
+    """ Parser for routing results
+    """
+
+    def __init__(self, html):
+        self.soup = BeautifulSoup(html)
+        self._overview = None
+        self._details = None
+
+    @classmethod
+    def get_tdtext(cls, x, cl):
+        """ returns text of the td with class cl in x
+        """
+        return x.find('td', {'class': cl}).text
+
+    @classmethod
+    def get_change(cls, x):
+        """ returns changes as int, 0 if the cell is empty
+        """
+        y = rParser.get_tdtext(x, 'col_change')
+        if y:
+            return int(y)
+        else:
+            return 0
+
+    @classmethod
+    def get_price(cls, x):
+        """ returns price as float, 0.0 if the cell is empty
+        """
+        y = rParser.get_tdtext(x, 'col_price')
+        # y.find(',') is -1 (true) without a comma, so it can't spot an empty cell
+        if y:
+            return float(y.replace(',', '.'))
+        else:
+            return 0.0
+
+    @classmethod
+    def get_date(cls, x):
+        """ returns datetime.date, None if the cell is empty
+        """
+        y = rParser.get_tdtext(x, 'col_date')
+        if y:
+            return datetime.strptime(y, '%d.%m.%Y').date()
+        else:
+            return None
+
+    @classmethod
+    def get_time(cls, x):
+        """ returns list of datetime.time, [] if the cell is empty
+        """
+        y = rParser.get_tdtext(x, 'col_time')
+        if y:
+            if (y.find("-") > 0):
+                return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+            else:
+                # no '-' between the times: wrap into pieces of 5 chars
+                return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
+        else:
+            return []
+
+    @classmethod
+    def get_duration(cls, x):
+        """ returns duration as datetime.time, None if the cell is empty
+        """
+        y = rParser.get_tdtext(x, 'col_duration')
+        if y:
+            return time(*map(int, y.split(":")))
+        else:
+            return None
+
+    def __iter__(self):
+        # details is a property, calling it would raise a TypeError
+        for detail in self.details:
+            yield detail
+
+    def _parse_details(self):
+        """ returns one list of trip steps per 'tourdetail' table
+        """
+        tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
+
+        trips = map(lambda x: map(lambda y: {
+                        'time': rParser.get_time(y),
+                        'station': map(lambda z: z[2:].strip(),
+                                       filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
+                        'info': map(lambda x: x.strip(),
+                                    filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
+                        }, x.find('tbody').findAll('tr')),
+                    tours) # all routes
+        return trips
+
+    @property
+    def details(self):
+        """returns list of trip details
+        [ [ { 'time': [datetime.time, datetime.time] if time else [],
+              'station': [u'start', u'end'] if station else [],
+              'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
+            }, ... # next trip step
+          ], ... # next trip possibility
+        ]
+        """
+        # parse only once, even if the result is an empty list
+        if self._details is None:
+            self._details = self._parse_details()
+
+        return self._details
+
+    def _parse_overview(self):
+        """ returns list of overview dicts (keys: see overview)
+        raises ParserException if there is no overview table
+        """
+
+        # get overview table
+        table = self.soup.find('table', {'id': 'tbl_fahrten'})
+
+        # check if there is an overview table
+        if table and table.findAll('tr'):
+            # get rows
+            rows = table.findAll('tr')[1:] # cut off headline
+
+            overview = map(lambda x: {
+                               'date': rParser.get_date(x),
+                               'time': rParser.get_time(x),
+                               'duration': rParser.get_duration(x), # grab duration
+                               'change': rParser.get_change(x),
+                               'price': rParser.get_price(x),
+                               },
+                           rows)
+        else:
+            raise ParserException('Unable to parse details')
+
+        return overview
+
+    @property
+    def overview(self):
+        """list of dicts containing
+        date: datetime
+        time: [time, time]
+        duration: time
+        change: int
+        price: float
+        None if a row lacks an expected td (page is dumped to DEBUG)
+        """
+        if self._overview is None:
+            try:
+                self._overview = self._parse_overview()
+            except AttributeError:
+                # get_tdtext found no matching td, keep the page for debugging
+                f = open('DEBUG', 'w')
+                try:
+                    f.write(str(self.soup))
+                finally:
+                    f.close()
+
+        return self._overview
diff --git a/settings.py b/settings.py
index 2b86135..81b0b96 100644
--- a/settings.py
+++ b/settings.py
@@ -9,3 +9,35 @@ hist_file = path.join(folder, '.wl_history')
 
 line_overview = 'http://www.wienerlinien.at/itip/linienwahl/'
 stations = 'http://www.wienerlinien.at/itip/haltestelle?letter=%s'
+
+search_post = {'language': 'de',
+               'sessionID': 0,
+               'requestID': 0,
+               'execInst': 'normal',
+               'command': '',
+               'anySigWhenPerfectNoOtherMatches': 1,
+               'itdLPxx_locationServerActive': '',
+               'locationServerActive': 0,
+               'typeInfo_origin': 'invalid',
+               'placeState_origin': 'empty',
+               'placeInfo_origin': 'invalid',
+               'place_origin': 'Wien', # overwrite if necessary
+               'type_origin': None, # stop/address/poi
+               'nameState_origin': 'empty',
+               'nameInfo_origin': 'invalid',
+               'anyType_origin': '',
+               'name_origin': None,
+               'typeInfo_destination': 'invalid',
+               'placeState_destination': 'empty',
+               'placeInfo_destination': 'invalid',
+               'place_destination': 'Wien', # overwrite if necessary
+               'type_destination': None, # stop/address/poi
+               'nameState_destination': 'empty',
+               'nameInfo_destination': 'invalid',
+               'anyType_destination': '', # maybe nice
+               'name_destination': None,
+               'itdTripDateTimeDepArr': 'dep',
+               'itdDateDayMonthYear': None, # DD.MM.YYYY
+               'itdTime': None, # HH:MM
+               'submitbutton': 'SUCHEN'
+               }
\ No newline at end of file
diff --git a/tests/routing.py b/tests/routing.py
new file mode 100644
index 0000000..c382338
--- /dev/null
+++ b/tests/routing.py
@@ -0,0 +1,66 @@
+import unittest
+from wlSearch import Search
+from datetime import datetime
+from parseHtml import Parser, iParser, ParserError
+from BeautifulSoup import BeautifulSoup
+
+origin = 'Karlsplatz'
+destination = 'Handelskai'
+dtime = datetime.now()
+dtime = dtime.replace(hour=15, minute=0)
+search = Search(origin, destination)
+bs = BeautifulSoup(search.get_html(dtime))
+
+class FetchTest(unittest.TestCase):
+
+    def test_overview(self):
+        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
+
+    def test_details(self):
+        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
+
+origin = 'Zwicklgasse 1'
+destination = 'Himmelstrasse 1'
+ot = dt = 'address'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+p = Parser(s.get_html(dtime))
+
+origin = 'Foobar Strasse 123'
+destination = 'Bazgasse 321'
+s = Search(origin, destination, origin_type=ot, destination_type=dt)
+invalid_parser = Parser(s.get_html(dtime))
+
+
+class ParseTest(unittest.TestCase):
+
+    def test_overview_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.overview) > 1)
+
+    def test_detail_shouldFindMultipleItems(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details) > 1)
+
+    def test_detail_shouldFindMultipleStations(self):
+        # TODO Replace with assertGreater in new python version
+        self.assertTrue(len(p.details[0]) > 1)
+
+    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
+        self.assertEqual(len(p.details), len(p.overview))
+
+    def test_parser_shouldRaiseError(self):
+        # TODO Replace with expectedFailure decorator in new python version
+        self.assertRaises(ParserError, invalid_parser._parse_overview)
+
+    def test_parser_shouldFindMoreThanOneChange(self):
+        self.assertTrue(p.overview[0]['change'] > 0)
+
+    def test_parser_shouldFindPriceGreaterZero(self):
+        self.assertTrue(p.overview[0]['price'] > 0.0)
+
+    def test_parser_shouldFindDate(self):
+        self.assertTrue(p.overview[0]['date'] == dtime.date())
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/scotty.py b/tests/scotty.py
deleted file mode 100644
index c382338..0000000
--- a/tests/scotty.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import unittest
-from wlSearch import Search
-from datetime import datetime
-from parseHtml import Parser, iParser, ParserError
-from BeautifulSoup import BeautifulSoup
-
-origin = 'Karlsplatz'
-destination = 'Handelskai'
-dtime = datetime.now()
-dtime = dtime.replace(hour=15, minute=0)
-search = Search(origin, destination)
-bs = BeautifulSoup(search.get_html(dtime))
-
-class FetchTest(unittest.TestCase):
-
-    def test_overview(self):
-        self.assertEquals(1, len(bs.findAll('table', {'id': 'tbl_fahrten'})))
-
-    def test_details(self):
-        self.assertTrue(len(bs.findAll('div', {'class': 'data_table tourdetail'})) > 0)
-
-origin = 'Zwicklgasse 1'
-destination = 'Himmelstrasse 1'
-ot = dt = 'address'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-p = Parser(s.get_html(dtime))
-
-origin = 'Foobar Strasse 123'
-destination = 'Bazgasse 321'
-s = Search(origin, destination, origin_type=ot, destination_type=dt)
-invalid_parser = Parser(s.get_html(dtime))
-
-
-class ParseTest(unittest.TestCase):
-
-    def test_overview_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.overview) > 1)
-
-    def test_detail_shouldFindMultipleItems(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details) > 1)
-
-    def test_detail_shouldFindMultipleStations(self):
-        # TODO Replace with assertGreater in new python version
-        self.assertTrue(len(p.details[0]) > 1)
-
-    def test_parser_overviewAndDetailsShouldHaveSameLength(self):
-        self.assertEqual(len(p.details), len(p.overview))
-
-    def test_parser_shouldRaiseError(self):
-        # TODO Replace with expectedFailure decorator in new python version
-        self.assertRaises(ParserError, invalid_parser._parse_overview)
-
-    def test_parser_shouldFindMoreThanOneChange(self):
-        self.assertTrue(p.overview[0]['change'] > 0)
-
-    def test_parser_shouldFindPriceGreaterZero(self):
-        self.assertTrue(p.overview[0]['price'] > 0.0)
-
-    def test_parser_shouldFindDate(self):
-        self.assertTrue(p.overview[0]['date'] == dtime.date())
-
-
-if __name__ == '__main__':
-    unittest.main()