From 7403374b55d1ffeed1c3717c44ea9cc1472e3e74 Mon Sep 17 00:00:00 2001 From: Florian Schweikert Date: Fri, 16 Dec 2011 14:52:52 +0100 Subject: [PATCH] 12 new nosetests for departure parsing isolating parsing parts a bit --- gotovienna-qml | 6 - gotovienna/realtime.py | 169 +++++--- gotovienna/tests/data/errorpage.html | 34 ++ gotovienna/tests/data/line_station.html | 84 ++++ gotovienna/tests/data/lines.html | 439 ++++++++++++++++++++ gotovienna/tests/data/nodepartures.html | 90 ++++ gotovienna/tests/data/stationbased.html | 141 +++++++ gotovienna/tests/data/stations1.html | 440 ++++++++++++++++++++ gotovienna/tests/data/stations2.html | 685 +++++++++++++++++++++++++++++++ gotovienna/tests/realtime.py | 80 +++- qml/MainPage.qml | 2 +- 11 files changed, 2084 insertions(+), 86 deletions(-) create mode 100644 gotovienna/tests/data/errorpage.html create mode 100644 gotovienna/tests/data/line_station.html create mode 100644 gotovienna/tests/data/lines.html create mode 100644 gotovienna/tests/data/nodepartures.html create mode 100644 gotovienna/tests/data/stationbased.html create mode 100644 gotovienna/tests/data/stations1.html create mode 100644 gotovienna/tests/data/stations2.html diff --git a/gotovienna-qml b/gotovienna-qml index 458fe68..40e2793 100755 --- a/gotovienna-qml +++ b/gotovienna-qml @@ -144,12 +144,6 @@ class Gui(QObject): threading.Thread(target=load_async).start() - stationsLoaded = Signal() - - @Slot(float, float, result='QStringList') - def get_nearby_stations(self, lat, lon): - return get_nearby_stations(lat, lon) - @Slot(str, str, str, result=str) def get_directions_url(self, line, direction, station): return self.itip.get_url_from_direction(line, direction, station) diff --git a/gotovienna/realtime.py b/gotovienna/realtime.py index e4e3266..6454667 100644 --- a/gotovienna/realtime.py +++ b/gotovienna/realtime.py @@ -64,6 +64,24 @@ class ITipParser: def __init__(self): self._lines = cache.lines + def parse_stations(self, html): + bs = BeautifulSoup(html) + tables = bs.findAll('table', {'class': 'text_10pix'}) + st = {} + + for i in range(2): + dir = tables[i].div.contents[-1].strip()[6:-6] + + sta = [] + for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}): + if tr.a: + sta.append((tr.a.text, defaults.line_overview + tr.a['href'])) + else: + sta.append((tr.text.strip(' '), None)) + + st[dir] = sta + return st + def get_stations(self, name): """ Get station by direction {'Directionname': [('Station name', 'url')]} @@ -74,38 +92,35 @@ class ITipParser: st = Stations(name) if not st: - bs = BeautifulSoup(urlopen(self.lines[name])) - tables = bs.findAll('table', {'class': 'text_10pix'}) - for i in range(2): - dir = tables[i].div.contents[-1].strip()[6:-6] + st = self.parse_stations(urlopen(self.lines[name]).read()) - sta = [] - for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}): - if tr.a: - sta.append((tr.a.text, defaults.line_overview + tr.a['href'])) - else: - sta.append((tr.text.strip(' '), None)) + return st + + def parse_lines(self, html): + """ Parse lines from html + """ + bs = BeautifulSoup(html) + # get tables + lines = bs.findAll('td', {'class': 'linie'}) - st[dir] = sta + l = {} - return st + for line in lines: + if line.a: + href = defaults.line_overview + line.a['href'] + if line.text: + l[line.text] = href + elif line.img: + l[line.img['alt']] = href + + return l @property def lines(self): """ Dictionary of Line names with url as value """ if not self._lines: - bs = BeautifulSoup(urlopen(defaults.line_overview)) - # get tables - lines = bs.findAll('td', {'class': 'linie'}) - - for line in lines: - if line.a: - href = defaults.line_overview + line.a['href'] - if line.text: - self._lines[line.text] = href - elif line.img: - self._lines[line.img['alt']] = href + self._lines = self.parse_lines(urlopen(defaults.line_overview).read()) return self._lines @@ -118,22 +133,18 @@ class ITipParser: return None - def get_departures_by_station(self, station): - """ Get list of Departures for one station + def parse_departures_by_station(self, html): + """ Parse departure page + precondition: html is correct departure page + handle select station page before calling this method """ - - # TODO 1. Error handling - # TODO 2. more error handling - # TODO 3. ultimative error handling - + bs = BeautifulSoup(html) dep = [] - bs = BeautifulSoup(urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8')))) + try: li = bs.ul.findAll('li') - if li[0].a: - # Dirty workaround for ambiguous station - bs = BeautifulSoup(urlopen(defaults.qando + li[0].a['href'])) - li = bs.ul.findAll('li') + + station = bs.strong.text.split(',')[0] for l in li: try: @@ -162,8 +173,8 @@ class ITipParser: dep.append(Departure(line, station, direction, tim, lowfloor)) - except: - print 'Warning: %s' % l + except Exception as e: + print 'Warning: %s' % e.message continue except AttributeError: @@ -172,39 +183,37 @@ class ITipParser: finally: return dep - def get_departures(self, url): - """ Get list of next departures as Departure object + def get_departures_by_station(self, station): + """ Get list of Departures for one station """ - #TODO parse line name and direction for station site parsing + # TODO 1. Error handling + # TODO 2. more error handling + # TODO 3. ultimative error handling - if not url: - # FIXME prevent from calling this method with None - print "ERROR empty url" + html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read() + + li = BeautifulSoup(html).ul.findAll('li') + + if li[0].a: + # Dirty workaround for ambiguous station + html = urlopen(defaults.qando + li[0].a['href']).read() + + dep = self.parse_departures_by_station(html) + + self.parse_departures_by_station(html) + return dep + + def parse_departures(self, html): + bs = BeautifulSoup(html) + + # Check for error messages + msg = bs.findAll('span', {'class': 'rot fett'}) + if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0: + print '\n'.join(map(lambda x: x.text.replace(' ', ''), msg)) return [] - # open url for 90 min timeslot / get departure for next 90 min - retry = 0 - tries = 2 # try a second time before return empty list - while retry < tries: - bs = BeautifulSoup(urlopen(url + "&departureSizeTimeSlot=90")) - try: - lines = bs.find('form', {'name': 'mainform'}).table.findAll('tr')[1] - break - - except AttributeError: - print 'FetchError' - msg = bs.findAll('span', {'class': 'rot fett'}) - if len(msg) > 0 and str(msg[0].text).find(u'technischen St') > 0: - print 'Temporary problem' - print '\n'.join(map(lambda x: x.text.replace(' ', ''), msg)) - # FIXME Change to error message after fixing qml gui - return [] - # FIXME more testing - retry += 1 - if retry == tries: - return [] - sleep(0.5) + lines = bs.find('form', {'name': 'mainform'}).table.findAll('tr')[1] if len(lines.findAll('td', {'class': 'info'})) > 0: station = lines.span.text.replace(' ', '') @@ -265,6 +274,36 @@ class ITipParser: return dep + def get_departures(self, url): + """ Get list of next departures as Departure object + """ + + #TODO parse line name and direction for station site parsing + + if not url: + # FIXME prevent from calling this method with None + print "ERROR empty url" + return [] + + # open url for 90 min timeslot / get departure for next 90 min + retry = 0 + tries = 2 # try a second time before return empty list + + while retry < tries: + html = urlopen(url + "&departureSizeTimeSlot=90").read() + dep = self.parse_departures(html) + + if dep: + return dep + + retry += 1 + if retry == tries: + return [] + + sleep(0.5) + + + UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5) LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere'] diff --git a/gotovienna/tests/data/errorpage.html b/gotovienna/tests/data/errorpage.html new file mode 100644 index 0000000..50c1393 --- /dev/null +++ b/gotovienna/tests/data/errorpage.html @@ -0,0 +1,34 @@ + + + + + + + + + + + + +Wiener Linien - i.tip-Abfahrtanzeige + + + + + + + + + + +
+ +
  Auf Grund einer technischen Störung
+   ist eine Abfrage derzeit leider nicht möglich.

+
+   An der Störungsbehebung wird bereits gearbeitet.

+
+   Wir ersuchen um Ihr Verständnis.

+ \ No newline at end of file diff --git a/gotovienna/tests/data/line_station.html b/gotovienna/tests/data/line_station.html new file mode 100644 index 0000000..74d2b06 --- /dev/null +++ b/gotovienna/tests/data/line_station.html @@ -0,0 +1,84 @@ + + + + + + + + + + + + +Wiener Linien - i.tip-Abfahrtanzeige + + + + + + + + + + +
+ + + + +
+ + + + + + + + + + + + + + + + + + +
Mit aktiviertem Javascript wird diese Seite alle 30 Sekunden automatisch aktualisiert, ansonsten muß der Button "Aktualisieren" gedrückt werden. Eine Änderung der Sortierung für die gefundenen Abfahrten oder Änderung des Zeitfenster wird erst ab der nächsten manuellen Aktualisierung aktiv.
+ Haltestelle: Kärntner Ring, Oper
+ Zeitintervall: 30 Minuten
+ Gewählte Linie: 1 +
+ + Weitere Linien + +
2,D
Sortierung
+ + nach Abfahrtzeit
+ + nach Ziel und Abfahrtzeit
+ + nach Linie, Ziel und Abfahrtzeit
Bitte wählen Sie ein Zeitfenster aus + + Nur Niederflurfahrzeuge + +
+ + + + + + + +
 Linie Ziel Abfahrt Niederflur-
 Fahrzeug? 
 1  Stefan-Fadinger-Platz  in Kürze... 
 1  Stefan-Fadinger-Platz  in 6 min 
 1  Stefan-Fadinger-Platz  in 10 min Geeignet für mobilitätseingeschränkte Fahrgäste
 1  Stefan-Fadinger-Platz  in 17 min 
 1  Stefan-Fadinger-Platz  in 25 min 
 1  Stefan-Fadinger-Platz  in 29 min 
+
+
+ + + diff --git a/gotovienna/tests/data/lines.html b/gotovienna/tests/data/lines.html new file mode 100644 index 0000000..506730d --- /dev/null +++ b/gotovienna/tests/data/lines.html @@ -0,0 +1,439 @@ + + + + + + + + + + + + +Wiener Linien - itip-Linienauswahl + + + + + + + + + + + +
+ + + + + + + + +
Von folgenden Linien können Echtzeitinformationen zu den Abfahrten abgerufen werden:
Bitte wählen Sie eine Linie aus.
+ + + + +
+ + + + + + + + + +
+ + + + +
S t r a ß e n b a h n
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 12  56  9
10       18 
      26   
3031 33   3738 
4041424344 46  49
  52     58 
60 62    67  
 71        
DOVRT       
+
+
+ + + + +
A u t o b u s
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 1A2A3A4A5A 7A8A9A
10A11A12A13A14A15A    
   23A24A25A26A27A28A29A
30A31A32A 34A35A36A 38A39A
40A       48A 
       57A 59A
  62A63A64A65A66A  69A
    74A     
  82A   86A87A88A89A
+
+
+ + + +
N i g h t l i n e
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      N6   
          
N20  N23 N25N26  N29
 N31   N35N36 N38 
 N41 N43  N46  N49
N50       N58 
N60 N62 N64 N66N67  
 N71   N75    
          
+
+
+ + + + + + + + + + + + +
U - B a h n
+ + + + + + + + +
U1U2U3U4U6
+
 
 
W e i t e r e  L i n i e n
+ + + + + +
11BWLB
+
+
+
+
+ + + diff --git a/gotovienna/tests/data/nodepartures.html b/gotovienna/tests/data/nodepartures.html new file mode 100644 index 0000000..9dcb9a0 --- /dev/null +++ b/gotovienna/tests/data/nodepartures.html @@ -0,0 +1,90 @@ + + + + + + + + + + + + +Wiener Linien - i.tip-Abfahrtanzeige + + + + + + + + +
+ + + + +
+ + + + + + + + + + + + + + + + + + +
Mit aktiviertem Javascript wird diese Seite alle 30 Sekunden automatisch aktualisiert, ansonsten muß der Button "Aktualisieren" gedrückt werden. Eine Änderung der Sortierung für die gefundenen Abfahrten oder Änderung des Zeitfenster wird erst ab der nächsten manuellen Aktualisierung aktiv.
+ Haltestelle: Handelskai S U
+ Zeitintervall: 30 Minuten
+ Gewählte Linie: N6 +
+ + Weitere Linien + +
11A,11B,5A
Sortierung
+ + nach Abfahrtzeit
+ + nach Ziel und Abfahrtzeit
+ + nach Linie, Ziel und Abfahrtzeit
Bitte wählen Sie ein Zeitfenster aus + + Nur Niederflurfahrzeuge + +
+ + + + + + + +
 Linie Ziel Abfahrt Niederflur-
 Fahrzeug? 
+ + + +

+ Keine Abfahrten gefunden.
+
+ +
+ + + diff --git a/gotovienna/tests/data/stationbased.html b/gotovienna/tests/data/stationbased.html new file mode 100644 index 0000000..04d76de --- /dev/null +++ b/gotovienna/tests/data/stationbased.html @@ -0,0 +1,141 @@ + + + + qando MobileWeb + + + + + + +
+ +

Monitor

+

Ergebnisse

+
Karlsplatz, Wien: aktualisiert um 13:04
+ +
Legende
+ +© Wiener Linien und VOR. Alle Rechte vorbehalten.
+Powered by Fluidtime | Design Software Service.
+Impressum und rechtliche Hinweise + + \ No newline at end of file diff --git a/gotovienna/tests/data/stations1.html b/gotovienna/tests/data/stations1.html new file mode 100644 index 0000000..07abc56 --- /dev/null +++ b/gotovienna/tests/data/stations1.html @@ -0,0 +1,440 @@ + + + + + + + + + + + + +Wiener Linien - i.tip-Liniendarstellung + + + + + + + + + + + + +
+ + + + + +
+ + + + + + + + +
Bitte wählen Sie eine Haltestelle aus.
Sie erhalten die Abfahrtsinformationen aller derzeit abfahrenden Linien ab der aktuellen Uhrzeit.
Gewählte Linie : 40
+
+ + + + + + +
+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Fahrtrichtung 
 Gersthof, Herbeckstraße 
 
 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
 
+
+
     + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
 Fahrtrichtung 
 Schottentor U 
 
       
       
       
       
       
       
       
       
       
       
       
       
 
+
+
+
+ + + diff --git a/gotovienna/tests/data/stations2.html b/gotovienna/tests/data/stations2.html new file mode 100644 index 0000000..1e3a454 --- /dev/null +++ b/gotovienna/tests/data/stations2.html @@ -0,0 +1,685 @@ + + + + + + + + + + + + +Wiener Linien - i.tip-Liniendarstellung + + + + + + + + + + + + +
+ + + + + +
+ + + + + + + + +
Bitte wählen Sie eine Haltestelle aus.
Sie erhalten die Abfahrtsinformationen aller derzeit abfahrenden Linien ab der aktuellen Uhrzeit.
Gewählte Linie : 1
+
+ + + + + + +
+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Fahrtrichtung 
 Stefan-Fadinger-Platz 
 
 
       
       
       
       
       
       
       
       
       
       
       
  
+ Börse
     
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
  
+ Stefan-Fadinger-Platz
     
       
 
+
+
     + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
 Fahrtrichtung 
 Prater Hauptallee 
 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
+ Börse
  
       
       
       
       
       
       
       
       
       
       
 
+
+
+
+ + + diff --git a/gotovienna/tests/realtime.py b/gotovienna/tests/realtime.py index a0c514c..b61aecb 100644 --- a/gotovienna/tests/realtime.py +++ b/gotovienna/tests/realtime.py @@ -1,26 +1,78 @@ -import unittest +# -*- coding: utf-8 -*- + +from nose.tools import assert_equal, assert_true, assert_false import sys import os +from datetime import time sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) +DATAPATH = 'data' from gotovienna.realtime import * parser = ITipParser() -class ParseTest(unittest.TestCase): +stationbased = open(os.path.join(DATAPATH, 'stationbased.html'), 'r').read() +line_station = open(os.path.join(DATAPATH, 'line_station.html'), 'r').read() +errorpage = open(os.path.join(DATAPATH, 'errorpage.html'), 'r').read() +nodepartures = open(os.path.join(DATAPATH, 'nodepartures.html'), 'r').read() +lines = open(os.path.join(DATAPATH, 'lines.html'), 'r').read() +stations1 = open(os.path.join(DATAPATH, 'stations1.html'), 'r').read() +stations2 = open(os.path.join(DATAPATH, 'stations2.html'), 'r').read() + +parsed_lines = parser.parse_lines(lines) + +def test_lines(): + assert_equal(dict, type(parsed_lines)) + assert_true(parsed_lines) + +def test_line_amount(): + assert_equal(104, len(parsed_lines.keys())) + +def test_line_link(): + assert_equal('http://www.wienerlinien.at/itip/linienwahl/linie.php?lng=de&lng=de&linie=1', parsed_lines['1']) + +def test_line_links(): + assert_true(filter(lambda x: x.startswith('http://'), parsed_lines.values())) + +def test_stations1(): + st1 = parser.parse_stations(stations1) + assert_true(st1.has_key(u'Gersthof, Herbeckstraße')) + assert_true(st1.has_key(u'Schottentor U')) + assert_equal(14, len(st1[u'Gersthof, Herbeckstraße'])) + assert_equal(12, len(st1[u'Schottentor U'])) + +def test_stations2(): + st2 = parser.parse_stations(stations2) + assert_true(st2.has_key(u'Stefan-Fadinger-Platz')) + assert_true(st2.has_key(u'Prater Hauptallee')) + assert_equal(31, len(st2[u'Stefan-Fadinger-Platz'])) + assert_equal(30, len(st2[u'Prater Hauptallee'])) + +def test_departures_by_station(): + dep = parser.parse_departures_by_station(stationbased) + # find all 34 departures + assert_equal(34, len(dep)) + l = list(set(map(lambda x: x['line'], dep))) + # there are 8 different lines + assert_equal(8, len(l)) + +def test_departures_by_station_lowfloor(): + dep = parser.parse_departures_by_station(stationbased) + assert_true(dep[0]['lowfloor']) + assert_false(dep[14]['lowfloor']) - def test_lines(self): - lines = parser.lines - self.assertTrue(type(lines) == dict) - self.assertTrue(lines) +def test_departures_by_station_datetime(): + dep = parser.parse_departures_by_station(stationbased) + assert_equal(int, type(dep[13]['time'])) + assert_equal(time, type(dep[14]['time'])) - def test_stations(self): - lines = parser.lines +def test_departures(): + dep = parser.parse_departures(line_station) - s = [] - for line in lines: - s.append(parser.get_stations(line[0])) - self.assertTrue(s) +def test_error_page(): + dep = parser.parse_departures(errorpage) + assert_equal(0, len(dep)) -if __name__ == '__main__': - unittest.main() +def test_no_departures(): + dep = parser.parse_departures(nodepartures) + assert_equal(0, len(dep)) diff --git a/qml/MainPage.qml b/qml/MainPage.qml index 7969c75..e2e7259 100644 --- a/qml/MainPage.qml +++ b/qml/MainPage.qml @@ -60,7 +60,7 @@ Page { // would have to "copy" the default delegate style onAccepted: { - console.log('accepted: ' + selectedIndex) + console.log('accepted: ' + lineSelectorModel.get(selectedIndex).name) //gline.text = lineSelectorModel.get(selectedIndex).name } } -- 1.7.9.5