+++ /dev/null
-from BeautifulSoup import BeautifulSoup
-
-class ParserError(Exception):
- def __init__(self, value='', code=0):
- self.value = value
- self.code = code
-
- def __str__(self):
- return repr(self.value)
-
-class cParser:
-
- def __init__(self, html):
- self.soup = BeautifulSoup(html)
-
- def find_options(self):
- nlo = self.soup.find('select', {'id': 'nameList_origin'})
- nld = self.soup.find('select', {'id': 'nameList_destination'})
-
- if not nlo or not nld:
- raise ParserError('Unable to parse html')
-
- origin = nlo.findAll('option')
- destination = nld.findAll('option')
-
- if not origin:
- origin = []
- if not destination:
- destination = []
-
- return (origin, destination)
\ No newline at end of file
+++ /dev/null
-from BeautifulSoup import BeautifulSoup, NavigableString
-import urllib2
-from datetime import time, datetime
-from textwrap import wrap
-import settings
-
-class ParserError(Exception):
- def __init__(self, value='', code=0):
- self.value = value
- self.code = code
-
- def __str__(self):
- return repr(self.value)
-
-class Parser:
-
- def __init__(self, html):
- self.soup = BeautifulSoup(html)
- self._overview = None
- self._details = None
-
- @classmethod
- def get_tdtext(cls, x, cl):
- return x.find('td', {'class': cl}).text
-
- @classmethod
- def get_change(cls, x):
- y = Parser.get_tdtext(x, 'col_change')
- if y:
- return int(y)
- else:
- return 0
-
- @classmethod
- def get_price(cls, x):
- y = Parser.get_tdtext(x, 'col_price')
- if y.find(','):
- return float(y.replace(',', '.'))
- else:
- return 0.0
-
- @classmethod
- def get_date(cls, x):
- y = Parser.get_tdtext(x, 'col_date')
- if y:
- return datetime.strptime(y, '%d.%m.%Y').date()
- else:
- return None
-
- @classmethod
- def get_time(cls, x):
- y = Parser.get_tdtext(x, 'col_time')
- if y:
- if (y.find("-") > 0):
- return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
- else:
- return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
- else:
- return []
-
- @classmethod
- def get_duration(cls, x):
- y = Parser.get_tdtext(x, 'col_duration')
- if y:
- return time(*map(int, y.split(":")))
- else:
- return None
-
- def __iter__(self):
- for detail in self.details():
- yield detail
-
- def _parse_details(self):
- if self._current_state < 0:
- raise ParserError('Unable to parse details while in error state')
-
- tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
-
- trips = map(lambda x: map(lambda y: {
- 'time': Parser.get_time(y),
- 'station': map(lambda z: z[2:].strip(),
- filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
- 'info': map(lambda x: x.strip(),
- filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
- }, x.find('tbody').findAll('tr')),
- tours) # all routes
- return trips
-
- @property
- def details(self):
- """returns list of trip details
- [ [ { 'time': [datetime.time, datetime.time] if time else [],
- 'station': [u'start', u'end'] if station else [],
- 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
- }, ... # next trip step
- ], ... # next trip possibility
- ]
- """
- if not self._details:
- self._details = self._parse_details()
-
- return self._details
-
- def _parse_overview(self):
-
- # get overview table
- table = self.soup.find('table', {'id': 'tbl_fahrten'})
-
- # check if there is an overview table
- if table and table.findAll('tr'):
- # get rows
- rows = table.findAll('tr')[1:] # cut off headline
-
- overview = map(lambda x: {
- 'date': Parser.get_date(x),
- 'time': Parser.get_time(x),
- 'duration': Parser.get_duration(x), # grab duration
- 'change': Parser.get_change(x),
- 'price': Parser.get_price(x),
- },
- rows)
- else:
- #self._current_state = self.STATE_ERROR
- raise ParserError('Unable to parse details')
-
- return overview
-
- @property
- def overview(self):
- """dict containing
- date: datetime
- time: [time, time]
- duration: time
- change: int
- price: float
- """
- if not self._overview:
- try:
- self._overview = self._parse_overview()
- except AttributeError:
- f = open('DEBUG', 'w')
- f.write(str(self.soup))
- f.close()
-
- return self._overview
-
- def _check_request_state(self):
- raise NotImplementedError()
-
- @property
- def request_state(self):
- return self._current_state
\ No newline at end of file
POSITION_TYPES = ('stop', 'address', 'poi')
-class ParserException(Exception):
+class ParserError(Exception):
+    """Error raised when a request cannot be built or a page cannot be parsed.
+
+    The human-readable text is available both as ``e.message`` and through
+    ``str(e)`` / ``e.args``.
+    """
+
-    def __init__(self, msg = 'Parser error'):
+    def __init__(self, msg='Parser error'):
+        # Initialise the base class as well; otherwise str(e), e.args and
+        # tracebacks are empty and only the .message attribute carries text.
+        Exception.__init__(self, msg)
         self.message = msg
class PageType:
destination, destination_type = destination_tuple
if not origin_type in POSITION_TYPES or\
not destination_type in POSITION_TYPES:
- raise ParserException('Invalid position type')
+ raise ParserError('Invalid position type')
post = settings.search_post
post['name_origin'] = origin
post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
post['itdTime'] = dtime.strftime('%H:%M')
params = urlencode(post)
- return urlopen('%s?%s' % (settings.action, params))
+ url = '%s?%s' % (settings.action, params)
+
+ print "\nurl %s url\n\n%s\n\nurl %s url\n" % ('~'*100, url, '~'*100)
+
+ return urlopen(url)
class sParser:
"""
def __init__(self, html):
+ # Parse the raw page once; the tree is stored on self.soup (renamed
+ # from self.bs) and read by the other sParser methods.
- self.bs = BeautifulSoup(html)
+ self.soup = BeautifulSoup(html)
def check_page(self):
+ # Classify the parsed page. The result form is looked up first, so a
+ # page containing both markers is reported as RESULT.
+ #   form  id="form_efaresults"  -> PageType.RESULT
+ #   div   class="form_error"    -> PageType.CORRECTION
+ #   neither                     -> PageType.UNKNOWN
- if self.bs.find('form', {'id': 'form_efaresults'}):
+ if self.soup.find('form', {'id': 'form_efaresults'}):
return PageType.RESULT
- if self.bs.find('div', {'class':'form_error'}):
+ if self.soup.find('div', {'class':'form_error'}):
return PageType.CORRECTION
return PageType.UNKNOWN
def get_correction(self):
+ # Return (origin, destination): the option texts of the
+ # 'nameList_origin' and 'nameList_destination' <select> elements.
+ # A side whose <select> is absent yields an empty list.
+ # Raises ParserError only when BOTH selects are missing.
- nlo = self.bs.find('select', {'id': 'nameList_origin'})
- nld = self.bs.find('select', {'id': 'nameList_destination'})
+ nlo = self.soup.find('select', {'id': 'nameList_origin'})
+ nld = self.soup.find('select', {'id': 'nameList_destination'})
- if not nlo or not nld:
+ # 'and' instead of 'or': a page with only one of the two lists is now
+ # accepted instead of being rejected as unparsable.
+ if not nlo and not nld:
raise ParserError('Unable to parse html')
- origin = nlo.findAll('option')
- destination = nld.findAll('option')
-
- if not origin:
+ # The lists now hold the option texts rather than the option tags.
+ if nlo:
+ origin = map(lambda x: x.text, nlo.findAll('option'))
+ else:
origin = []
- if not destination:
+ if nld:
+ destination = map(lambda x: x.text, nld.findAll('option'))
+ else:
destination = []
return (origin, destination)
def get_result(self):
+ # Hand the page over to rParser; the soup is serialised back to a
+ # string with str() and passed to the rParser constructor.
- return rParser(str(self.bs))
+ return rParser(str(self.soup))
'date': rParser.get_date(x),
'time': rParser.get_time(x),
'duration': rParser.get_duration(x), # grab duration
- 'change': rParser.get_change(x),
+ 'change': rParser.get_change(x),
'price': rParser.get_price(x),
},
rows)
else:
- raise ParserError('Unable to parse details')
+ raise ParserError('Unable to parse overview')
return overview
return self._overview
if __name__ == '__main__':
- pass
\ No newline at end of file
+    # Command-line entry point: look up a route and print either the trip
+    # overview or the list of ambiguous origin/destination names.
+    arg_parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
+    arg_parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
+    arg_parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
+    # choices= makes argparse reject an unknown position type with a usage
+    # message; otherwise search() raises ParserError('Invalid position type')
+    # outside of any try block and the script dies with a traceback.
+    arg_parser.add_argument('-ot', metavar='type', type=str, choices=POSITION_TYPES,
+                            help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop')
+    arg_parser.add_argument('-dt', metavar='type', type=str, choices=POSITION_TYPES,
+                            help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop')
+
+    args = arg_parser.parse_args()
+
+    html = search((args.o, args.ot), (args.d, args.dt)).read()
+
+    # Separate names for the argparse parser, the page classifier and the
+    # result parser instead of rebinding a single 'parser' variable.
+    page = sParser(html)
+    state = page.check_page()
+
+    if state == PageType.RESULT:
+        try:
+            for overview in rParser(html).overview:
+                times = overview['time']
+                # NOTE(review): assumes a row can carry fewer than two times
+                # (e.g. an empty time cell) - confirm against rParser.get_time.
+                # Indexing [0]/[1] unconditionally would raise IndexError then.
+                if len(times) >= 2:
+                    time_from, time_to = times[0], times[1]
+                else:
+                    time_from = time_to = '-'
+                print '[%s] %s-%s (%s)' % (overview['date'], time_from, time_to, overview['duration'])
+        except ParserError as e:
+            print e.message
+    elif state == PageType.CORRECTION:
+        try:
+            origins, destinations = page.get_correction()
+        except ParserError:
+            print 'PANIC at correction page'
+        else:
+            for label, names in (('Origin', origins), ('Destination', destinations)):
+                if names:
+                    print
+                    print '* %s ambiguous:' % label
+                    print '', '\n '.join(names)
+    elif state == PageType.UNKNOWN:
+        print 'PANIC unknown result'
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-import urllib
-import sys
-from datetime import datetime
-import settings
-import webbrowser
-import urllib2
-
-from parseHtml import Parser
-import parseCorrection
-
-from PySide.QtCore import Qt
-from PySide.QtDeclarative import QDeclarativeView
-from BeautifulSoup import BeautifulSoup
-
-def QMLModel(overview, details):
- # Mapping from the "overview" data structure to a "plain" data
- # structure to be used as model for the qml listview
- r = []
- i = 0
- for item in overview:
- d = {
- 'date': item['date'].strftime('%d.%m.%Y') if item['date'] else u'Fußweg',
- 'duration': item['duration'].strftime('%H:%M'),
- 'price': item['price'],
- 'change': item['change'],
- 'details': details[i],
- }
-
- if len(item['time']) == 2 and all(x is not None for x in item['time']):
- d.update({
- 'time_from': item['time'][0].strftime('%H:%M'),
- 'time_to': item['time'][1].strftime('%H:%M'),
- })
- else:
- d.update({'time_from': '-', 'time_to': '-'})
-
- r.append(d)
- i += 1
- return r
-
-
-class Search:
-
- def __init__(self, origin, destination, origin_type='stop', destination_type='stop', parent=None):
- self.origin = origin
- self.destination = destination
- self.origin_type = origin_type
- self.destination_type = destination_type
- self.parent = parent
- self.view = None
- self.qml_model = None
-
- def get_html(self, dtime=None):
- if not dtime:
- dtime = datetime.now()
- #FIXME replace with logger
- print "get_html (%s:%s:%s)" % tuple(dtime.timetuple())[3:6]
- return urllib2.urlopen('%s?%s' % (settings.action, self.get_parameter(dtime)))
-
- def open_browser(self, dtime=datetime.now()):
- webbrowser.open('%s?%s' % (settings.action, self.get_parameter(dtime)))
-
- def open_qml(self, dtime=None):
- if not dtime:
- dtime = datetime.now()
- #FIXME replace with logger
- print "open_qml (%s:%s:%s)" % tuple(dtime.timetuple())[3:6]
- html = self.get_html(dtime)
- if BeautifulSoup(html).find('form', {'id': 'form_fahrplanauskunft'}):
- cor = parseCorrection.Parser(html)
- if p.origins:
- # TODO: Show selection
- pass
- if p.destinations:
- # TODO: Show selection
- pass
- p = Parser(html)
- self.qml_model = QMLModel(p.overview, p.details)
- self.view = QDeclarativeView(self.parent)
- self.view.setWindowTitle('Search results')
- self.view.setWindowFlags(Qt.Window)
- # quick & dirty workaround
- try:
- self.view.setAttribute(Qt.WA_Maemo5StackedWindow)
- except:
- pass
- self.view.setResizeMode(QDeclarativeView.SizeRootObjectToView)
- self.view.setSource('ui/Overview.qml')
- self.view.rootObject().setProperty('model', self.qml_model)
- self.view.show()
-
- def get_datetime(self, dtime):
- return (dtime.strftime('%d.%m.%Y'), dtime.strftime('%H:%M'))
-
- def get_parameter(self, dtime):
- date, time = self.get_datetime(dtime)
-
- post = {'language': 'de',
- 'sessionID': 0,
- 'requestID': 0,
- 'execInst': 'normal',
- 'command': '',
- 'anySigWhenPerfectNoOtherMatches': 1,
- 'itdLPxx_locationServerActive': '',
- 'locationServerActive': 0,
- 'typeInfo_origin': 'invalid',
- 'placeState_origin': 'empty',
- 'placeInfo_origin': 'invalid',
- 'place_origin': 'Wien',
- 'type_origin': self.origin_type, # stop/address/poi
- 'nameState_origin': 'empty',
- 'nameInfo_origin': 'invalid',
- 'anyType_origin': '',
- 'name_origin': self.origin,
- 'typeInfo_destination': 'invalid',
- 'placeState_destination': 'empty',
- 'placeInfo_destination': 'invalid',
- 'place_destination': 'Wien',
- 'type_destination': self.destination_type, # stop/address/poi
- 'nameState_destination': 'empty',
- 'nameInfo_destination': 'invalid',
- 'anyType_destination': '',
- 'name_destination': self.destination,
- 'itdTripDateTimeDepArr': 'dep',
- 'itdDateDayMonthYear': date, # DD.MM.YYYY
- 'itdTime': time, # HH:MM
- 'submitbutton': 'SUCHEN'
- }
-
- params = urllib.urlencode(post)
- return params