2 # -*- coding: UTF-8 -*-
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time
8 from textwrap import wrap
13 from gotovienna import defaults
15 POSITION_TYPES = ('stop', 'address', 'poi')
17 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
class ParserError(Exception):
    """ Error raised by the request builder and the page parsers.

    msg -- human readable description (default: 'Parser error')
    """

    def __init__(self, msg='Parser error'):
        # Hand the text to Exception so str(err) and err.args carry it.
        Exception.__init__(self, msg)
        # Kept as a plain attribute as well; exceptions do not provide
        # .message on their own on every Python version.
        self.message = msg
25 UNKNOWN, CORRECTION, RESULT = range(3)
def search(origin_tuple, destination_tuple, dtime=None):
    """ build route request
    returns html result (as urllib response)

    origin_tuple, destination_tuple -- (name, type) pairs; type has to be
        one of POSITION_TYPES
    dtime -- datetime used for the date/time fields of the request,
        defaults to the current local time
    raises ParserError if one of the position types is not supported
    """
    # Only fall back to "now" when the caller did not pick a time.
    if dtime is None:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    destination, destination_type = destination_tuple
    if origin_type not in POSITION_TYPES or \
            destination_type not in POSITION_TYPES:
        raise ParserError('Invalid position type')

    # Work on a copy: writing into defaults.search_post itself would leak
    # the values of this request into the shared module-level defaults.
    post = dict(defaults.search_post)
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # Best-effort debug trace: failing to write it must not abort the search.
    # NOTE(review): the logged payload is not visible in the reviewed excerpt;
    # the request URL is assumed here - confirm against the full file.
    try:
        with open(DEBUGLOG, 'a') as f:
            f.write(url + '\n')
    except IOError:
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
62 """ Parser for search response
def __init__(self, html):
    """ Build the BeautifulSoup tree for a search response.

    html -- markup of the response page, handed to BeautifulSoup unchanged
    """
    tree = BeautifulSoup(html)
    self.soup = tree
69 if self.soup.find('form', {'id': 'form_efaresults'}):
70 return PageType.RESULT
72 if self.soup.find('div', {'class':'form_error'}):
73 return PageType.CORRECTION
75 return PageType.UNKNOWN
def get_correction(self):
    """ Collect the alternatives offered on a correction page.

    returns a tuple (origin, destination) of lists holding the text of every
    <option> inside the select boxes 'nameList_origin' and
    'nameList_destination'; a side without select box yields an empty list
    raises ParserError if neither select box is present
    """
    nlo = self.soup.find('select', {'id': 'nameList_origin'})
    nld = self.soup.find('select', {'id': 'nameList_destination'})

    if not nlo and not nld:
        raise ParserError('Unable to parse html')

    # Only one side may be present; find() gave None for the other one,
    # which must not be dereferenced.
    origin = [opt.text for opt in nlo.findAll('option')] if nlo else []
    destination = [opt.text for opt in nld.findAll('option')] if nld else []

    return (origin, destination)
96 return rParser(str(self.soup))
101 """ Parser for routing results
def __init__(self, html):
    """ Build the BeautifulSoup tree for a routing result page.

    html -- markup of the result page, handed to BeautifulSoup unchanged
    """
    self.soup = BeautifulSoup(html)
    # Both caches are filled lazily; they have to exist from the start
    # because they are tested with "if not self._..." before being set.
    self._overview = None
    self._details = None
def get_tdtext(cls, x, cl):
    """ Return the text of the <td> below x whose class attribute is cl.

    x  -- element offering find(), e.g. a table row
    cl -- value of the class attribute to look for
    raises AttributeError if find() comes back with None (no such cell)
    """
    cell = x.find('td', {'class': cl})
    return cell.text
114 def get_change(cls, x):
115 y = rParser.get_tdtext(x, 'col_change')
def get_price(cls, x):
    """ Return the fare from the 'col_price' cell of row x as float.

    The cell text may use a decimal comma ('1,80'); it is turned into a dot
    before the conversion.
    raises ValueError if the text is not a number
    """
    # Go through cls rather than a hard-coded class name so that a subclass
    # overriding get_tdtext is honoured.
    text = cls.get_tdtext(x, 'col_price')
    return float(text.replace(',', '.'))
def get_date(cls, x):
    """ Return the date from the 'col_date' cell of row x.

    The text is expected as DD.MM.YYYY and returned as datetime.date.
    returns None if the cell is empty
    raises ValueError if non-empty text does not match the format
    """
    # cls instead of a hard-coded class name keeps subclass overrides working.
    text = cls.get_tdtext(x, 'col_date').strip()
    if not text:
        # strptime() would raise on an empty string; report "no date" instead.
        return None
    return datetime.strptime(text, '%d.%m.%Y').date()
def get_time(cls, x):
    """ Return the times from the 'col_time' cell of row x.

    'HH:MM-HH:MM' is split at the dash; text without a dash is cut into
    pieces of at most five characters ('HH:MMHH:MM'), so a single 'HH:MM'
    gives a one element list and an empty cell gives [].
    returns a list of datetime.time objects
    """
    # cls instead of a hard-coded class name keeps subclass overrides working.
    text = cls.get_tdtext(x, 'col_time')

    # "> 0" on purpose: a dash at index 0 is not treated as a separator.
    if text.find('-') > 0:
        parts = text.split('-')
    else:
        parts = wrap(text, 5)

    # A real list is returned regardless of how map() behaves.
    return [time(*[int(n) for n in part.split(':')]) for part in parts]
def get_duration(cls, x):
    """ Return the duration from the 'col_duration' cell of row x.

    The text is expected as HH:MM and returned as datetime.time.
    returns None if the cell is empty
    raises ValueError if the parts are not integers or out of range for
    datetime.time (e.g. 24 hours or more)
    """
    # cls instead of a hard-coded class name keeps subclass overrides working.
    text = cls.get_tdtext(x, 'col_duration').strip()
    if not text:
        # int('') would raise; report "no duration" instead.
        return None
    return time(*[int(n) for n in text.split(':')])
159 for detail in self.details():
def _parse_details(self):
    """ Extract the steps of every route from the 'tourdetail' tables.

    returns one list per <div class="data_table tourdetail">, holding one
    dict per <tr> of its <tbody>:
      'time'    -- result of rParser.get_time for the row
      'station' -- text nodes of the 'col_station' cell, first two
                   characters dropped, then stripped
      'info'    -- stripped text nodes of the 'col_info' cell
    """
    # NOTE(review): the end of the original statement is not visible in the
    # reviewed excerpt; iterating over the tour divs and returning the
    # collected list is assumed - confirm against the full file.

    def text_nodes(row, css_class):
        # Exact type comparison on purpose: only plain NavigableString
        # nodes are kept, tags and other node types are dropped.
        cell = row.find('td', {'class': css_class})
        return [node for node in cell.contents
                if type(node) == NavigableString]

    routes = []
    for tour in self.soup.findAll('div', {'class': 'data_table tourdetail'}):
        steps = []
        for row in tour.find('tbody').findAll('tr'):
            steps.append({
                'time': rParser.get_time(row),
                'station': [node[2:].strip()
                            for node in text_nodes(row, 'col_station')],
                'info': [node.strip()
                         for node in text_nodes(row, 'col_info')],
            })
        routes.append(steps)
    return routes
177 """returns list of trip details
178 [ [ { 'time': [datetime.time, datetime.time] if time else [],
179 'station': [u'start', u'end'] if station else [],
180 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
181 }, ... # next trip step
182 ], ... # next trip possibility
185 if not self._details:
186 self._details = self._parse_details()
def _parse_overview(self):
    """ Read the overview table ('tbl_fahrten') into a list of dicts.

    The first <tr> is treated as headline and skipped; every other row
    becomes a dict with the keys 'date', 'time', 'duration', 'change' and
    'price', filled by the matching rParser.get_* helper.
    raises ParserError if the table is missing or has no rows
    """
    # NOTE(review): the closing part of the original statement is not
    # visible in the reviewed excerpt; returning the collected list is
    # assumed - confirm against the full file.
    table = self.soup.find('table', {'id': 'tbl_fahrten'})

    # check if there is an overview table
    all_rows = table.findAll('tr') if table else []
    if not all_rows:
        raise ParserError('Unable to parse overview')

    overview = []
    for row in all_rows[1:]:  # cut off headline
        overview.append({
            'date': rParser.get_date(row),
            'time': rParser.get_time(row),
            'duration': rParser.get_duration(row),
            'change': rParser.get_change(row),
            'price': rParser.get_price(row),
        })
    return overview
222 if not self._overview:
224 self._overview = self._parse_overview()
225 except AttributeError:
226 f = open(DEBUGLOG, 'w')
227 f.write(str(self.soup))
230 return self._overview