1 from BeautifulSoup import BeautifulSoup, NavigableString
2 from urllib2 import urlopen
3 from urllib import urlencode
5 from datetime import datetime, time
6 from textwrap import wrap
8 POSITION_TYPES = ('stop', 'address', 'poi')
10 class ParserException(Exception):
12 def __init__(self, msg = 'Parser error'):
16 UNKNOWN, CORRECTION, RESULT = range(3)
def search(origin_tuple, destination_tuple, dtime=None):
    """ Build and send a route search request.

    origin_tuple, destination_tuple -- (name, position_type) pairs; every
        position_type has to be one of POSITION_TYPES.
    dtime -- datetime used for the date and time fields of the request;
        the current time (datetime.now()) is used when it is omitted.

    Returns the response object of urlopen() for the search URL.
    Raises ParserException when a position type is not in POSITION_TYPES.
    """
    # Fall back to "now" when the caller does not give a point in time.
    if dtime is None:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    destination, destination_type = destination_tuple
    if origin_type not in POSITION_TYPES or \
            destination_type not in POSITION_TYPES:
        raise ParserException('Invalid position type')

    # Fill in a copy: settings.search_post is a shared template and must not
    # keep the values of an individual request.
    post = settings.search_post.copy()
    post.update({
        'name_origin': origin,
        'type_origin': origin_type,
        'name_destination': destination,
        'type_destination': destination_type,
        'itdDateDayMonthYear': dtime.strftime('%d.%m.%Y'),
        'itdTime': dtime.strftime('%H:%M'),
    })
    params = urlencode(post)
    return urlopen('%s?%s' % (settings.action, params))
44 """ Parser for search response
def __init__(self, html):
    """ Parse html with BeautifulSoup and keep the result in self.bs
    """
    parsed = BeautifulSoup(html)
    self.bs = parsed
51 if self.bs.find('form', {'id': 'form_efaresults'}):
52 return PageType.RESULT
54 if self.bs.find('div', {'class':'form_error'}):
55 return PageType.CORRECTION
57 return PageType.UNKNOWN
def get_correction(self):
    """ Return the options offered for origin and destination.

    Looks up the <select> elements with the ids 'nameList_origin' and
    'nameList_destination' and returns the tuple
    (origin_options, destination_options) with their <option> tags.

    Raises ParserException when one of the two selects is not found.
    """
    origin_list = self.bs.find('select', {'id': 'nameList_origin'})
    destination_list = self.bs.find('select', {'id': 'nameList_destination'})

    if not origin_list or not destination_list:
        # ParserException is the exception class of this module; the name
        # used here before (ParserError) is not defined.
        raise ParserException('Unable to parse html')

    origin = origin_list.findAll('option')
    destination = destination_list.findAll('option')
    return (origin, destination)
77 return rParser(str(self.bs))
82 """ Parser for routing results
def __init__(self, html):
    """ Parse html with BeautifulSoup and set up the result caches
    """
    self.soup = BeautifulSoup(html)
    # Both caches are tested with `if not self._details` /
    # `if not self._overview` before they are filled, so they have to
    # exist from the start.
    self._details = None
    self._overview = None
def get_tdtext(cls, x, cl):
    """ Return the text of the <td> with class cl found inside x
    """
    cell = x.find('td', {'class': cl})
    return cell.text
95 def get_change(cls, x):
96 y = rParser.get_tdtext(x, 'col_change')
def get_price(cls, x):
    """ Return the 'col_price' cell of x as float

    Commas in the cell text are replaced by dots before the conversion.
    """
    raw = rParser.get_tdtext(x, 'col_price')
    normalized = raw.replace(',', '.')
    return float(normalized)
def get_date(cls, x):
    """ Return the 'col_date' cell of x (format %d.%m.%Y) as datetime.date
    """
    raw = rParser.get_tdtext(x, 'col_date')
    parsed = datetime.strptime(raw, '%d.%m.%Y')
    return parsed.date()
def get_time(cls, x):
    """ Return the times found in the 'col_time' cell of x

    If the text contains a '-' after its first character it is split on
    '-'; otherwise it is wrapped to a width of 5 characters. Every piece
    ('H:M' style) is turned into a datetime.time.
    """
    def to_time(piece):
        fields = map(int, piece.split(':'))
        return time(*fields)

    text = rParser.get_tdtext(x, 'col_time')
    pieces = text.split('-') if text.find("-") > 0 else wrap(text, 5)
    return map(to_time, pieces)
def get_duration(cls, x):
    """ Return the 'col_duration' cell of x as datetime.time

    The cell text is split on ':' and the integer fields are passed to
    time() in that order.
    """
    raw = rParser.get_tdtext(x, 'col_duration')
    fields = [int(field) for field in raw.split(":")]
    return time(*fields)
138 for detail in self.details():
def _parse_details(self):
    """ Extract the steps of every tour from the detail tables

    Returns one list per <div class="data_table tourdetail">; each of
    them holds one dict per <tr> of the table body:
        'time'    -- result of rParser.get_time() for the row
        'station' -- plain strings of the 'col_station' cell, first two
                     characters dropped, stripped
        'info'    -- plain strings of the 'col_info' cell, stripped
    """
    def plain_strings(row, css_class):
        # Exact type comparison on purpose (not isinstance): only nodes
        # that are NavigableString itself are kept, as before.
        cell = row.find('td', {'class': css_class})
        return [node for node in cell.contents
                if type(node) == NavigableString]

    def parse_row(row):
        return {
            'time': rParser.get_time(row),
            'station': [text[2:].strip()
                        for text in plain_strings(row, 'col_station')],
            'info': [text.strip()
                     for text in plain_strings(row, 'col_info')],
        }

    tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

    # NOTE(review): the end of the original nested map() expression was not
    # visible; it is taken to run over all tours and to be returned.
    return [[parse_row(row) for row in tour.find('tbody').findAll('tr')]
            for tour in tours]
156 """returns list of trip details
157 [ [ { 'time': [datetime.time, datetime.time] if time else [],
158 'station': [u'start', u'end'] if station else [],
159 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
160 }, ... # next trip step
161 ], ... # next trip possibility
164 if not self._details:
165 self._details = self._parse_details()
def _parse_overview(self):
    """ Extract the overview of all connections

    Returns one dict per row of the table with id 'tbl_fahrten' (the
    first row, the headline, is left out) with the keys 'date', 'time',
    'duration', 'change' and 'price'.

    Raises ParserException when the table is not found or has no rows.
    """
    table = self.soup.find('table', {'id': 'tbl_fahrten'})

    # Look the rows up only once; without a table there are none.
    all_rows = table.findAll('tr') if table else []
    if not all_rows:
        # ParserException is the exception class of this module; the name
        # used here before (ParserError) is not defined.
        raise ParserException('Unable to parse details')

    # NOTE(review): the end of the original map() expression was not
    # visible; the mapped rows are taken to be the return value.
    return [{
        'date': rParser.get_date(row),
        'time': rParser.get_time(row),
        'duration': rParser.get_duration(row),
        'change': rParser.get_change(row),
        'price': rParser.get_price(row),
    } for row in all_rows[1:]]  # cut off headline
201 if not self._overview:
203 self._overview = self._parse_overview()
204 except AttributeError:
205 f = open('DEBUG', 'w')
206 f.write(str(self.soup))
209 return self._overview