1 from BeautifulSoup import BeautifulSoup, NavigableString
3 from datetime import time, datetime
4 from textwrap import wrap
class ParserError(Exception):
    """Error raised by the parser when a page cannot be processed.

    Attributes:
        value: the error message (or any object describing the failure).
        code: numeric error code, ``0`` by default.
            NOTE(review): stored under the parameter's own name -- confirm
            that callers read it as ``.code``.
    """

    def __init__(self, value='', code=0):
        # Hand the message to the base class as well, so that ``args`` and
        # the default exception machinery carry it.
        Exception.__init__(self, value)
        self.value = value
        self.code = code

    def __str__(self):
        """Return the ``repr`` of the stored value."""
        return repr(self.value)
24 def __init__(self, html):
25 self.soup = BeautifulSoup(html)
28 for detail in self.details():
32 def _parse_details(self):
33 if self._current_state < 0:
34 raise ParserError('Unable to parse details while in error state')
36 trips = map(lambda x: map(lambda x: {
38 'time': map(lambda x: (time(*map(lambda x: int(x), x.split(':')))), wrap(x.find('td', {'class': 'col_time'}).text, 5)), # black magic appears
39 'station': map(lambda x: x[2:].strip(),
40 filter(lambda x: type(x) == NavigableString, x.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
41 'info': map(lambda x: x.strip(),
42 filter(lambda x: type(x) == NavigableString, x.find('td', {'class': 'col_info'}).contents)),
43 }, x.find('tbody').findAll('tr')),
44 self.soup.findAll('div', {'class': 'data_table tourdetail'})) # all routes
49 """returns list of trip details
50 [ [ { 'time': [datetime.time, datetime.time] if time else [],
51 'station': [u'start', u'end'] if station else [],
52 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
53 }, ... # next trip step
54 ], ... # next trip possibility
58 self._details = self._parse_details()
62 def _parse_overview(self):
64 table = self.soup.find('table', {'id': 'tbl_fahrten'})
66 # check if there is an overview table
67 if table and table.findAll('tr'):
69 rows = table.findAll('tr')[1:] # cut off headline
70 overview = map(lambda x: {
71 'date': datetime.strptime(x.find('td', {'class': 'col_date'}).text, '%d.%m.%Y') # grab date
72 if x.find('td', {'class': 'col_date'}).text else None, # if date is empty set to None
73 'time': map(lambda x: time(*map(lambda x: int(x), x.strip().split(':'))) if x else None, # extract times or set to None if empty
74 x.find('td', {'class': 'col_time'}).text.split('-')) if x.find('td', {'class': 'col_time'}) else [],
75 'duration': time(*map(lambda x: int(x), x.find('td', {'class': 'col_duration'}).text.split(':'))), # grab duration
76 'change': int(x.find('td', {'class': 'col_change'}).text) # grab changes
77 if x.find('td', {'class': 'col_change'}).text else 0, # if change is empty set to 0
78 'price': float(x.find('td', {'class': 'col_price'}).text.replace(',', '.')) # grab price
79 if x.find('td', {'class': 'col_price'}).text.find(',') >= 0 else 0.0, # if price is empty set to 0.0
83 self._current_state = self.STATE_ERROR
84 raise ParserError('Unable to parse details while in error state')
97 if not self._overview:
99 self._overview = self._parse_overview()
100 except AttributeError:
101 f = open('DEBUG', 'w')
105 return self._overview
107 def _check_request_state(self):
108 raise NotImplementedError()
def request_state(self):
    """Return the parser's current state as stored in ``_current_state``."""
    state = self._current_state
    return state
def get_stations(self, letter):
    """Return the station names listed for ``letter``, fetching them once.

    On the first request for a letter, the page at
    ``settings.stations % letter`` is downloaded and the ``value`` of every
    ``option`` inside the ``select`` with id ``letter`` is collected. The
    result is cached in ``self._stations`` and reused afterwards.

    Args:
        letter: key substituted into the ``settings.stations`` URL template.

    Returns:
        The cached list of option values for ``letter``.

    Raises:
        AttributeError: if the downloaded page has no matching ``select``.
    """
    # ``in`` replaces the deprecated dict.has_key().
    if letter not in self._stations:
        response = urllib2.urlopen(settings.stations % letter)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        select = BeautifulSoup(html).find('select', {'id': 'letter'})
        self._stations[letter] = [option['value']
                                  for option in select.findAll('option')]

    return self._stations[letter]
131 bs = BeautifulSoup(urllib2.urlopen(settings.line_overview).read())
133 lines = bs.findAll('table', {'class': 'linie'})
134 # cut line parameter out of href
135 self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)