1 from BeautifulSoup import BeautifulSoup, NavigableString
3 from datetime import time, datetime
4 from textwrap import wrap
# Exception type raised by the parsing code in this file. The constructor
# accepts an optional `value` and an optional numeric `code`.
# NOTE(review): the constructor body and the `def` line of the method that
# owns `return repr(self.value)` are not visible in this excerpt; that method
# is presumably __str__ -- confirm against the full file.
8 class ParserError(Exception):
9 def __init__(self, value='', code=0):
14 return repr(self.value)
# Request-state codes: STATE_START == 0, STATE_SEARCH == 1, STATE_RESULT == 2.
18 STATE_START, STATE_SEARCH, STATE_RESULT = range(3)
# Builds a BeautifulSoup tree from `html`, keeps it on `self.soup` and sets
# the current state to 0 (the value STATE_START receives from range(3)).
# NOTE(review): lines between these statements are not visible here; other
# attributes read elsewhere in the file (e.g. `_details`, `_overview`) are
# presumably initialised there -- confirm.
20 def __init__(self, html):
21 self.soup = BeautifulSoup(html)
24 self._current_state = 0
def get_tdtext(cls, x, cl):
    """Return the text of the first ``td`` inside ``x`` whose class is ``cl``.

    ``x`` has to provide a BeautifulSoup-style ``find(name, attrs)`` method.
    A missing cell is not guarded against: when ``find`` returns ``None`` the
    attribute access raises ``AttributeError``.
    """
    cell = x.find('td', {'class': cl})
    return cell.text
# Reads the text of the row's 'col_change' cell through Parser.get_tdtext.
# NOTE(review): the statements that convert `y` and return the result are not
# visible in this excerpt -- confirm the return type against the full file.
31 def get_change(cls, x):
32 y = Parser.get_tdtext(x, 'col_change')
# Returns the row's 'col_price' cell as a float; a decimal comma in the text
# is replaced by a dot before the conversion.
# NOTE(review): at least one line between reading the cell and the return is
# not visible here (it may adjust `y` or handle an empty cell) -- confirm.
39 def get_price(cls, x):
40 y = Parser.get_tdtext(x, 'col_price')
42 return float(y.replace(',', '.'))
# Parses the row's 'col_date' cell as day.month.year ('%d.%m.%Y') and returns
# the result as a datetime.date.
# NOTE(review): the `def` line is not visible in this excerpt; presumably this
# is the body of get_date, which _parse_overview calls as Parser.get_date(x).
48 y = Parser.get_tdtext(x, 'col_date')
50 return datetime.strptime(y, '%d.%m.%Y').date()
# Turns the row's 'col_time' cell into datetime.time objects. Each piece of
# text is split on ':' and its integer fields are passed to time(...).
# Two return paths are visible:
#   * the cell text is split on '-' (one time per part), or
#   * textwrap.wrap(y, 5) cuts the text into chunks of at most five
#     characters -- presumably 'HH:MM' values written without a separator.
# NOTE(review): the `def` line and the condition choosing between the two
# returns are not visible in this excerpt -- confirm against the full file.
56 y = Parser.get_tdtext(x, 'col_time')
59 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
61 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
# Returns the row's 'col_duration' cell as a datetime.time: the text is split
# on ':' and the integer fields are passed to time(...) in order.
# NOTE(review): one line between reading the cell and the return is not
# visible here -- confirm whether it guards or rewrites `y`.
66 def get_duration(cls, x):
67 y = Parser.get_tdtext(x, 'col_duration')
69 return time(*map(int, y.split(":")))
74 for detail in self.details():
# Extracts the per-trip detail steps from the parsed page.
# Raises ParserError when the current state is negative (error state).
# For every table row `y` below a `tbody`, a dict is built with:
#   'time'    -- Parser.get_time(y)
#   'station' -- the NavigableString children of the 'col_station' cell, each
#                with its first two characters dropped and then stripped
#   'info'    -- the NavigableString children of the 'col_info' cell, stripped
# The filters compare with `type(...) ==`, so only exact NavigableString
# objects pass; instances of NavigableString subclasses are excluded.
# NOTE(review): the expression continues past the last line shown here (the
# iterable of the outer map and the return are not visible); `tours` is
# presumably that iterable -- confirm against the full file.
77 def _parse_details(self):
78 if self._current_state < 0:
79 raise ParserError('Unable to parse details while in error state')
81 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
83 trips = map(lambda x: map(lambda y: {
84 'time': Parser.get_time(y),
85 'station': map(lambda z: z[2:].strip(),
86 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
87 'info': map(lambda x: x.strip(),
88 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
89 }, x.find('tbody').findAll('tr')),
# NOTE(review): the `def` line of this accessor is not visible in this
# excerpt; `self.details()` is called elsewhere in the file, so it is
# presumably `details(self)`. The visible code parses lazily:
# `_parse_details()` runs only while `self._details` is falsy and its result
# is stored on the instance, so an empty parse result is parsed again on the
# next call. The closing docstring quotes and the return statement are also
# outside the visible lines.
95 """returns list of trip details
96 [ [ { 'time': [datetime.time, datetime.time] if time else [],
97 'station': [u'start', u'end'] if station else [],
98 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
99 }, ... # next trip step
100 ], ... # next trip possibility
103 if not self._details:
104 self._details = self._parse_details()
# Builds the trip overview from the table with id 'tbl_fahrten'.
# When the table exists and contains rows, the first row (the headline) is
# dropped and each remaining row becomes a dict with the keys 'date', 'time',
# 'duration', 'change' and 'price', filled by the matching Parser.get_*
# helper.
# NOTE(review): the end of the dict/map expression, the return and the branch
# structure around the raise are not visible in this excerpt; presumably the
# ParserError is raised when no usable table is found. Its message says
# 'details' although this method parses the overview -- confirm whether that
# wording is intended.
108 def _parse_overview(self):
111 table = self.soup.find('table', {'id': 'tbl_fahrten'})
113 # check if there is an overview table
114 if table and table.findAll('tr'):
116 rows = table.findAll('tr')[1:] # cut off headline
118 overview = map(lambda x: {
119 'date': Parser.get_date(x),
120 'time': Parser.get_time(x),
121 'duration': Parser.get_duration(x), # grab duration
122 'change': Parser.get_change(x),
123 'price': Parser.get_price(x),
127 #self._current_state = self.STATE_ERROR
128 raise ParserError('Unable to parse details')
# Lazily parses the overview: `_parse_overview()` runs only while
# `self._overview` is falsy, and the stored value is returned.
# If parsing raises AttributeError, the whole soup is written as text to a
# file named 'DEBUG' (relative path, opened with mode 'w', so an earlier dump
# is overwritten).
# NOTE(review): the `def` line, the `try:` line and whatever follows the
# write (closing the file, re-raising) are not visible in this excerpt --
# confirm against the full file that the handle is closed.
141 if not self._overview:
143 self._overview = self._parse_overview()
144 except AttributeError:
145 f = open('DEBUG', 'w')
146 f.write(str(self.soup))
149 return self._overview
def _check_request_state(self):
    """Placeholder for the request-state check; it is not implemented.

    Raises:
        NotImplementedError: on every call.
    """
    raise NotImplementedError
def request_state(self):
    """Return the state code currently stored in ``self._current_state``."""
    state = self._current_state
    return state
def get_stations(self, letter):
    """Return the option values listed for ``letter``, fetching them once.

    Results are cached per letter in ``self._stations``. Only a cache miss
    opens ``settings.stations % letter`` and collects the ``value`` attribute
    of every ``option`` inside the ``select`` element with id ``letter``.

    Raises whatever ``urllib2.urlopen`` raises on a failed request, and
    ``AttributeError`` when the page contains no such ``select`` element.
    """
    # The `in` operator replaces dict.has_key(), which is deprecated in
    # Python 2 and no longer exists in Python 3.
    if letter not in self._stations:
        bs = BeautifulSoup(urllib2.urlopen(settings.stations % letter))
        options = bs.find('select', {'id': 'letter'}).findAll('option')
        # A list comprehension always stores a real list in the cache, so the
        # value can be read again on every later call.
        self._stations[letter] = [option['value'] for option in options]

    return self._stations[letter]
# Downloads the page at `settings.line_overview`, finds every table with
# class 'linie' and stores, per table, the list of its link targets reduced
# to the text after the first '=' of each href. When an href contains no '=',
# str.find returns -1, so the slice starts at 0 and the whole href is kept.
# NOTE(review): the `def` line of this method is not visible in this excerpt
# and the body may continue past the last line shown -- confirm against the
# full file.
174 bs = BeautifulSoup(urllib2.urlopen(settings.line_overview))
176 lines = bs.findAll('table', {'class': 'linie'})
177 # cut line parameter out of href
178 self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)