1 from BeautifulSoup import BeautifulSoup, NavigableString
3 from datetime import time, datetime
4 from textwrap import wrap
# Exception type raised by the parsing code in this file when a page cannot be
# processed (see the `raise ParserError(...)` statements further down).
# NOTE(review): only fragments of the class are visible in this excerpt -- the
# body of __init__ and the header of the method owning the `return` below are
# elided.
7 class ParserError(Exception):
# Takes an optional `value` and an optional numeric `code`, defaulting to '' and 0.
8 def __init__(self, value='', code=0):
# Yields the repr() of the stored `value`; presumably the body of __str__ -- confirm.
13 return repr(self.value)
# Constructor of the parser: builds a BeautifulSoup tree from the raw `html`
# and keeps it on `self.soup`, which the later find()/findAll() lookups use.
# NOTE(review): the enclosing class header and the remaining initializer lines
# are elided here. Other methods read self._current_state, self._overview and
# self._details, which are presumably initialised in those lines -- confirm.
17 def __init__(self, html):
18 self.soup = BeautifulSoup(html)
# Return the `.text` of the <td> found under `x` whose class attribute is `cl`.
# `cls` is not used by the visible body; callers invoke this as
# Parser.get_tdtext(x, ...), so a @classmethod decorator is presumably on the
# elided line above -- confirm.
# If find() locates no matching cell, the `.text` access raises AttributeError.
23 def get_tdtext(cls, x, cl):
24 return x.find('td', {'class': cl}).text
# Extract the value stored under the 'change' key of each overview entry.
# Only the lookup of the row's 'col_change' cell text is visible here.
# NOTE(review): the conversion and return that follow are elided from this view.
27 def get_change(cls, x):
28 y = Parser.get_tdtext(x, 'col_change')
# Parse the row's 'col_price' cell into a float.
# NOTE(review): one line between the lookup and the return is elided here.
35 def get_price(cls, x):
36 y = Parser.get_tdtext(x, 'col_price')
# The cell uses ',' as the decimal separator; swap it for '.' so float() accepts it.
38 return float(y.replace(',', '.'))
# Body fragment of the date extractor (presumably Parser.get_date, which the
# overview parsing calls; its `def` line is elided from this view).
# Reads the text of the row's 'col_date' cell.
44 y = Parser.get_tdtext(x, 'col_date')
# The text is parsed as day.month.year ('%d.%m.%Y'); only the date part is returned.
46 return datetime.strptime(y, '%d.%m.%Y').date()
# Body fragment of the time extractor (presumably Parser.get_time; its `def`
# line and the condition choosing between the two returns are elided).
# Reads the text of the 'col_time' cell and converts it to datetime.time objects.
# NOTE(review): map() yields a list on Python 2 but a lazy iterator on Python 3.
52 y = Parser.get_tdtext(x, 'col_time')
# Variant 1: split the text on '-', then build a time() from the ':'-separated integers of each part.
55 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
# Variant 2: no '-' handling; textwrap.wrap cuts the text into pieces of at most
# 5 characters (presumably 'HH:MM' chunks -- confirm) before the same conversion.
57 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
# Convert the row's 'col_duration' cell into a datetime.time.
# NOTE(review): one line between the lookup and the return is elided here.
62 def get_duration(cls, x):
63 y = Parser.get_tdtext(x, 'col_duration')
# The ':'-separated integers are passed positionally to time(hour, minute, ...).
# NOTE(review): time() only accepts hours 0-23, so a duration of 24h or more would raise ValueError.
65 return time(*map(int, y.split(":")))
70 for detail in self.details():
# Build the nested trip-detail structure from the tour-detail tables of the page.
# NOTE(review): several lines of this method are elided from this view,
# including the end of the `trips` expression and the return statement.
73 def _parse_details(self):
# Refuse to parse when the parser is flagged as being in an error state (negative state value).
74 if self._current_state < 0:
75 raise ParserError('Unable to parse details while in error state')
# Collect every <div> whose class is 'data_table tourdetail'.
77 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
# Outer map: one entry per element `x` (presumably each of `tours` -- the iterable is elided).
# Inner map: one dict per <tr> of that element's <tbody>.
79 trips = map(lambda x: map(lambda y: {
80 'time': Parser.get_time(y),
# 'station': the cell's text nodes with their first two characters dropped and whitespace stripped.
81 'station': map(lambda z: z[2:].strip(),
82 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # keep only children whose exact type is NavigableString
# 'info': the whitespace-stripped text nodes of the 'col_info' cell, filtered the same way.
83 'info': map(lambda x: x.strip(),
84 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
85 }, x.find('tbody').findAll('tr')),
# Docstring and body fragment of the accessor for the parsed trip details
# (presumably `details`, which is called as self.details() elsewhere in this
# file; its `def` line and the closing lines of the docstring are elided).
# The visible statement after the docstring stores the result of
# self._parse_details() on self._details.
91 """returns list of trip details
92 [ [ { 'time': [datetime.time, datetime.time] if time else [],
93 'station': [u'start', u'end'] if station else [],
94 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
95 }, ... # next trip step
96 ], ... # next trip possibility
100 self._details = self._parse_details()
# Parse the overview table of the page into one dict per connection row.
# NOTE(review): parts of this method are elided from this view, including the
# end of the `overview` expression, its return, and the branch header that
# precedes the final raise.
104 def _parse_overview(self):
# The overview lives in the <table> with id 'tbl_fahrten'.
107 table = self.soup.find('table', {'id': 'tbl_fahrten'})
109 # Proceed only if the overview table exists and contains at least one row.
110 if table and table.findAll('tr'):
112 rows = table.findAll('tr')[1:] # skip the header row
# Build one dict per row, each field extracted by the matching Parser.get_* helper.
114 overview = map(lambda x: {
115 'date': Parser.get_date(x),
116 'time': Parser.get_time(x),
117 'duration': Parser.get_duration(x), # grab duration
118 'change': Parser.get_change(x),
119 'price': Parser.get_price(x),
# Presumably the fallback when no usable table was found -- confirm.
# NOTE(review): the message says 'details' although this appears to be the
# overview parser; possibly a copy-paste -- confirm before changing.
123 #self._current_state = self.STATE_ERROR
124 raise ParserError('Unable to parse details')
# Body fragment of the cached overview accessor (presumably `overview`; its
# `def` and `try:` lines are elided from this view).
# Parses on demand when nothing truthy is cached yet. An empty cached result
# is falsy too, so it is re-parsed on every call.
137 if not self._overview:
139 self._overview = self._parse_overview()
140 except AttributeError:
# On an AttributeError during parsing, dump the parsed document to a file
# named 'DEBUG' in the current working directory for inspection.
141 f = open('DEBUG', 'w')
142 f.write(str(self.soup))
# NOTE(review): the lines following the write are elided -- confirm that the
# file is closed and that the error is reported or re-raised.
145 return self._overview
# Placeholder hook: always raises NotImplementedError.
# Presumably meant to be overridden by a concrete parser subclass -- confirm.
147 def _check_request_state(self):
148 raise NotImplementedError()
# Return the parser's current state value (self._current_state).
# NOTE(review): the line just above the def is elided; it may carry a
# decorator such as @property -- confirm how callers access this.
151 def request_state(self):
152 return self._current_state