# pystan/lib/timetable_parser.py
# Recovered from a gitweb "blobdiff_plain" dump; diff markers removed.

import re

try:
    # Python 2 module name (the file was packaged for python2.5).
    from HTMLParser import HTMLParser
except ImportError:
    # Python 3 location of the same class.
    from html.parser import HTMLParser


class StanTimetableParser(HTMLParser):
    """Event-driven HTML parser that extracts navigation links and timetable rows.

    Feed HTML with ``feed()``; the outcome accumulates in ``self.result``:

    * ``result['navigation']`` -- dict that may hold ``'prev'`` and/or
      ``'next'``: the ``href`` of the first ``<a>`` found inside
      ``<div class="linehourPrev">`` / ``<div class="linehourNext">``, once a
      ``<div class="goatResultTop">`` has been seen.
    * ``result['timetable']`` -- list of rows; each row is a list of strings
      collected, in document order, from the ``<tr>`` elements of
      ``<table id="linehour">``: the text of ``<strong>`` elements (cut at the
      first ``'('``) and the text of ``<td>`` cells whose ``class`` contains
      ``'hour'``.

    The parser is a small state machine; ``self.state`` holds one of the
    ``TT_*`` constants below, or ``None`` when outside any region of interest.
    """

    # State names. Kept reachable through ``self.TT_*`` exactly as before.
    TT_CAPTURING_TIMETABLE = 'TT_CAPTURING_TIMETABLE'
    TT_CAPTURING_NAVIGATION = 'TT_CAPTURING_NAVIGATION'
    TT_STOP_HOUR = 'TT_STOP_HOUR'
    TT_STOP_NAME = 'TT_STOP_NAME'
    TT_NAVIG_PREV = 'TT_NAVIG_PREV'
    TT_NAVIG_NEXT = 'TT_NAVIG_NEXT'
    TT_NAVIG_PREV_LINK = 'TT_NAVIG_PREV_LINK'
    TT_NAVIG_NEXT_LINK = 'TT_NAVIG_NEXT_LINK'

    # Every state that means "we are inside the navigation block".
    _NAVIGATION_STATES = (TT_CAPTURING_NAVIGATION, TT_NAVIG_PREV, TT_NAVIG_NEXT)

    # For each "waiting for cell text" state, the end tags that terminate the
    # wait even if no text was seen (i.e. the cell was empty).
    _CELL_CLOSERS = {
        TT_STOP_NAME: ('strong', 'tr', 'table'),
        TT_STOP_HOUR: ('td', 'tr', 'table'),
    }

    # Leading run of characters that precedes the first '('.
    _BEFORE_PAREN_RE = re.compile(r'^[^(]+')

    def __init__(self):
        HTMLParser.__init__(self)

        self.result = {
            'navigation': {},
            'timetable': [],
        }

        self.current_tt_line = None  # cells of the row currently being read
        self.state = None
        self.last_tag = None         # name of the most recent start tag

    def handle_starttag(self, tag, attrs):
        """Advance the state machine on an opening tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` may be ``None`` for value-less attributes.
        """
        self.last_tag = tag

        attributes = dict(attrs)
        css_class = attributes.get('class')

        if tag == 'div' and css_class == 'goatResultTop':
            self.state = self.TT_CAPTURING_NAVIGATION

        elif tag == 'table' and attributes.get('id') == 'linehour':
            self.state = self.TT_CAPTURING_TIMETABLE

        elif self.state == self.TT_CAPTURING_TIMETABLE:
            if tag == 'tr':
                self.current_tt_line = []
            elif tag == 'strong':
                self.state = self.TT_STOP_NAME
            elif tag == 'td' and 'hour' in (css_class or ''):
                # ``or ''`` guards against a value-less ``class`` attribute.
                self.state = self.TT_STOP_HOUR

        elif self.state in self._NAVIGATION_STATES:
            # The prev/next containers are recognised from ANY navigation
            # state: a container without a link must not swallow the next one.
            if tag == 'div' and css_class == 'linehourPrev':
                self.state = self.TT_NAVIG_PREV
            elif tag == 'div' and css_class == 'linehourNext':
                self.state = self.TT_NAVIG_NEXT
            elif tag == 'a' and self.state != self.TT_CAPTURING_NAVIGATION:
                self._record_navigation_link(attributes.get('href'))

    def handle_data(self, data):
        """Capture the text of a stop-name or hour cell when one is awaited."""
        if self.state == self.TT_STOP_HOUR:
            self._append_cell(data)

        elif self.state == self.TT_STOP_NAME:
            # Remove the parenthesised part; if the text *starts* with '('
            # there is nothing in front of it, so keep the text unchanged.
            found = self._BEFORE_PAREN_RE.match(data)
            if found is not None:
                data = found.group()
            self._append_cell(data)

    def handle_endtag(self, tag):
        """Close empty cells, store finished rows and leave the table."""
        closers = self._CELL_CLOSERS.get(self.state)
        if closers is not None and tag in closers:
            # The awaited cell ended without text: stop waiting so that the
            # row/table end tags below are still honoured.
            self.state = self.TT_CAPTURING_TIMETABLE

        if self.state != self.TT_CAPTURING_TIMETABLE:
            return

        if tag == 'tr':
            if self.current_tt_line:
                self.result['timetable'].append(self.current_tt_line)
        elif tag == 'table':
            self.state = None

    def _record_navigation_link(self, href):
        """Store ``href`` under 'prev' or 'next' according to the current state."""
        if href is None:
            # Anchor without a target: keep waiting for a real link.
            return
        key = 'prev' if self.state == self.TT_NAVIG_PREV else 'next'
        self.result['navigation'][key] = href
        self.state = self.TT_CAPTURING_NAVIGATION

    def _append_cell(self, text):
        """Append ``text`` to the current row and resume scanning the table."""
        if self.current_tt_line is None:
            # Cell encountered before any <tr>; start a row rather than crash.
            self.current_tt_line = []
        self.current_tt_line.append(text)
        self.state = self.TT_CAPTURING_TIMETABLE