cbae26655d21144acb0bcd6349e833433a9d715e
[pywienerlinien] / iTip.py
1 from BeautifulSoup import BeautifulSoup
2 from urllib2 import urlopen
3 import settings
4 from datetime import time
5
class iParser:
    """Scraper for the line, station and departure pages reachable from
    ``settings.line_overview``.

    Line and station listings are fetched on first use and cached on the
    instance; departures are fetched on every call.
    """

    def __init__(self):
        # Cache: line name -> {direction name: [(station name, url or None)]}
        self._stations = {}
        # Cache: line name -> url of the page for that line
        self._lines = {}

    def get_stations(self, name):
        """ Get the stations of line `name`, grouped by direction
        {'Directionname': [('Station name', 'url')]}

        The url is None for stations whose table row contains no link.
        Returns None if `name` is not a key of `self.lines`.
        """
        if name not in self._stations:
            if name not in self.lines:
                return None

            bs = BeautifulSoup(urlopen(self.lines[name]))
            tables = bs.findAll('table', {'class': 'text_10pix'})
            stations = self._stations_by_direction(tables)
            if not stations:
                # Nothing could be parsed from the page: return the empty
                # result without caching it so a later call fetches again.
                return stations
            self._stations[name] = stations

        return self._stations[name]

    @staticmethod
    def _stations_by_direction(tables):
        """ Build {'Directionname': [('Station name', url or None)]} from the
        first two station tables (one table per direction).
        """
        by_direction = {}
        # Only the first two tables are used; a page with fewer tables
        # simply yields fewer directions instead of raising IndexError.
        for table in tables[:2]:
            direction = table.div.contents[-1].strip(' ')

            stations = []
            # Read the rows of *this* table, so each direction gets its own
            # station list.
            rows = table.findAll('tr', {'onmouseout': 'obj_unhighlight(this);'})
            for tr in rows:
                if tr.a:
                    url = settings.line_overview + tr.a['href']
                    stations.append((tr.a.text, url))
                else:
                    stations.append((tr.text.strip(' '), None))

            by_direction[direction] = stations
        return by_direction

    @property
    def lines(self):
        """ Dictionary of Line names with url as value

        Fetched from `settings.line_overview` on first access and cached.
        """
        if not self._lines:
            bs = BeautifulSoup(urlopen(settings.line_overview))
            # every line is listed in a <td class="linie"> cell
            for cell in bs.findAll('td', {'class': 'linie'}):
                if not cell.a:
                    continue
                href = settings.line_overview + cell.a['href']
                if cell.text:
                    self._lines[cell.text] = href
                elif cell.img:
                    # cells without text are keyed by the image's alt text
                    self._lines[cell.img['alt']] = href

        return self._lines

    def get_departures(self, url):
        """ Get list of next departures
        integer if time until next departure
        time if time of next departure

        Rows that cannot be parsed are reported on stdout and skipped.
        Returns an empty list if the page contains no table.
        """

        #TODO parse line name and direction for station site parsing

        bs = BeautifulSoup(urlopen(url))
        tables = bs.findAll('table')
        if not tables:
            return []
        # the departures are in the last table; its first row is skipped
        result_lines = tables[-1].findAll('tr')

        dep = []
        for tr in result_lines[1:]:
            th = tr.findAll('th')
            if len(th) < 2:
                #TODO replace with logger
                print("[DEBUG] Unable to find th in:\n%s" % str(tr))
                continue

            # the departure value is the second space-separated token of
            # the second-to-last header cell
            tokens = th[-2].text.split(' ')
            if len(tokens) < 2:
                print('Invalid time: %s' % tokens)
                continue

            departure = self._parse_departure(tokens[1])
            if departure is None:
                # Unexpected content
                #TODO replace with logger
                print("[DEBUG] Invalid data:\n%s" % tokens[1])
            else:
                dep.append(departure)

        return dep

    @staticmethod
    def _parse_departure(text):
        """ Convert one departure cell value.

        Returns an int for an all-digit value, a datetime.time for an
        'HH:MM' value, and None for anything else.
        """
        # Remove the literal entity as a substring; str.strip('&nbsp;')
        # would strip a *set of characters* instead.
        cleaned = text.replace('&nbsp;', '').strip()
        try:
            if cleaned.isdigit():
                # time until next departure
                return int(cleaned)
            hour, sep, minute = cleaned.partition(':')
            if sep and hour.isdigit() and minute.isdigit():
                # time of next departure
                return time(int(hour), int(minute))
        except ValueError:
            # time() rejects out-of-range values such as 25:61, and
            # isdigit() accepts some characters that int() rejects.
            return None
        return None