9565fd3390f7e95a9559ac01c8df2dc117362f68
[pywienerlinien] / gotovienna / realtime.py
1 # -*- coding: utf-8 -*-
2
3 from BeautifulSoup import BeautifulSoup
4 from urllib2 import urlopen
5 import settings
6 from datetime import time
7 import argparse
8 import re
9
class ITipParser(object):
    """ Scraper for the line, station and departure pages that are linked
    from ``settings.line_overview``.

    Parsed lines and stations are cached on the instance, so repeated
    lookups do not fetch the same page again once it has been parsed.
    """

    def __init__(self):
        # line name -> {direction name: [(station name, url or None)]}
        self._stations = {}
        # line name -> url of the page of that line
        self._lines = {}

    def get_stations(self, name):
        """ Get the stations of a line, grouped by direction

        name -- line name, i.e. a key of ``self.lines``

        Returns {'Directionname': [('Station name', 'url')]}; the url is
        None for rows that carry no link. Returns None if the line name
        is unknown.
        """
        if name in self._stations:
            return self._stations[name]

        if name not in self.lines:
            return None

        bs = BeautifulSoup(urlopen(self.lines[name]))
        tables = bs.findAll('table', {'class': 'text_10pix'})

        st = {}
        # Only the first two matching tables are read (presumably one per
        # direction -- TODO confirm). Slicing instead of indexing avoids an
        # IndexError when the page contains fewer tables than expected.
        for table in tables[:2]:
            direction = table.div.contents[-1].strip(' ')

            sta = []
            for tr in table.findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
                if tr.a:
                    sta.append((tr.a.text, settings.line_overview + tr.a['href']))
                else:
                    sta.append((tr.text.strip(' '), None))

            st[direction] = sta

        # Do not cache an empty result, so an unexpected page does not
        # hide the stations for the lifetime of this instance.
        if st:
            self._stations[name] = st

        return st

    @property
    def lines(self):
        """ Dictionary of Line names with url as value

        The overview page is fetched while the cache is still empty.
        """
        if not self._lines:
            bs = BeautifulSoup(urlopen(settings.line_overview))
            # get table cells holding the line links
            cells = bs.findAll('td', {'class': 'linie'})

            for cell in cells:
                if not cell.a:
                    continue

                href = settings.line_overview + cell.a['href']
                if cell.text:
                    self._lines[cell.text] = href
                elif cell.img:
                    # lines shown as an image are keyed by the alt text
                    self._lines[cell.img['alt']] = href

        return self._lines

    @staticmethod
    def _parse_departure(token):
        """ Convert the text of one departure cell into a value

        Returns 0 if the token contains 'rze...' (presumably the tail of
        'in Kuerze...', i.e. departing shortly -- TODO confirm), an int
        if the token is a plain number, a datetime.time if it has the
        form 'HH:MM', and None if it can not be interpreted.
        """
        if 'rze...' in token:
            return 0

        # str.strip('&nbsp;') would strip a *set of characters*, not the
        # entity, so remove the entity explicitly.
        cleaned = token.replace('&nbsp;', '').strip()

        if cleaned.isdigit():
            # time until next departure
            return int(cleaned)

        parts = cleaned.split(':')
        if len(parts) == 2 and all(part.isdigit() for part in parts):
            try:
                # time of next departure
                return time(int(parts[0]), int(parts[1]))
            except ValueError:
                # digits, but not a valid clock time (e.g. 25:99)
                return None

        return None

    def get_departures(self, url):
        """ Get list of next departures

        url -- url of a station page; a false value yields an empty list

        Each entry of the returned list is an
        integer if time until next departure
        time if time of next departure
        Rows that can not be parsed are reported on stdout and skipped.
        """

        #TODO parse line name and direction for station site parsing

        if not url:
            # FIXME prevent from calling this method with None
            return []

        bs = BeautifulSoup(urlopen(url))
        tables = bs.findAll('table')
        if not tables:
            # nothing to parse on this page
            return []

        # the departures are read from the last table; its first row is
        # skipped below
        result_lines = tables[-1].findAll('tr')

        dep = []
        for tr in result_lines[1:]:
            th = tr.findAll('th')
            if len(th) < 2:
                #TODO replace with logger
                # print with one parenthesised argument behaves the same
                # as the statement form under Python 2
                print("[DEBUG] Unable to find th in:\n%s" % str(tr))
                continue

            # parse time: the value is the second space separated field
            fields = th[-2].text.split(' ')
            if len(fields) < 2:
                print('Invalid time: %s' % fields)
                continue

            # A separate name is used for the cell text on purpose: the
            # module level ``time`` is the datetime.time class.
            departure = self._parse_departure(fields[1])
            if departure is None:
                # Unexpected content
                #TODO replace with logger
                print("[DEBUG] Invalid data:\n%s" % fields[1])
            else:
                dep.append(departure)

        return dep
109