Add movie action class
[cinaest] / src / plugins / google-parser.vala
1 /* This file is part of Cinaest.
2  *
3  * Copyright (C) 2009 Philipp Zabel
4  *
5  * Cinaest is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Cinaest is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
17  */
18
/**
 * Errors raised by the tag-level HTML parser in this file.
 */
errordomain ParserError {
        // A tag was read whose name differs from the one the caller expected
        WRONG_TAG,
        // The input ended while a tag was still being read
        EOF
}
23
/**
 * A Movie extended with the showtime-listing details that GoogleParser
 * fills in for each parsed entry.
 */
public class GoogleMovie : Movie {
        // Name of the cinema under which the movie was listed
        public string cinema;

        // Runtime text as it appears in the listing; left unset when the
        // listing carries no info line
        public string runtime;

        // Second component of the listing's info line
        // NOTE(review): presumably the German FSK age rating - confirm
        public string fsk;

        // Showtimes text with "&nbsp;" separators replaced by commas
        public string showtimes;
}
30
/**
 * Downloads the Google movie showtimes page and extracts cinemas and movies
 * from it.
 *
 * The parser is a cursor ({@link current}) walking over a NUL-terminated,
 * writable character buffer. Tag names are returned as pointers into that
 * buffer; the character following the tag name is overwritten with NUL to
 * terminate them, so the buffer is modified while parsing.
 *
 * Every movie that passes the title filter is handed to the callback given
 * to {@link query}.
 */
public class GoogleParser : Object {
        // Number of page bytes kept; the buffer has one extra byte for the terminator
        private const int BUFFER_SIZE = 256 * 1024;

        private MovieSource.ReceiveMovieFunction _get_callback;
        // Parse cursor into the buffer passed to parse ()
        char *current;
        // Name of the most recently parsed cinema; attached to following movies
        string cinema_name;
        MovieFilter _filter;
        // Glob pattern built from the filter title, or null for prefix matching
        PatternSpec pattern;

        /**
         * Returns the distance from the cursor to the next '<' character, or
         * to the end of the input if there is none. Does not move the cursor.
         */
        public int next_tag_offset () {
                int i = 0;
                while (current[i] != '<' && current[i] != 0) {
                        i++;
                }
                return i;
        }

        /**
         * Moves the cursor to the next '<' (or to the end of the input).
         * The cursor stays put if it already points at '<'.
         */
        public void next_tag () {
                if (current[0] == 0) {
                        return;
                }
                current += next_tag_offset ();
        }

        /**
         * Moves the cursor just past the next '>' (or to the end of the input).
         */
        public void finish_tag () {
                while (current[0] != '>' && current[0] != 0) {
                        current++;
                }
                if (current[0] == '>') {
                        current++;
                }
        }

        /**
         * Reads the name of the next tag.
         *
         * @param finish whether to skip the remainder of the tag, including
         *               its closing '>', after reading the name
         * @return the tag name (with a leading '/' for closing tags), pointing
         *         into the parse buffer
         * @throws ParserError.EOF if the input ends before the tag name does
         */
        public weak string parse_tag (bool finish = true) throws Error {
                bool closed;
                return read_tag (finish, out closed);
        }

        /**
         * Implementation of parse_tag () that additionally reports whether the
         * tag's closing '>' directly followed the name and was consumed.
         */
        private unowned string read_tag (bool finish, out bool closed) throws Error {
                closed = false;
                next_tag ();
                // next_tag () leaves the cursor on '<' or on the terminator;
                // never look beyond the terminator
                if (current[0] == 0 || current[1] == 0) {
                        throw new ParserError.EOF ("EOF in tag");
                }
                // Index 0 is '<' and index 1 may be the '/' of a closing tag,
                // so the alphanumeric scan starts at index 2
                int i = 2;
                while (current[i].isalnum ()) {
                        i++;
                }
                if (current[i] == 0) {
                        throw new ParserError.EOF ("EOF in tag");
                }
                closed = (current[i] == '>');
                // Terminate the name in place; this overwrites the delimiter
                current[i] = 0;
                unowned string tag = (string) (current + 1);
                current += i + 1;
                if (finish && !closed) {
                        finish_tag ();
                }
                return tag;
        }

        /**
         * Reads the next tag and verifies its name.
         *
         * @return true if the closing '>' was already consumed with the name
         * @throws ParserError.WRONG_TAG if the name differs from //tag//
         */
        private bool read_expected_tag (string tag, bool finish) throws Error {
                bool closed;
                unowned string found = read_tag (finish, out closed);
                if (tag != found) {
                        throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
                                                         found, tag);
                }
                return closed;
        }

        /**
         * Consumes the next tag completely and verifies its name.
         *
         * @param tag expected tag name, e.g. "div" or "/div"
         * @throws ParserError.WRONG_TAG if a different tag is found
         * @throws ParserError.EOF if the input ends inside the tag name
         */
        public void expect_tag (string tag) throws Error {
                read_expected_tag (tag, true);
        }

        /**
         * Returns a copy of the text between the cursor and the next tag and
         * moves the cursor to that tag.
         */
        public string parse_text () {
                string text = ((string) current).ndup (next_tag_offset ());
                next_tag ();
                return text;
        }

        /**
         * Consumes one attribute (name and optional value) at the cursor.
         *
         * The name ends at '=', whitespace, '>' or the end of the input. A
         * value ends at whitespace or '>' outside of double quotes. The
         * closing '>' of the tag is never consumed.
         *
         * @param _attr name of the attribute the caller is interested in
         * @param value set to the attribute value (without surrounding quotes)
         *              if the consumed attribute is named //_attr// and has a
         *              value, and to null otherwise
         */
        public void parse_attribute (string _attr, out string value) {
                value = null;
                if (current[0] == 0) {
                        return;
                }

                int i = 0;
                while (current[i] != '=' && current[i] != '>' && current[i] != 0 && !current[i].isspace ()) {
                        i++;
                }
                string attr = ((string) current).ndup (i);
                current += i;
                if (current[0] != '=') {
                        // Valueless attribute, end of tag or end of input: there
                        // is no value to read and the delimiter is left in place
                        return;
                }
                current++;

                bool unterminated = false;
                i = 0;
                while (current[i] != 0 && current[i] != '>' && !current[i].isspace ()) {
                        if (current[i] == '"') {
                                // Jump to the closing quote so that whitespace and
                                // '>' inside the quotes do not end the value
                                i++;
                                while (current[i] != '"' && current[i] != 0) {
                                        i++;
                                }
                                if (current[i] == 0) {
                                        unterminated = true;
                                        break;
                                }
                        }
                        i++;
                }

                if (attr == _attr) {
                        if (current[0] != '"') {
                                value = ((string) current).ndup (i);
                        } else if (unterminated) {
                                // No closing quote: take everything after the opening one
                                value = ((string) (current + 1)).ndup (i - 1);
                        } else {
                                // Drop the opening and the closing quote
                                value = ((string) (current + 1)).ndup (i - 2);
                        }
                }
                current += i;
        }

        /**
         * Moves the cursor past any whitespace.
         */
        public void skip_whitespace () {
                // The terminator is not whitespace, so this stops at the end of input
                while (current[0].isspace ()) {
                        current++;
                }
        }

        /**
         * Consumes the next tag, verifies its name and extracts one attribute.
         *
         * @param tag expected tag name
         * @param attribute name of the attribute to extract
         * @return the attribute value, or null if the tag has no such
         *         attribute; if it occurs repeatedly the last value wins
         * @throws ParserError.WRONG_TAG if a different tag is found
         * @throws ParserError.EOF if the input ends inside the tag name
         */
        public string? parse_tag_attribute (string tag, string attribute) throws Error {
                if (read_expected_tag (tag, false)) {
                        // "<tag>" has no attributes and its '>' is already consumed
                        return null;
                }

                string? value = null;
                skip_whitespace ();
                while (current[0] != '>' && current[0] != 0) {
                        // Use a scratch variable: the out parameter is reset on every
                        // call and must not wipe out a value found earlier in the tag
                        string? candidate;
                        parse_attribute (attribute, out candidate);
                        if (candidate != null) {
                                value = candidate;
                        }
                        skip_whitespace ();
                }
                // Skip the closing '>' bracket
                if (current[0] != 0) {
                        current++;
                }

                return value;
        }

        /**
         * Returns //text// advanced by up to //count// characters, stopping
         * early at the end of the string. The result points into //text//.
         */
        private static unowned string skip_chars (string text, int count) {
                unowned string rest = text;
                for (int n = 0; n < count && rest.get_char () != 0; n++) {
                        rest = rest.next_char ();
                }
                return rest;
        }

        /**
         * Parses one movie entry starting at the cursor and reports it through
         * the query callback if its title passes the filter.
         *
         * @throws Error if the markup deviates from the expected tag sequence
         *         or the title cannot be converted to UTF-8
         */
        public void parse_movie () throws Error {
                expect_tag ("div"); // class=movie
                expect_tag ("div"); // class=name
                expect_tag ("a"); // href="/movies?near=city&amp;mid=..."
                expect_tag ("span"); // dir=ltr
                var title = convert (parse_text ().replace ("&#39;", "'").replace ("&amp;", "&"), -1, "utf-8", "iso-8859-1"); // FIXME
                expect_tag ("/span");
                expect_tag ("/a");
                expect_tag ("/div");
                expect_tag ("span"); // class=info
                string[] runtime_and_fsk = {};
                double rating = 0.0;
                // The info span either starts with a rating block in nested
                // <nobr> tags; otherwise the tag consumed here is taken to be
                // the span's end
                if (parse_tag () == "nobr") {
                        expect_tag ("nobr");
                        // "Rated " ->"0.0 out of 5.0"
                        string? alt = parse_tag_attribute ("img", "alt");
                        if (alt != null) {
                                rating = skip_chars (alt, 6).to_double ();
                        }
                        expect_tag ("img");
                        expect_tag ("img");
                        expect_tag ("img");
                        expect_tag ("img");
                        expect_tag ("/nobr");
                        expect_tag ("/nobr");
                        // The first three characters of the info text are skipped
                        string info = parse_text ().replace ("&#8206;", "");
                        runtime_and_fsk = skip_chars (info, 3).split (" - ");
                        expect_tag ("/span");
                }
                expect_tag ("div"); // class=times
                var showtimes = parse_text ().replace ("&nbsp;", ",");
                expect_tag ("/div");
                expect_tag ("/div");

                // Filtering happens only now because the whole entry has to be
                // consumed either way
                if (pattern == null) {
                        if (!title.has_prefix (_filter.title)) {
                                return;
                        }
                } else {
                        if (!pattern.match ((uint) title.length, title, null)) {
                                return;
                        }
                }

                var movie = new GoogleMovie ();

                movie.title = strip_tags (title);
                movie.year = 0;
                // Rating is stored as an integer, ten times the parsed value
                movie.rating = (int) (rating * 10);

                movie.cinema = cinema_name;
                if (runtime_and_fsk.length >= 2) {
                        movie.runtime = runtime_and_fsk[0];
                        movie.fsk = runtime_and_fsk[1];
                }
                movie.showtimes = showtimes;

                // TODO - could be configurable by settings
                if (movie.runtime != null) {
                        movie.secondary = "%s - %s - %s".printf (movie.runtime, cinema_name, showtimes);
                } else {
                        movie.secondary = "%s - %s".printf (cinema_name, showtimes);
                }

                _get_callback (movie);
        }

        // FIXME - this is specific for Germany
        /**
         * Returns //title// without a trailing " (OmU)" (original audio with
         * subtitles) or " (OV)" (original audio) marker.
         */
        private string strip_tags (string title) {
                string[] suffixes = { " (OmU)", " (OV)" };
                foreach (var suffix in suffixes) {
                        if (title.has_suffix (suffix)) {
                                return title.substring (0, title.length - suffix.length);
                        }
                }
                return title;
        }

        /**
         * Parses one cinema entry starting at the cursor and remembers its
         * name for the movies that follow.
         *
         * @throws Error if the markup deviates from the expected tag sequence
         *         or the name cannot be converted to UTF-8
         */
        public void parse_cinema () throws Error {
                expect_tag ("div"); // class=theater
                expect_tag ("div"); // class=desc id=theater_...
                expect_tag ("h2"); // class=name
                expect_tag ("a"); // href="/movies?near=city&amp;tid=..."
                expect_tag ("span"); // dir=ltr
                string name = convert (parse_text ().replace ("&amp;", "&"), -1, "utf-8", "iso-8859-1"); // FIXME
                expect_tag ("/span");
                expect_tag ("/a");
                expect_tag ("/h2");
                expect_tag ("div"); // class=info
                // Address and phone ("address - phone") are not used yet, but the
                // text still has to be consumed to advance the cursor
                parse_text ();
                expect_tag ("a"); // target=_top
                expect_tag ("/a");
                expect_tag ("/div");
                expect_tag ("/div");

                cinema_name = name;
                // FIXME - store cinema address for movie detail window
        }

        /**
         * Parses a complete page.
         *
         * @param buf NUL-terminated page contents; modified during parsing
         * @throws Error if a movie or cinema entry cannot be parsed
         */
        public void parse (ref char[] buf) throws Error {
                current = buf;
                next_tag ();
                if (current[0] == 0) {
                        return;
                }

                // The first tag is handled separately: it is either the location
                // link or skipped
                if (((string) current).has_prefix ("<a href=\"/movies?near=")) {
                        // "/movies?near=" ->"berlin&amp;date=1"
                        // TODO - extract location and cache it
                        parse_tag_attribute ("a", "href");
                } else {
                        finish_tag ();
                }
                next_tag ();

                while (current[0] != 0) {
                        if (((string) current).has_prefix ("<div class=movie>")) {
                                parse_movie ();
                        } else if (((string) current).has_prefix ("<div class=theater>")) {
                                parse_cinema ();
                        } else {
                                // Uninteresting tag: step over it
                                finish_tag ();
                        }
                        next_tag ();
                }
        }

        /**
         * Downloads the showtimes page and reports every movie whose title
         * passes the filter.
         *
         * A filter title containing '*' is matched as a glob pattern, any
         * other title as a prefix. At most 256 KiB of the page are read.
         * Errors are printed to stderr and not propagated.
         *
         * @param filter filter whose title selects the movies to report
         * @param location value for the "near" query parameter, or null/empty
         *                 to request the page without a location
         * @param callback invoked once for every matching movie
         * @param cancellable optional cancellable for the download
         */
        public async void query (MovieFilter filter, string? location, MovieSource.ReceiveMovieFunction callback, Cancellable? cancellable) {
                _get_callback = callback;
                _filter = filter;
                if (filter.title.chr(filter.title.length, '*') != null) {
                        pattern = new PatternSpec (filter.title);
                } else {
                        pattern = null;
                }
                try {
                        // TODO - use google.de in Germany, also provides genres
                        string uri = "http://google.com/movies";
                        if (location != null && location != "") {
                                // Escape the location so that spaces, '&' and non-ASCII
                                // characters cannot break the query string
                                uri += "?near=" + Uri.escape_string (location, "", false);
                        }

                        stdout.printf ("GET: %s\n", uri);

                        File file = File.new_for_uri (uri);
                        InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, cancellable);

                        // One extra byte so the terminator fits even if the page
                        // fills the whole buffer
                        char[] buf = new char[BUFFER_SIZE + 1];
                        size_t nread;
                        size_t total = 0;
                        while (total < BUFFER_SIZE) {
                                nread = yield stream.read_async ((char *)buf + total, BUFFER_SIZE - total, Priority.DEFAULT_IDLE, cancellable);
                                total += nread;
                                if (cancellable != null && cancellable.is_cancelled ()) {
                                        return;
                                }
                                if (nread == 0) {
                                        break;
                                }
                        }
                        buf[total] = 0;
                        parse (ref buf);
                } catch (Error e) {
                        stderr.printf ("Error: %s\n", e.message);
                }
        }
}