1 /* This file is part of Cinaest.
3 * Copyright (C) 2009 Philipp Zabel
5 * Cinaest is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * Cinaest is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
// Error domain for the HTML parser in this file. The codes thrown elsewhere
// in the file are EOF (input ended inside a tag) and WRONG_TAG (the tag found
// was not the one expected).
19 errordomain ParserError {
// Address text of the cinema; parse_cinema assigns it right after constructing
// the object.
26 public string address;
// Creates a cinema from its name. The body is outside this view; presumably it
// stores _name in the name field that parse_movie later reads - TODO confirm.
29 public Cinema (string _name) {
// One movie showing, as built by GoogleParser.parse_movie and handed to the
// ReceiveMovie callback.
34 public class GoogleMovie {
// Summary line: runtime, cinema name and showtimes joined by dashes, or only
// cinema name and showtimes when runtime is null.
37 public string secondary;
// First element of the runtime/FSK text split; only assigned when that text
// splits into at least two parts.
39 public string runtime;
// Showtimes text collected from the times block of the listing.
41 public string showtimes;
// Parser for the movie listing page that query () downloads; parse () walks
// the downloaded buffer and dispatches to parse_movie / parse_cinema.
44 public class GoogleParser : Object {
// Location name taken from the first /movies?near= link that parse () finds.
47 public string location;
// Callback type used to hand a parsed movie to the caller of query ().
51 public delegate void ReceiveMovie (GoogleMovie movie);
// Callback stored by query () and invoked by parse_movie () with the movie it built.
52 public ReceiveMovie _get_callback;
// Scans forward from current until a < character or the NUL terminator is
// reached. Callers use the result as a byte offset from current (next_tag adds
// it to current, parse_text uses it as a copy length). The initialisation of i
// and the return statement are outside this view.
54 public int next_tag_offset () {
// Pre-increment: the byte at the initial index i is never tested.
56 while (current[++i] != '<' && current[i] != 0);
// Moves current forward by next_tag_offset (), i.e. toward the next < or the
// end of the buffer.
60 public void next_tag () {
63 current += next_tag_offset ();
// Deals with the remainder of the tag under current: loops while current
// points at neither > nor the NUL terminator, then handles the > case.
// The loop body and the action taken at > are outside this view; presumably
// both step current forward by one byte - TODO confirm.
66 public void finish_tag () {
67 while (current[0] != '>' && current[0] != 0)
69 if (current[0] == '>')
// Reads the tag at current and yields its name as a weak (unowned) string that
// points into the parse buffer. Callers compare the result against names such
// as a, div or /span.
// finish: expect_tag passes true, parse_tag_attribute passes false; presumably
//         it decides whether the rest of the tag is consumed - TODO confirm.
// Throws ParserError.EOF (message: EOF in tag); the guarding condition is
// outside this view.
73 public weak string parse_tag (bool finish = true) throws Error {
// Advance i over alphanumeric bytes; the byte at the initial index is skipped.
77 while (current[++i].isalnum ());
79 throw new ParserError.EOF ("EOF in tag");
80 if (current[i] == '>')
// The name starts one byte past current - presumably just after the opening <.
83 tag = (string) (current + 1);
// Parses the next tag (finish = true) and raises ParserError.WRONG_TAG, naming
// the found and the expected tag in the message. The comparison that guards
// the throw is outside this view - presumably found differs from tag.
90 public void expect_tag (string tag) throws Error {
91 var found = parse_tag (true);
93 throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
// Extracts the text between current and the next tag start (or end of input)
// into a newly allocated string. The rest of the body, including the return
// and any advance of current, is outside this view.
98 public string parse_text () {
// ndup copies next_tag_offset () bytes starting at current.
99 string text = ((string) current).ndup (next_tag_offset ());
// Parses one name=value attribute at current.
// _attr: name of the attribute the caller is interested in; how it is compared
//        with the parsed name is outside this view - TODO confirm.
// value: receives the attribute value. A value that starts with a double quote
//        is stored without its surrounding quotes, any other value is copied
//        as-is up to the first whitespace, > or NUL.
104 public void parse_attribute (string _attr, out string value) {
// Scan the attribute name: stop at =, at the end of the tag or at NUL.
109 while (current[++i] != '=' && current[i] != '>' && current[i] != 0) {
112 attr = ((string) current).ndup (i);
// Scan the value: stop at whitespace, > or NUL, but jump over quoted sections
// so that whitespace or > inside quotes does not end the value.
118 while (!current[++i].isspace () && current[i] != '>' && current[i] != 0) {
119 if (current[i] == '"')
120 while (current[++i] != '"' && current[i] != 0);
123 if (current[0] == '"')
124 value = ((string) current).substring (1, i - 2);
126 value = ((string) current).ndup (i);
// Scans over whitespace in the parse buffer. Only the scanning loop is visible
// here; how i is initialised and then applied to current is outside this view.
131 public void skip_whitespace () {
// Pre-increments i and stops at the first non-whitespace byte or at NUL.
135 while (current[++i].isspace () && current[i] != 0);
// Parses a tag and looks up one of its attributes.
// tag:       expected tag name; ParserError.WRONG_TAG is raised with the found
//            and expected names (the guarding comparison is outside this view).
// attribute: attribute name forwarded to parse_attribute.
// Declared to return string?; value starts as null, so presumably null is
// returned when the attribute is absent - TODO confirm.
139 public string? parse_tag_attribute (string tag, string attribute) throws Error {
// finish = false: the tag body has to stay unconsumed so its attributes can be read.
140 var found = parse_tag (false);
142 throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
146 string? value = null;
// Walk the attributes until the end of the tag or the end of input.
148 while (current[0] != '>' && current[0] != 0) {
149 parse_attribute (attribute, out value);
152 // Skip the closing '>' bracket
// Builds a copy of s in which decimal character references (an ampersand and
// a hash sign followed by digits) are replaced by the UTF-8 encoding of the
// code point; two further prefixes are special-cased and every other byte is
// copied through unchanged.
159 public string unescape_unicode (string s) {
// Byte-wise walk over s; l is presumably the byte length of s - TODO confirm.
164 for (i = 0; i < l; i++) {
165 if (s[i] == '&' && s[i + 1] == '#') {
// Find the end of the digit run that follows the two marker bytes.
166 for (j = i + 2; j < l; j++) {
167 if (!s[j].isdigit ())
// Convert the digit run to a number and append its UTF-8 encoding.
173 int codepoint = s.substring (i + 2, j - i - 2).to_int ();
174 char[] buf = new char[6];
175 ((unichar) codepoint).to_utf8 ((string) buf);
176 result += (string) buf;
// NOTE(review): the two prefix literals below look like HTML entities that
// were decoded in this copy of the file (probably the amp and quot entities).
// As written, the second one is three consecutive double quotes, which opens
// a Vala verbatim string - confirm against the original file.
181 if (s.offset (i).has_prefix ("&")) {
186 if (s.offset (i).has_prefix (""")) {
191 result += s.substring (i, 1);
// Parses one movie entry at current, filters it against the requested title
// and reports it through _get_callback. The expected tag sequence is the one
// spelled out by the expect_tag calls below; any deviation raises an error.
// Several statements of the body are outside this view.
197 public void parse_movie () throws Error {
198 expect_tag ("div"); // class=movie
199 expect_tag ("div"); // class=name
200 expect_tag ("a"); // href="/movies?near=city&mid=..."
201 expect_tag ("span"); // dir=ltr
// The title text is converted from iso-8859-1 to utf-8 before character
// references are decoded.
202 var title = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
203 expect_tag ("/span");
206 expect_tag ("span"); // class=info
207 string[] runtime_and_fsk = {};
209 var tag = parse_tag ();
// NOTE(review): parse_tag_attribute is declared to return string?, and the
// result is dereferenced here without a null check.
222 string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0"
223 rating = rating_string.to_double ();
228 expect_tag ("/nobr");
229 expect_tag ("/nobr");
// NOTE(review): the first replace pattern holds an invisible character in this
// copy (looks like a left-to-right mark, possibly a decoded entity) - confirm.
// The result is then cut by three bytes and split into runtime and FSK parts.
230 runtime_and_fsk = parse_text ().replace ("‎", "").offset (3).split (" - ");
231 if (parse_tag () == "a") {
234 if (parse_tag () == "a") {
237 expect_tag ("/span");
241 expect_tag ("div"); // class=times
// NOTE(review): the replace pattern below appears to be a no-break space or a
// decoded nbsp entity; parse_text returns raw page text - confirm which form
// the page actually contains.
242 var showtimes = parse_text ().replace (" ", ",");
// Each following a tag contributes its text plus a comma separator.
243 while (parse_tag () == "a") {
244 showtimes += parse_text () + ",";
// Title filter: plain prefix comparison when no pattern is set, otherwise a
// PatternSpec match. The action taken on a mismatch is outside this view.
248 if (pattern == null) {
249 if (!title.has_prefix (_title))
252 if (!pattern.match ((uint) title.length, title, null))
256 var movie = new GoogleMovie ();
// Language markers are stripped and double quotes are backslash-escaped.
258 movie.title = strip_tags (title).replace ("\"", "\\\"");
// The parsed rating is stored as an integer scaled by ten.
259 movie.rating = (int) (rating * 10);
261 movie.cinema = last_cinema;
262 if (runtime_and_fsk.length >= 2) {
263 movie.runtime = runtime_and_fsk[0];
264 movie.fsk = runtime_and_fsk[1];
266 movie.showtimes = showtimes;
268 // TODO - could be configurable by settings
269 if (movie.runtime != null)
270 movie.secondary = "%s - %s - %s".printf (movie.runtime, last_cinema.name, showtimes);
272 movie.secondary = "%s - %s".printf (last_cinema.name, showtimes);
274 _get_callback (movie);
277 // FIXME - this is specific for Germany
// Removes a trailing language marker, (OmU) or (OV) including the leading
// space, from title and returns the shortened copy. The return for titles
// without either marker is outside this view.
278 private string strip_tags (string title) {
279 string tag_suffix = " (OmU)"; // original audio with subtitles
280 if (title.has_suffix (tag_suffix))
281 return title.substring (0, title.length - tag_suffix.length);
282 tag_suffix = " (OV)"; // original audio
283 if (title.has_suffix (tag_suffix))
284 return title.substring (0, title.length - tag_suffix.length);
// Parses one cinema (theater) entry at current and stores it in last_cinema,
// which parse_movie attaches to the movies it builds afterwards. The expected
// tag sequence is the one spelled out by the expect_tag calls below.
288 public void parse_cinema () throws Error {
289 expect_tag ("div"); // class=theater
290 expect_tag ("div"); // class=desc id=theater_...
291 expect_tag ("h2"); // class=name
292 expect_tag ("a"); // href="/movies?near=city&tid=..."
293 expect_tag ("span"); // dir=ltr
// The name text is converted from iso-8859-1 to utf-8 before character
// references are decoded.
294 var name = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
295 expect_tag ("/span");
298 expect_tag ("div"); // class=info
// NOTE(review): the first replace pattern appears to be a no-break space or a
// decoded nbsp entity in this copy - confirm against the original file.
// The info text is split into address and phone at the dash separator.
299 var address_and_phone = parse_text ().replace (" ", " ").split (" - ");
300 string address = null;
// Address and phone are only taken when the split produced both parts.
302 if (address_and_phone.length >= 2) {
303 address = address_and_phone[0];
// Spaces and dashes are removed from the phone number.
304 phone = address_and_phone[1].replace (" ", "").replace ("-", "");
306 expect_tag ("a"); // target=_top
311 last_cinema = new Cinema (name);
312 last_cinema.address = address;
313 last_cinema.phone = phone;
// Walks the downloaded page in two passes over the buffer: first it looks for
// a link whose href starts with /movies?near= to learn the location name, then
// it scans for movie and theater blocks. The setup of current from buf, the
// branch bodies and the returned count are outside this view.
316 public int parse (ref char[] buf) throws Error {
// Pass 1: stop as soon as location is known or the input ends.
321 while (location == null && current[0] != 0) {
// NOTE(review): this scan for > has no NUL check, unlike the other scanners
// in this file - confirm it cannot run past the end of the buffer.
323 while (current[i++] != '>');
324 if (((string) current).has_prefix ("<a href=\"/movies?near=")) {
325 string href = parse_tag_attribute ("a", "href");
326 char* p = (char*) href.offset (13); // skip "/movies?near="
// The location value ends at the next query separator or at the end of href.
329 while (p[++j] != '&' && p[j] != 0);
// Upper-case the first byte in place before copying j bytes out.
330 p[0] = p[0].toupper ();
331 location = ((string) p).ndup (j);
// Pass 2: scan the remaining input for movie and theater blocks.
336 while (current[0] != 0) {
// NOTE(review): same unchecked scan for > as in pass 1.
338 while (current[i++] != '>');
// Presumably dispatches to parse_movie and parse_cinema - TODO confirm.
339 if (((string) current).has_prefix ("<div class=movie>")) {
342 } else if (((string) current).has_prefix("<div class=theater>")) {
// Downloads the movie listing page and feeds it to parse ().
// title:       title filter; when it contains an asterisk it is compiled into
//              a PatternSpec.
// location:    optional place name appended to the request as the near parameter.
// callback:    stored in _get_callback for parse_movie to invoke.
// cancellable: optional cancellation handle for the stream reads.
// Returns the result of parse (); the value returned on the error path is
// outside this view.
353 public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
354 _get_callback = callback;
// A title containing an asterisk is treated as a glob pattern.
356 if (title.chr(title.length, '*') != null) {
357 pattern = new PatternSpec (title);
362 // TODO - use google.de in Germany, also provides genres
363 string uri = "http://google.com/movies";
364 if (location != null && location != "")
365 uri += "?near=" + location;
367 stdout.printf ("GET: %s\n", uri);
369 File file = File.new_for_uri (uri);
// NOTE(review): null is passed here instead of cancellable, so opening the
// stream cannot be cancelled - confirm whether that is intended.
370 InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, null);
// The whole page is read into one fixed 256 KiB buffer.
372 char[] buf = new char[256*1024];
375 while (total < 256*1024) {
376 nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable);
// NOTE(review): cancellable is nullable and defaults to null, but it is
// dereferenced here without a null check.
378 if (cancellable.is_cancelled ())
384 return parse (ref buf);
386 stderr.printf ("Error: %s\n", e.message);