/* This file is part of Cinaest.
 *
 * Copyright (C) 2009 Philipp Zabel
 *
 * Cinaest is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cinaest is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Errors raised while walking the downloaded markup.
 */
errordomain ParserError {
    WRONG_TAG,
    EOF
}

/**
 * A movie entry produced by {@link GoogleParser}, carrying the screening
 * details that were found next to the title.
 */
public class GoogleMovie : Movie {
    // Name of the cinema block the movie was listed under.
    public string cinema;
    // First " - " separated field of the info text (left unset if absent).
    public string runtime;
    // Second " - " separated field of the info text (left unset if absent).
    public string fsk;
    // Comma separated showtimes text.
    public string showtimes;
}

/**
 * Minimal, destructive tag scanner for a showtimes page.
 *
 * The parser walks a NUL terminated character buffer through the //current//
 * cursor. Tag names are terminated in place (a NUL byte is written into the
 * buffer), so the buffer must be writable and must outlive every string
 * returned by {@link parse_tag}.
 */
public class GoogleParser : Object {
    // Size of the download buffer, including the terminating NUL byte.
    private const int BUFFER_SIZE = 256 * 1024;

    private MovieSource.ReceiveMovieFunction _get_callback;
    // Cursor into the buffer handed to parse ().
    char *current;
    // Name of the cinema block parsed most recently.
    string cinema_name;
    MovieFilter _filter;
    // Glob pattern built from the filter title, or null for prefix matching.
    PatternSpec? pattern;

    /**
     * Returns the distance from the cursor to the next '<' character, or to
     * the end of the buffer if there is none.
     */
    public int next_tag_offset () {
        int offset = 0;
        while (current[offset] != '<' && current[offset] != 0) {
            offset++;
        }
        return offset;
    }

    /**
     * Moves the cursor onto the next '<' (or onto the terminating NUL).
     */
    public void next_tag () {
        if (current[0] == 0) {
            return;
        }
        current += next_tag_offset ();
    }

    /**
     * Moves the cursor just past the next '>' character, or onto the
     * terminating NUL if the tag is never closed.
     */
    public void finish_tag () {
        while (current[0] != '>' && current[0] != 0) {
            current++;
        }
        if (current[0] == '>') {
            current++;
        }
    }

    /**
     * Reads the name of the next tag.
     *
     * The character following the name is overwritten with NUL so that the
     * returned string can point straight into the buffer.
     *
     * @param finish whether to skip the remainder of the tag (attributes and
     *               the closing '>') as well
     * @return the tag name, including a leading '/' for closing tags
     * @throws ParserError.EOF if the buffer ends before the name is complete
     */
    public unowned string parse_tag (bool finish = true) throws Error {
        next_tag ();

        // The scan below starts at index 2, so make sure that indices 0 and 1
        // are still inside the buffer.
        if (current[0] == 0 || current[1] == 0) {
            throw new ParserError.EOF ("EOF in tag");
        }

        // Index 0 is '<'; index 1 is accepted unchecked so that the '/' of a
        // closing tag becomes part of the name.
        int i = 2;
        while (current[i].isalnum ()) {
            i++;
        }
        if (current[i] == 0) {
            throw new ParserError.EOF ("EOF in tag");
        }

        // The terminator written below replaces the '>' itself, so there is
        // nothing left to skip in that case.
        if (current[i] == '>') {
            finish = false;
        }

        current[i] = 0;
        unowned string tag = (string) (current + 1);
        current += i + 1;

        if (finish) {
            finish_tag ();
        }
        return tag;
    }

    /**
     * Consumes the next tag and checks its name.
     *
     * @param tag the expected tag name
     * @throws ParserError.WRONG_TAG if a different tag was found
     */
    public void expect_tag (string tag) throws Error {
        unowned string found = parse_tag (true);
        if (tag != found) {
            throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"", found, tag);
        }
    }

    /**
     * Returns a copy of the text between the cursor and the next tag and
     * moves the cursor onto that tag.
     */
    public string parse_text () {
        string text = ((string) current).ndup (next_tag_offset ());
        next_tag ();
        return text;
    }

    /**
     * Consumes one `name=value` attribute at the cursor.
     *
     * @param _attr name of the attribute the caller is interested in
     * @param value set to the attribute value (without surrounding double
     *              quotes) if the consumed attribute is named //_attr//,
     *              otherwise set to null
     */
    public void parse_attribute (string _attr, out string value) {
        value = null;
        if (current[0] == 0) {
            return;
        }

        // Attribute name: everything up to '=', '>' or the end of the buffer.
        int i = 0;
        while (current[i] != '=' && current[i] != '>' && current[i] != 0) {
            i++;
        }
        string attr = ((string) current).ndup (i);
        current += i;

        // No value follows. Leave the cursor on '>' (or NUL) so that the
        // caller sees the end of the tag instead of reading past it.
        if (current[0] != '=') {
            return;
        }
        current++;

        // Attribute value: runs until whitespace, '>' or the end of the
        // buffer; double quoted sections may contain whitespace and '>'.
        bool quoted = current[0] == '"';
        i = 0;
        while (current[i] != 0 && current[i] != '>' && !current[i].isspace ()) {
            if (current[i] == '"') {
                i++;
                while (current[i] != '"' && current[i] != 0) {
                    i++;
                }
                if (current[i] == 0) {
                    // Unterminated quote: stop on the NUL, never beyond it.
                    break;
                }
            }
            i++;
        }

        if (attr == _attr) {
            if (quoted && i >= 2 && current[i - 1] == '"') {
                value = ((string) current).substring (1, i - 2);
            } else if (quoted) {
                // Opening quote without a closing one: keep what is there.
                value = ((string) current).substring (1, i - 1);
            } else {
                value = ((string) current).ndup (i);
            }
        }
        current += i;
    }

    /**
     * Moves the cursor past any whitespace.
     */
    public void skip_whitespace () {
        while (current[0] != 0 && current[0].isspace ()) {
            current++;
        }
    }

    /**
     * Consumes the next tag, checks its name and extracts one attribute.
     *
     * @param tag the expected tag name
     * @param attribute name of the attribute to extract
     * @return the attribute value, or null if the tag does not carry it
     * @throws ParserError.WRONG_TAG if a different tag was found
     */
    public string? parse_tag_attribute (string tag, string attribute) throws Error {
        unowned string found = parse_tag (false);
        if (tag != found) {
            throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"", found, tag);
        }

        string? value = null;
        skip_whitespace ();
        while (current[0] != '>' && current[0] != 0) {
            // parse_attribute () reports null for every attribute that does
            // not match, so collect into a temporary to keep an earlier hit.
            string? candidate;
            parse_attribute (attribute, out candidate);
            if (candidate != null) {
                value = candidate;
            }
            skip_whitespace ();
        }

        // Skip the closing '>' bracket
        if (current[0] != 0) {
            current++;
        }
        return value;
    }

    /**
     * Parses one movie block and reports it through the query callback if
     * its title matches the active filter.
     *
     * The text is matched as raw markup, hence the HTML entities in the
     * replacement patterns.
     *
     * @throws Error if the markup deviates from the expected tag sequence or
     *               the title cannot be converted to UTF-8
     */
    public void parse_movie () throws Error {
        expect_tag ("div"); // class=movie
        expect_tag ("div"); // class=name
        expect_tag ("a"); // href="/movies?near=city&mid=..."
        expect_tag ("span"); // dir=ltr
        var raw_title = parse_text ().replace ("&#39;", "'").replace ("&amp;", "&");
        var title = convert (raw_title, -1, "utf-8", "iso-8859-1"); // FIXME
        expect_tag ("/span");
        expect_tag ("/a");
        expect_tag ("/div");
        expect_tag ("span"); // class=info

        string[] runtime_and_fsk = {};
        double rating = 0.0;
        if (parse_tag () == "nobr") {
            expect_tag ("nobr");
            // "Rated " ->"0.0 out of 5.0"
            string? alt = parse_tag_attribute ("img", "alt");
            if (alt != null && alt.length > 6) {
                rating = alt.substring (6).to_double ();
            }
            expect_tag ("img");
            expect_tag ("img");
            expect_tag ("img");
            expect_tag ("img");
            expect_tag ("/nobr");
            expect_tag ("/nobr");
            // The first three characters of the info text are dropped before
            // it is split into its " - " separated fields.
            var info = parse_text ().replace ("&lrm;", "");
            if (info.length >= 3) {
                runtime_and_fsk = info.substring (3).split (" - ");
            }
            expect_tag ("/span");
        }

        expect_tag ("div"); // class=times
        var showtimes = parse_text ().replace ("&nbsp;", ",");
        expect_tag ("/div");
        expect_tag ("/div");

        if (pattern == null) {
            if (!title.has_prefix (_filter.title)) {
                return;
            }
        } else {
            if (!pattern.match ((uint) title.length, title, null)) {
                return;
            }
        }

        var movie = new GoogleMovie ();
        movie.title = strip_tags (title);
        movie.year = 0;
        movie.rating = (int) (rating * 10);
        movie.cinema = cinema_name;
        if (runtime_and_fsk.length >= 2) {
            movie.runtime = runtime_and_fsk[0];
            movie.fsk = runtime_and_fsk[1];
        }
        movie.showtimes = showtimes;

        // TODO - could be configurable by settings
        if (movie.runtime != null) {
            movie.secondary = "%s - %s - %s".printf (movie.runtime, cinema_name, showtimes);
        } else {
            movie.secondary = "%s - %s".printf (cinema_name, showtimes);
        }

        _get_callback (movie);
    }

    /**
     * Returns a copy of //title// without a trailing " (OmU)" or " (OV)"
     * marker.
     */
    // FIXME - this is specific for Germany
    private string strip_tags (string title) {
        string tag_suffix = " (OmU)"; // original audio with subtitles
        if (title.has_suffix (tag_suffix)) {
            return title.substring (0, title.length - tag_suffix.length);
        }

        tag_suffix = " (OV)"; // original audio
        if (title.has_suffix (tag_suffix)) {
            return title.substring (0, title.length - tag_suffix.length);
        }

        return title.dup ();
    }

    /**
     * Parses one cinema block and remembers its name for the movie blocks
     * that follow.
     *
     * @throws Error if the markup deviates from the expected tag sequence or
     *               the name cannot be converted to UTF-8
     */
    public void parse_cinema () throws Error {
        expect_tag ("div"); // class=theater
        expect_tag ("div"); // class=desc id=theater_...
        expect_tag ("h2"); // class=name
        expect_tag ("a"); // href="/movies?near=city&tid=..."
        expect_tag ("span"); // dir=ltr
        string name = convert (parse_text ().replace ("&amp;", "&"), -1, "utf-8", "iso-8859-1"); // FIXME
        expect_tag ("/span");
        expect_tag ("/a");
        expect_tag ("/h2");
        expect_tag ("div"); // class=info
        var address_and_phone = parse_text ().replace ("&nbsp;", " ").split (" - ");
        if (address_and_phone.length >= 2) {
            // string address = address_and_phone[0];
            // string phone = address_and_phone[1];
        }
        expect_tag ("a"); // target=_top
        expect_tag ("/a");
        expect_tag ("/div");
        expect_tag ("/div");

        cinema_name = name;
        // FIXME - store cinema address for movie detail window
    }

    /**
     * Scans the whole buffer, dispatching movie and cinema blocks to their
     * parsers and skipping every other tag.
     *
     * @param buf NUL terminated, writable page content; tag names inside it
     *            are terminated in place while parsing
     * @throws Error as raised by {@link parse_movie} or {@link parse_cinema}
     */
    public void parse (ref char[] buf) throws Error {
        current = buf;
        next_tag ();
        while (current[0] != 0) {
            // Length of the tag at the cursor including its '>', clamped to
            // the end of the buffer for an unterminated tag.
            int i = 1;
            while (current[i] != '>' && current[i] != 0) {
                i++;
            }
            if (current[i] == '>') {
                i++;
            }

            if (((string) current).has_prefix ("<div class=movie>")) {
                parse_movie ();
            } else if (((string) current).has_prefix ("<div class=theater>")) {
                parse_cinema ();
            } else {
                current += i;
            }
            next_tag ();
        }
    }

    /**
     * Downloads the showtimes page and reports every movie matching
     * //filter// through //callback//.
     *
     * Failures are written to stderr; nothing is reported to the caller.
     *
     * @param filter movie filter; a title containing '*' is treated as a
     *               glob pattern, any other title as a prefix
     * @param location appended verbatim as the `near` query parameter if it
     *                 is neither null nor empty
     * @param callback invoked once per matching movie
     * @param cancellable optional cancellation handle
     */
    public async void query (MovieFilter filter, string? location, MovieSource.ReceiveMovieFunction callback, Cancellable? cancellable) {
        _get_callback = callback;
        _filter = filter;
        if (filter.title.chr (filter.title.length, '*') != null) {
            pattern = new PatternSpec (filter.title);
        } else {
            pattern = null;
        }

        try {
            // TODO - use google.de in Germany, also provides genres
            string uri = "http://google.com/movies";
            // NOTE(review): location is not URI-escaped here - confirm that
            // callers pass an already escaped value.
            if (location != null && location != "") {
                uri += "?near=" + location;
            }

            stdout.printf ("GET: %s\n", uri);

            File file = File.new_for_uri (uri);
            InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, cancellable);

            char[] buf = new char[BUFFER_SIZE];
            size_t nread;
            size_t total = 0;
            // Read at most BUFFER_SIZE - 1 bytes so that the terminating NUL
            // written below always stays inside the buffer.
            while (total < BUFFER_SIZE - 1) {
                nread = yield stream.read_async ((char *)buf + total, BUFFER_SIZE - 1 - total, Priority.DEFAULT_IDLE, cancellable);
                total += nread;
                if (cancellable != null && cancellable.is_cancelled ()) {
                    return;
                }
                if (nread == 0) {
                    break;
                }
            }
            buf[total] = 0;
            parse (ref buf);
        } catch (Error e) {
            stderr.printf ("Error: %s\n", e.message);
        }
    }
}