--- /dev/null
+/* This file is part of Cinaest.
+ *
+ * Copyright (C) 2009 Philipp Zabel
+ *
+ * Cinaest is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Cinaest is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Errors thrown by GoogleParser while it walks the page markup.
+ */
+errordomain ParserError {
+    /* The tag found in the markup is not the one the parser expected. */
+    WRONG_TAG,
+    /* The buffer ended while a tag was being read. */
+    EOF
+}
+
+/**
+ * A cinema (theater) entry: its name plus optional contact details.
+ */
+public class Cinema {
+    /* Display name, set by the constructor. */
+    public string name;
+    /* Street address; stays null until assigned. */
+    public string address;
+    /* Phone number; stays null until assigned. */
+    public string phone;
+
+    /**
+     * Creates a cinema that only has a name.
+     *
+     * @param _name name of the cinema
+     */
+    public Cinema (string _name) {
+        this.name = _name;
+    }
+}
+
+/**
+ * One movie showing at one cinema, as filled in by GoogleParser.parse_movie ().
+ */
+public class GoogleMovie {
+    /* Title with the language suffix stripped and double quotes backslash-escaped. */
+    public string title;
+    /* Parsed rating multiplied by ten and truncated to an integer. */
+    public int rating;
+    /* Summary line: "runtime - cinema - showtimes", or "cinema - showtimes" without a runtime. */
+    public string secondary;
+    /* Cinema that was parsed most recently before this movie. */
+    public Cinema cinema;
+    /* First " - "-separated part of the info text; null when the info text had fewer than two parts. */
+    public string runtime;
+    /* Second " - "-separated part of the info text; null when the info text had fewer than two parts. */
+    public string fsk;
+    /* Comma-separated showtimes. */
+    public string showtimes;
+}
+
+/**
+ * Scraper for the Google Movies showtimes page.
+ *
+ * query () downloads the page, parse () walks the markup with a small
+ * hand-written tag scanner, and every movie that passes the title filter is
+ * handed to the ReceiveMovie callback.
+ *
+ * The scanner works in place on a NUL-terminated byte buffer: parse_tag ()
+ * overwrites the delimiter after a tag name with NUL and returns a pointer
+ * into that buffer. All scanner offsets are byte offsets.
+ */
+public class GoogleParser : Object {
+    // Size of the download buffer used by query (), including the terminating NUL.
+    private const int BUFFER_SIZE = 256 * 1024;
+
+    // Read cursor into the buffer passed to parse ().
+    char *current;
+    // Cinema most recently read by parse_cinema (); attached to the movies that follow it.
+    Cinema last_cinema;
+    // Location taken from the first "/movies?near=" link; parse () only searches for it while this is null.
+    public string location;
+    // Title filter of the last query (); used as a prefix match when no pattern is set.
+    string _title;
+    // Glob pattern built from the title filter when it contains '*', otherwise null.
+    PatternSpec pattern;
+
+    /**
+     * Callback type invoked once for every movie that matches the title filter.
+     */
+    public delegate void ReceiveMovie (GoogleMovie movie);
+    public ReceiveMovie _get_callback;
+
+    /**
+     * Counts the bytes between the cursor and the next '<'.
+     *
+     * @return distance to the next '<', or to the terminating NUL if there is none
+     */
+    public int next_tag_offset () {
+        int i = 0;
+        while (current[i] != '<' && current[i] != 0) {
+            i++;
+        }
+        return i;
+    }
+
+    /**
+     * Moves the cursor to the next '<', or to the end of the buffer.
+     */
+    public void next_tag () {
+        if (current[0] == 0) {
+            return;
+        }
+        current += next_tag_offset ();
+    }
+
+    /**
+     * Moves the cursor just past the next '>', or to the end of the buffer.
+     */
+    public void finish_tag () {
+        while (current[0] != '>' && current[0] != 0) {
+            current++;
+        }
+        if (current[0] == '>') {
+            current++;
+        }
+    }
+
+    /**
+     * Reads the name of the next tag and advances past it.
+     *
+     * The byte following the name is overwritten with NUL so that the returned
+     * string can point straight into the buffer.
+     *
+     * @param finish whether to also skip the rest of the tag up to and including '>'
+     * @return the tag name (with a leading '/' for closing tags), owned by the buffer
+     * @throws ParserError.EOF when the buffer ends before the tag name is complete
+     */
+    public weak string parse_tag (bool finish = true) throws Error {
+        weak string tag;
+        next_tag ();
+        // Nothing left, or a lone '<' directly before the end: do not read beyond the terminator.
+        if (current[0] == 0 || current[1] == 0) {
+            throw new ParserError.EOF ("EOF in tag");
+        }
+        // The byte at index 1 is accepted unchecked so that the '/' of a closing tag ends up in the name.
+        int i = 2;
+        while (current[i].isalnum ()) {
+            i++;
+        }
+        if (current[i] == 0) {
+            throw new ParserError.EOF ("EOF in tag");
+        }
+        if (current[i] == '>') {
+            // The tag is already complete; the '>' is consumed below together with the name.
+            finish = false;
+        }
+        current[i] = 0;
+        tag = (string) (current + 1);
+        current += i + 1;
+        if (finish) {
+            finish_tag ();
+        }
+        return tag;
+    }
+
+    /**
+     * Consumes the next tag and checks its name.
+     *
+     * @param tag expected tag name
+     * @throws ParserError.WRONG_TAG when a different tag is found
+     * @throws ParserError.EOF when the buffer ends inside the tag
+     */
+    public void expect_tag (string tag) throws Error {
+        unowned string found = parse_tag (true);
+        if (tag != found) {
+            throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
+                                             found, tag);
+        }
+    }
+
+    /**
+     * Returns the text between the cursor and the next tag and moves the cursor to that tag.
+     */
+    public string parse_text () {
+        string text = ((string) current).ndup (next_tag_offset ());
+        next_tag ();
+        return text;
+    }
+
+    /**
+     * Reads one attribute at the cursor and advances past it.
+     *
+     * @param _attr name of the attribute the caller is interested in
+     * @param value the attribute value (without surrounding double quotes) when the
+     *              attribute read is _attr and has a value, otherwise null
+     */
+    public void parse_attribute (string _attr, out string value) {
+        value = null;
+        if (current[0] == 0) {
+            return;
+        }
+
+        // Attribute name: runs up to '=', whitespace, the end of the tag or the end of the buffer.
+        int i = 0;
+        while (current[i] != '=' && current[i] != '>' && current[i] != 0 && !current[i].isspace ()) {
+            i++;
+        }
+        string attr = ((string) current).ndup (i);
+        current += i;
+        if (current[0] != '=') {
+            // Attribute without a value: leave the delimiter (in particular '>') for the caller.
+            return;
+        }
+        current++;
+
+        if (current[0] == '"') {
+            // Quoted value: stop at the closing quote, or at the end of the buffer if it is missing.
+            i = 1;
+            while (current[i] != '"' && current[i] != 0) {
+                i++;
+            }
+            if (attr == _attr) {
+                value = ((string) (current + 1)).ndup (i - 1);
+            }
+            if (current[i] == '"') {
+                i++;
+            }
+        } else {
+            i = 0;
+            while (current[i] != 0 && current[i] != '>' && !current[i].isspace ()) {
+                i++;
+            }
+            if (attr == _attr) {
+                value = ((string) current).ndup (i);
+            }
+        }
+        current += i;
+    }
+
+    /**
+     * Advances the cursor past any whitespace.
+     */
+    public void skip_whitespace () {
+        while (current[0] != 0 && current[0].isspace ()) {
+            current++;
+        }
+    }
+
+    /**
+     * Consumes the next tag, checks its name and extracts one attribute from it.
+     *
+     * @param tag expected tag name
+     * @param attribute name of the attribute to return
+     * @return value of the first attribute with that name, or null if the tag has none
+     * @throws ParserError.WRONG_TAG when a different tag is found
+     * @throws ParserError.EOF when the buffer ends inside the tag name
+     */
+    public string? parse_tag_attribute (string tag, string attribute) throws Error {
+        unowned string found = parse_tag (false);
+        if (tag != found) {
+            throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
+                                             found, tag);
+        }
+
+        string? value = null;
+        skip_whitespace ();
+        while (current[0] != '>' && current[0] != 0) {
+            // parse_attribute () reports null for every other attribute; keep the match once found.
+            string? candidate;
+            parse_attribute (attribute, out candidate);
+            if (value == null && candidate != null) {
+                value = candidate;
+            }
+            skip_whitespace ();
+        }
+        // Skip the closing '>' bracket
+        if (current[0] != 0) {
+            current++;
+        }
+
+        return value;
+    }
+
+    /**
+     * Decodes decimal character references ("&#NNN;") as well as "&amp;" and "&quot;".
+     *
+     * Character references that are zero or not a valid Unicode code point are
+     * dropped. Everything else is copied through unchanged.
+     *
+     * @param s text to decode
+     * @return a new string with the entities replaced
+     */
+    public string unescape_unicode (string s) {
+        var result = new StringBuilder ();
+        int len = s.length;
+        int i = 0;
+
+        while (i < len) {
+            if (s[i] == '&' && s[i + 1] == '#') {
+                int j = i + 2;
+                uint codepoint = 0;
+                while (j < len && s[j].isdigit ()) {
+                    // Stop accumulating once the value is out of range so it cannot overflow.
+                    if (codepoint <= 0x10FFFF) {
+                        codepoint = codepoint * 10 + (uint) s[j].digit_value ();
+                    }
+                    j++;
+                }
+                if (s[j] == ';') {
+                    unichar c = (unichar) codepoint;
+                    if (codepoint != 0 && c.validate ()) {
+                        result.append_unichar (c);
+                    }
+                    i = j + 1;
+                    continue;
+                }
+            }
+
+            // Byte-based view of the remainder, consistent with the byte index i.
+            unowned string rest = (string) ((char*) s + i);
+            if (rest.has_prefix ("&amp;")) {
+                result.append_c ('&');
+                i += 5;
+                continue;
+            }
+            if (rest.has_prefix ("&quot;")) {
+                result.append_c ('"');
+                i += 6;
+                continue;
+            }
+
+            result.append_c (s[i]);
+            i++;
+        }
+
+        return result.str;
+    }
+
+    /**
+     * Parses one "<div class=movie>" entry starting at the cursor.
+     *
+     * The movie is passed to the callback only if its title matches the filter
+     * set by query (): the glob pattern if there is one, the title prefix otherwise.
+     *
+     * @throws ParserError when the markup does not have the expected tag sequence
+     */
+    public void parse_movie () throws Error {
+        expect_tag ("div"); // class=movie
+        expect_tag ("div"); // class=name
+        expect_tag ("a"); // href="/movies?near=city&mid=..."
+        expect_tag ("span"); // dir=ltr
+        var title = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
+        expect_tag ("/span");
+        expect_tag ("/a");
+        expect_tag ("/div");
+        expect_tag ("span"); // class=info
+        string[] runtime_and_fsk = {};
+        double rating = 0.0;
+        string tag = parse_tag ();
+        if (tag == "a") {
+            // Trailer
+            expect_tag ("/a");
+            tag = parse_tag ();
+        }
+        if (tag == "a") {
+            // IMDb
+            expect_tag ("/a");
+            tag = parse_tag ();
+        }
+        if (tag == "nobr") {
+            expect_tag ("nobr");
+            // The alt text reads "Rated 0.0 out of 5.0"; the number starts after "Rated ".
+            string? alt = parse_tag_attribute ("img", "alt");
+            if (alt != null && alt.length > 6) {
+                rating = alt.substring (6).to_double ();
+            }
+            expect_tag ("img");
+            expect_tag ("img");
+            expect_tag ("img");
+            expect_tag ("img");
+            expect_tag ("/nobr");
+            expect_tag ("/nobr");
+            // Drop left-to-right marks, then skip the three leading characters before splitting.
+            string info = parse_text ().replace ("&#8206;", "").replace ("&lrm;", "");
+            if (info.length >= 3) {
+                runtime_and_fsk = info.substring (3).split (" - ");
+            }
+            if (parse_tag () == "a") {
+                // Trailer
+                expect_tag ("/a");
+                if (parse_tag () == "a") {
+                    // IMDb link
+                    expect_tag ("/a");
+                    expect_tag ("/span");
+                }
+            }
+        }
+        expect_tag ("div"); // class=times
+        // NOTE(review): separator restored as the "&nbsp;" entity; confirm against a live page.
+        var showtimes = new StringBuilder (parse_text ().replace ("&nbsp;", ","));
+        while (parse_tag () == "a") {
+            showtimes.append (parse_text ());
+            showtimes.append_c (',');
+            expect_tag ("/a");
+        }
+
+        if (pattern == null) {
+            if (!title.has_prefix (_title)) {
+                return;
+            }
+        } else {
+            if (!pattern.match ((uint) title.length, title, null)) {
+                return;
+            }
+        }
+
+        var movie = new GoogleMovie ();
+
+        movie.title = strip_tags (title).replace ("\"", "\\\"");
+        movie.rating = (int) (rating * 10);
+
+        movie.cinema = last_cinema;
+        if (runtime_and_fsk.length >= 2) {
+            movie.runtime = runtime_and_fsk[0];
+            movie.fsk = runtime_and_fsk[1];
+        }
+        movie.showtimes = showtimes.str;
+
+        // A movie may be parsed before any cinema block was seen.
+        string cinema_name = (last_cinema != null) ? last_cinema.name : "";
+
+        // TODO - could be configurable by settings
+        if (movie.runtime != null) {
+            movie.secondary = "%s - %s - %s".printf (movie.runtime, cinema_name, movie.showtimes);
+        } else {
+            movie.secondary = "%s - %s".printf (cinema_name, movie.showtimes);
+        }
+
+        _get_callback (movie);
+    }
+
+    // FIXME - this is specific for Germany
+    /**
+     * Removes a trailing " (OmU)" (original audio with subtitles) or " (OV)"
+     * (original audio) marker from a title.
+     */
+    private string strip_tags (string title) {
+        string[] suffixes = { " (OmU)", " (OV)" };
+        foreach (unowned string suffix in suffixes) {
+            if (title.has_suffix (suffix)) {
+                return title.substring (0, title.length - suffix.length);
+            }
+        }
+        return title.dup ();
+    }
+
+    /**
+     * Parses one "<div class=theater>" entry starting at the cursor and stores
+     * it as the cinema for the movies that follow.
+     *
+     * @throws ParserError when the markup does not have the expected tag sequence
+     */
+    public void parse_cinema () throws Error {
+        expect_tag ("div"); // class=theater
+        expect_tag ("div"); // class=desc id=theater_...
+        expect_tag ("h2"); // class=name
+        expect_tag ("a"); // href="/movies?near=city&tid=..."
+        expect_tag ("span"); // dir=ltr
+        var name = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
+        expect_tag ("/span");
+        expect_tag ("/a");
+        expect_tag ("/h2");
+        expect_tag ("div"); // class=info
+        var address_and_phone = parse_text ().replace ("&nbsp;", " ").split (" - ");
+        string address = null;
+        string phone = null;
+        if (address_and_phone.length >= 2) {
+            address = address_and_phone[0];
+            phone = address_and_phone[1].replace (" ", "").replace ("-", "");
+        }
+        expect_tag ("a"); // target=_top
+        expect_tag ("/a");
+        expect_tag ("/div");
+        expect_tag ("/div");
+
+        last_cinema = new Cinema (name);
+        last_cinema.address = address;
+        last_cinema.phone = phone;
+    }
+
+    /**
+     * Skips the tag at the cursor, including its closing '>', without
+     * running past the end of the buffer.
+     */
+    private void skip_tag () {
+        if (current[0] == 0) {
+            return;
+        }
+        current++;
+        finish_tag ();
+    }
+
+    /**
+     * Parses a downloaded page.
+     *
+     * The buffer must be NUL-terminated and is modified while parsing. If the
+     * location field is still null, the page is first scanned for a
+     * "/movies?near=" link, whose value is stored there with its first letter
+     * in upper case.
+     *
+     * @param buf page content followed by a NUL byte
+     * @return number of movie entries parsed, including those rejected by the title filter
+     * @throws ParserError when a movie or theater entry has unexpected markup
+     */
+    public int parse (ref char[] buf) throws Error {
+        int movies = 0;
+
+        current = buf;
+        next_tag ();
+        while (location == null && current[0] != 0) {
+            if (((string) current).has_prefix ("<a href=\"/movies?near=")) {
+                // parse_tag_attribute () already moves the cursor past this tag.
+                string? href = parse_tag_attribute ("a", "href");
+                if (href != null && href.has_prefix ("/movies?near=")) {
+                    char* p = (char*) href + 13; // skip "/movies?near="
+                    int j = 0;
+
+                    while (p[j] != '&' && p[j] != 0) {
+                        j++;
+                    }
+                    p[0] = p[0].toupper ();
+                    location = ((string) p).ndup (j);
+                }
+            } else {
+                skip_tag ();
+            }
+            next_tag ();
+        }
+        while (current[0] != 0) {
+            if (((string) current).has_prefix ("<div class=movie>")) {
+                parse_movie ();
+                movies++;
+            } else if (((string) current).has_prefix ("<div class=theater>")) {
+                parse_cinema ();
+            } else {
+                skip_tag ();
+            }
+            next_tag ();
+        }
+
+        return movies;
+    }
+
+    /**
+     * Downloads the showtimes page and reports every movie whose title matches.
+     *
+     * Errors are printed to stderr and reported as a result of 0. At most
+     * BUFFER_SIZE - 1 bytes of the page are read.
+     *
+     * @param title title prefix, or a glob pattern if it contains '*'
+     * @param location value for the "near" query parameter, or null/empty for none;
+     *                 this parameter shadows the location field inside this method
+     * @param callback invoked for every matching movie
+     * @param cancellable optional cancellation object
+     * @return the result of parse (), or 0 on error or cancellation
+     */
+    public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
+        _get_callback = callback;
+        _title = title;
+        if (title.chr (title.length, '*') != null) {
+            pattern = new PatternSpec (title);
+        } else {
+            pattern = null;
+        }
+        try {
+            // TODO - use google.de in Germany, also provides genres
+            string uri = "http://google.com/movies";
+            // NOTE(review): location is appended as-is; confirm whether callers pass it already URI-escaped.
+            if (location != null && location != "") {
+                uri += "?near=" + location;
+            }
+
+            stdout.printf ("GET: %s\n", uri);
+
+            File file = File.new_for_uri (uri);
+            InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, cancellable);
+
+            char[] buf = new char[BUFFER_SIZE];
+            // Keep the last byte free for the terminating NUL.
+            size_t capacity = BUFFER_SIZE - 1;
+            size_t total = 0;
+            while (total < capacity) {
+                size_t nread = yield stream.read_async ((char *)buf + total, capacity - total, Priority.DEFAULT_IDLE, cancellable);
+                total += nread;
+                if (cancellable != null && cancellable.is_cancelled ()) {
+                    return 0;
+                }
+                if (nread == 0) {
+                    break;
+                }
+            }
+            buf[total] = 0;
+            return parse (ref buf);
+        } catch (Error e) {
+            stderr.printf ("Error: %s\n", e.message);
+        }
+
+        return 0;
+    }
+}