Convert Google plugin into a D-Bus service
[cinaest] / src / backends / google / google-parser.vala
diff --git a/src/backends/google/google-parser.vala b/src/backends/google/google-parser.vala
new file mode 100644 (file)
index 0000000..886772d
--- /dev/null
@@ -0,0 +1,391 @@
+/* This file is part of Cinaest.
+ *
+ * Copyright (C) 2009 Philipp Zabel
+ *
+ * Cinaest is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Cinaest is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Errors thrown by GoogleParser while it walks through the HTML markup.
+ */
+errordomain ParserError {
+       // A tag other than the expected one was found
+       WRONG_TAG,
+       // The input ended while a tag was being read
+       EOF
+}
+
+/**
+ * Plain data holder describing a cinema found on the showtimes page.
+ */
+public class Cinema {
+       // Display name of the cinema
+       public string name;
+       // Address text; stays null unless set by the creator of the object
+       public string address;
+       // Phone number text; stays null unless set by the creator of the object
+       public string phone;
+
+       /**
+        * Creates a cinema with the given name; address and phone are left unset.
+        *
+        * @param _name display name of the cinema
+        */
+       public Cinema (string _name) {
+               name = _name;
+       }
+}
+
+/**
+ * Plain data holder for one movie listing, filled in by
+ * GoogleParser.parse_movie () and handed to the ReceiveMovie callback.
+ */
+public class GoogleMovie {
+       // Movie title
+       public string title;
+       // Rating as an integer; the parser stores the parsed rating value times 10
+       public int rating;
+       // Preformatted secondary text built from runtime (if known), cinema name and showtimes
+       public string secondary;
+       // Cinema the listing belongs to
+       public Cinema cinema;
+       // Runtime text as found on the page; null if it could not be determined
+       public string runtime;
+       // Second part of the "runtime - ..." info text (presumably the German FSK age rating - TODO confirm)
+       public string fsk;
+       // Comma separated showtimes text
+       public string showtimes;
+}
+
+public class GoogleParser : Object {
+       // Read cursor into the NUL terminated buffer handed to parse ();
+       // all parse_* helpers read at and advance this pointer
+       char *current;
+       // Cinema most recently read by parse_cinema (); attached to the movies parsed after it
+       Cinema last_cinema;
+       // Location name; if null, parse () fills it in from the first "/movies?near=" link it finds
+       public string location;
+       // Title to look for, stored by query () and used as prefix filter when no pattern is set
+       string _title;
+       // Glob pattern built by query () when the title contains a '*', null otherwise
+       PatternSpec pattern;
+
+       // Signature of the callback that receives each matching movie
+       public delegate void ReceiveMovie (GoogleMovie movie);
+       // Callback stored by query () and invoked by parse_movie ()
+       public ReceiveMovie _get_callback;
+
+       /**
+        * Measures the distance from the cursor to the next '<' character.
+        *
+        * @return number of bytes between the cursor and the next '<', or up to
+        *         the terminating NUL if there is no further '<'
+        */
+       public int next_tag_offset () {
+               int offset = 0;
+               while (current[offset] != '<' && current[offset] != 0)
+                       offset++;
+               return offset;
+       }
+
+       /**
+        * Moves the cursor forward to the next '<' character, or to the
+        * terminating NUL if no tag follows. Does nothing at the end of input.
+        */
+       public void next_tag () {
+               if (current[0] != 0)
+                       current += next_tag_offset ();
+       }
+
+       /**
+        * Moves the cursor just behind the next '>' character. If the input
+        * ends first, the cursor is left on the terminating NUL.
+        */
+       public void finish_tag () {
+               for (; current[0] != 0; current++) {
+                       if (current[0] == '>') {
+                               // Step over the closing bracket itself
+                               current++;
+                               return;
+                       }
+               }
+       }
+
+       /**
+        * Reads the name of the next tag and moves the cursor behind it.
+        *
+        * The character that terminates the name is overwritten with NUL inside
+        * the buffer, so the returned string points directly into the buffer
+        * and stays valid only as long as the buffer does. A leading '/' of a
+        * closing tag is part of the returned name.
+        *
+        * @param finish if true, the rest of the tag up to and including its
+        *               closing '>' is skipped as well; if false, the cursor
+        *               is left right behind the character that ended the name
+        * @return the tag name, e.g. "div" or "/div"
+        * @throws ParserError.EOF if the input ends before the tag name is complete
+        */
+       public weak string parse_tag (bool finish = true) throws Error {
+               next_tag ();
+               // Do not look beyond the terminating NUL: without this check the
+               // scan below would start reading at current[2] even though the
+               // input already ended at current[0] or current[1].
+               if (current[0] == 0 || current[1] == 0)
+                       throw new ParserError.EOF ("EOF in tag");
+               // current[0] is '<' and current[1] is the first character of the
+               // name (or the '/' of a closing tag), scanning continues behind it
+               int i = 1;
+               while (current[++i].isalnum ());
+               if (current[i] == 0)
+                       throw new ParserError.EOF ("EOF in tag");
+               // The name is directly followed by '>', there is nothing left to skip
+               if (current[i] == '>')
+                       finish = false;
+               // Terminate the name in place
+               current[i] = 0;
+               weak string tag = (string) (current + 1);
+               current += i + 1;
+               if (finish)
+                       finish_tag ();
+               return tag;
+       }
+
+       /**
+        * Consumes the next tag completely and checks that it has the given name.
+        *
+        * @param tag expected tag name, e.g. "div" or "/div"
+        * @throws ParserError.WRONG_TAG if a different tag is found; errors
+        *         from parse_tag () are passed on
+        */
+       public void expect_tag (string tag) throws Error {
+               var actual = parse_tag (true);
+               if (actual == tag)
+                       return;
+               throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
+                                                actual, tag);
+       }
+
+       /**
+        * Returns the text between the cursor and the next tag and moves the
+        * cursor onto that tag (or onto the terminating NUL).
+        *
+        * @return a copy of the text in front of the next '<'
+        */
+       public string parse_text () {
+               int length = next_tag_offset ();
+               string text = ((string) current).ndup (length);
+               current += length;
+               return text;
+       }
+
+       /**
+        * Parses a single attribute (a name, optionally followed by "=value")
+        * at the cursor and moves the cursor behind it.
+        *
+        * Values may be unquoted or enclosed in double quotes; the quotes are
+        * not part of the result. An attribute without a value is skipped, and
+        * the closing '>' of the tag is never consumed.
+        *
+        * @param _attr name of the attribute the caller is interested in
+        * @param value assigned the attribute value if the attribute at the
+        *              cursor is named _attr; not assigned by this method otherwise
+        */
+       public void parse_attribute (string _attr, out string value) {
+               if (current[0] == 0)
+                       return;
+
+               // The name ends at '=', at whitespace (attribute without value),
+               // at the end of the tag or at the end of the input
+               int i = 0;
+               while (current[i] != '=' && current[i] != '>' && current[i] != 0 && !current[i].isspace ())
+                       i++;
+               string attr = ((string) current).ndup (i);
+               current += i;
+
+               // No '=' means no value. Stepping over the character here would
+               // swallow the closing '>' and continue into the following markup.
+               if (current[0] != '=')
+                       return;
+               current++;
+
+               if (current[0] == '"') {
+                       // Quoted value: runs up to the closing quote, or up to the end
+                       // of the input if the quote is never closed
+                       i = 1;
+                       while (current[i] != '"' && current[i] != 0)
+                               i++;
+                       if (attr == _attr)
+                               value = ((string) (current + 1)).ndup (i - 1);
+                       // Step over the closing quote, but never over the terminating NUL
+                       if (current[i] == '"')
+                               i++;
+               } else {
+                       // Unquoted value: runs up to whitespace or the end of the tag
+                       i = 0;
+                       while (!current[i].isspace () && current[i] != '>' && current[i] != 0)
+                               i++;
+                       if (attr == _attr)
+                               value = ((string) current).ndup (i);
+               }
+               current += i;
+       }
+
+       /**
+        * Moves the cursor to the next character that is not whitespace.
+        * The terminating NUL is never skipped.
+        */
+       public void skip_whitespace () {
+               while (current[0] != 0 && current[0].isspace ())
+                       current++;
+       }
+
+       /**
+        * Consumes the next tag, which must have the given name, and returns
+        * the value of one of its attributes.
+        *
+        * @param tag expected tag name
+        * @param attribute name of the attribute to extract
+        * @return the attribute value, or null if the tag has no such attribute;
+        *         if the attribute is given more than once the last value is returned
+        * @throws ParserError.WRONG_TAG if a different tag is found; errors
+        *         from parse_tag () are passed on
+        */
+       public string? parse_tag_attribute (string tag, string attribute) throws Error {
+               var found = parse_tag (false);
+               if (tag != found) {
+                       throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
+                                                        found, tag);
+               }
+
+               // NOTE(review): if the tag name is directly followed by '>',
+               // parse_tag () has already consumed the bracket and the loop below
+               // starts behind the tag - confirm this cannot happen for the tags
+               // this method is used on.
+               string? value = null;
+               skip_whitespace ();
+               while (current[0] != '>' && current[0] != 0) {
+                       // Use a scratch variable per attribute, so that a value found
+                       // earlier is kept no matter what parse_attribute () leaves in
+                       // its out parameter for attributes that do not match.
+                       string? candidate = null;
+                       parse_attribute (attribute, out candidate);
+                       if (candidate != null)
+                               value = candidate;
+                       skip_whitespace ();
+               }
+               // Skip the closing '>' bracket
+               if (current[0] != 0)
+                       current++;
+
+               return value;
+       }
+
+       /**
+        * Replaces decimal character references ("&#233;") and the entities
+        * "&amp;" and "&quot;" in s by the characters they stand for.
+        *
+        * Everything else is copied unchanged. That includes references that
+        * are incomplete ("&#12" without ';', "&#;") or that do not denote a
+        * valid Unicode character (0, surrogates, values above U+10FFFF).
+        *
+        * @param s text with HTML escapes, expected to be UTF-8
+        * @return a new string with the escapes resolved
+        */
+       public string unescape_unicode (string s) {
+               var result = new StringBuilder ();
+               char* p = (char*) s;
+
+               while (p[0] != 0) {
+                       if (p[0] == '&') {
+                               if (p[1] == '#' && p[2].isdigit ()) {
+                                       int j = 2;
+                                       uint codepoint = 0;
+                                       while (p[j].isdigit ()) {
+                                               // Stop accumulating once the value is out of range
+                                               // anyway, so overlong digit runs cannot overflow
+                                               if (codepoint <= 0x10FFFF)
+                                                       codepoint = codepoint * 10 + (uint) (p[j] - '0');
+                                               j++;
+                                       }
+                                       if (p[j] == ';' && codepoint != 0 && ((unichar) codepoint).validate ()) {
+                                               result.append_unichar ((unichar) codepoint);
+                                               p += j + 1;
+                                               continue;
+                                       }
+                               } else if (((string) p).has_prefix ("&amp;")) {
+                                       result.append_c ('&');
+                                       p += 5;
+                                       continue;
+                               } else if (((string) p).has_prefix ("&quot;")) {
+                                       result.append_c ('"');
+                                       p += 6;
+                                       continue;
+                               }
+                       }
+                       // Ordinary byte, or an '&' that does not start a known escape
+                       result.append_c (p[0]);
+                       p++;
+               }
+
+               return result.str;
+       }
+
+       /**
+        * Parses one movie entry, starting at its "div class=movie" tag.
+        *
+        * Reads title, optional rating, optional runtime information and the
+        * showtimes. If the title matches the search (the glob pattern if one
+        * is set, the title prefix otherwise), a GoogleMovie is created, linked
+        * to the most recently parsed cinema and passed to the callback.
+        * Entries that do not match are consumed but not reported.
+        *
+        * @throws ParserError if the markup does not have the expected structure;
+        *         errors from the character set conversion are passed on as well
+        */
+       public void parse_movie () throws Error {
+               expect_tag ("div"); // class=movie
+               expect_tag ("div"); // class=name
+               expect_tag ("a"); // href="/movies?near=city&amp;mid=..."
+               expect_tag ("span"); // dir=ltr
+               var title = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
+               expect_tag ("/span");
+               expect_tag ("/a");
+               expect_tag ("/div");
+               expect_tag ("span"); // class=info
+               string[] runtime_and_fsk = {};
+               double rating = 0.0;
+               var tag = parse_tag ();
+               if (tag == "a") {
+                       // Trailer
+                       expect_tag ("/a");
+                       tag = parse_tag ();
+               }
+               if (tag == "a") {
+                       // IMDb
+                       expect_tag ("/a");
+                       tag = parse_tag ();
+               }
+               if (tag == "nobr") {
+                       expect_tag ("nobr");
+                       string? rating_string = parse_tag_attribute ("img", "alt"); // "Rated 0.0 out of 5.0"
+                       // Skip the six characters of "Rated ", but only if the attribute
+                       // exists and is long enough; otherwise the rating stays 0
+                       if (rating_string != null && rating_string.length >= 6)
+                               rating = rating_string.offset (6).to_double ();
+                       expect_tag ("img");
+                       expect_tag ("img");
+                       expect_tag ("img");
+                       expect_tag ("img");
+                       expect_tag ("/nobr");
+                       expect_tag ("/nobr");
+                       var info = parse_text ().replace ("&#8206;", "");
+                       // The first three characters are dropped (presumably a leading
+                       // separator - TODO confirm); shorter text carries no information
+                       if (info.length >= 3)
+                               runtime_and_fsk = info.offset (3).split (" - ");
+                       if (parse_tag () == "a") {
+                               // Trailer
+                               expect_tag ("/a");
+                               if (parse_tag () == "a") {
+                                       // IMDb link
+                                       expect_tag ("/a");
+                                       expect_tag ("/span");
+                               }
+                       }
+               }
+               expect_tag ("div"); // class=times
+               var showtimes = parse_text ().replace ("&nbsp;", ",");
+               while (parse_tag () == "a") {
+                       showtimes += parse_text () + ",";
+                       expect_tag ("/a");
+               }
+
+               // Filter: glob pattern if the search contained a '*', prefix match otherwise
+               if (pattern == null) {
+                       if (!title.has_prefix (_title))
+                               return;
+               } else {
+                       if (!pattern.match ((uint) title.length, title, null))
+                               return;
+               }
+
+               var movie = new GoogleMovie ();
+
+               movie.title = strip_tags (title).replace ("\"", "\\\"");
+               movie.rating = (int) (rating * 10);
+
+               movie.cinema = last_cinema;
+               if (runtime_and_fsk.length >= 2) {
+                       movie.runtime = runtime_and_fsk[0];
+                       movie.fsk = runtime_and_fsk[1];
+               }
+               movie.showtimes = showtimes;
+
+               // TODO - could be configurable by settings
+               if (movie.runtime != null)
+                       movie.secondary = "%s - %s - %s".printf (movie.runtime, last_cinema.name, showtimes);
+               else
+                       movie.secondary = "%s - %s".printf (last_cinema.name, showtimes);
+
+               // parse () is public and may run without a callback having been set
+               if (_get_callback != null)
+                       _get_callback (movie);
+       }
+
+       // FIXME - this is specific for Germany
+       /**
+        * Removes a trailing language marker from a movie title.
+        *
+        * @param title movie title, possibly ending in " (OmU)" or " (OV)"
+        * @return a copy of the title without the marker; only one marker is removed
+        */
+       private string strip_tags (string title) {
+               // " (OmU)": original audio with subtitles, " (OV)": original audio
+               string[] markers = { " (OmU)", " (OV)" };
+               foreach (string marker in markers) {
+                       if (title.has_suffix (marker))
+                               return title.substring (0, title.length - marker.length);
+               }
+               return title.dup ();
+       }
+
+       /**
+        * Parses one cinema entry, starting at its "div class=theater" tag, and
+        * remembers it as the cinema the movies parsed afterwards belong to.
+        *
+        * Address and phone are only set if the info text consists of at least
+        * two parts separated by " - ".
+        *
+        * @throws ParserError if the markup does not have the expected structure;
+        *         errors from the character set conversion are passed on as well
+        */
+       public void parse_cinema () throws Error {
+               // div class=theater, div class=desc id=theater_..., h2 class=name,
+               // a href="/movies?near=city&amp;tid=...", span dir=ltr
+               string[] opening = { "div", "div", "h2", "a", "span" };
+               foreach (string tag in opening)
+                       expect_tag (tag);
+
+               string raw_name = parse_text ();
+               var cinema_name = unescape_unicode (convert (raw_name, -1, "utf-8", "iso-8859-1")); // FIXME
+
+               // The last "div" is the one with class=info
+               string[] before_info = { "/span", "/a", "/h2", "div" };
+               foreach (string tag in before_info)
+                       expect_tag (tag);
+
+               string[] parts = parse_text ().replace ("&nbsp;", " ").split (" - ");
+               var cinema = new Cinema (cinema_name);
+               if (parts.length >= 2) {
+                       cinema.address = parts[0];
+                       // Phone numbers are stored without blanks and dashes
+                       cinema.phone = parts[1].replace (" ", "").replace ("-", "");
+               }
+
+               // The "a" tag carries target=_top
+               string[] trailing = { "a", "/a", "/div", "/div" };
+               foreach (string tag in trailing)
+                       expect_tag (tag);
+
+               // Only a completely parsed entry replaces the previous cinema
+               last_cinema = cinema;
+       }
+
+       /**
+        * Parses a complete showtimes page.
+        *
+        * If no location is set yet, it is first taken from the first link
+        * pointing to "/movies?near=..." (with its first letter capitalized).
+        * After that every "div class=theater" and "div class=movie" entry is
+        * parsed; matching movies are reported through the callback.
+        *
+        * @param buf NUL terminated page content; the buffer is modified while parsing
+        * @return number of movie entries parsed, including entries that did
+        *         not match the search
+        * @throws ParserError if an entry does not have the expected structure
+        */
+       public int parse (ref char[] buf) throws Error {
+               int movies = 0;
+
+               current = buf;
+               next_tag ();
+               while (location == null && current[0] != 0) {
+                       if (((string) current).has_prefix ("<a href=\"/movies?near=")) {
+                               // This consumes the whole tag, the cursor must not be
+                               // advanced a second time afterwards
+                               string? href = parse_tag_attribute ("a", "href");
+                               if (href != null && href.length >= 13) {
+                                       char* p = (char*) href.offset (13); // skip "/movies?near="
+                                       int j = -1;
+
+                                       // The location ends at the next parameter or at the end of the link
+                                       while (p[++j] != '&' && p[j] != 0);
+                                       p[0] = p[0].toupper ();
+                                       location = ((string) p).ndup (j);
+                               }
+                       } else {
+                               // Skip this tag; finish_tag () stops at the end of the input
+                               // instead of searching for '>' beyond the terminating NUL
+                               finish_tag ();
+                       }
+                       next_tag ();
+               }
+               while (current[0] != 0) {
+                       if (((string) current).has_prefix ("<div class=movie>")) {
+                               parse_movie ();
+                               movies++;
+                       } else if (((string) current).has_prefix("<div class=theater>")) {
+                               parse_cinema ();
+                       } else {
+                               // Uninteresting tag, skip it without leaving the buffer
+                               finish_tag ();
+                       }
+                       next_tag ();
+               }
+
+               return movies;
+       }
+
+       /**
+        * Downloads the showtimes page and reports all movies matching title.
+        *
+        * At most 256 KiB of the page are read. Errors are printed to stderr
+        * and not passed on to the caller.
+        *
+        * @param title title to look for; if it contains a '*' it is used as
+        *              glob pattern, otherwise as title prefix
+        * @param location place to search near, appended to the request as
+        *                 given; null or "" to send no location
+        * @param callback called once for every matching movie
+        * @param cancellable optional object to cancel the request with
+        * @return the result of parse (), or 0 on error or cancellation
+        */
+       public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
+               _get_callback = callback;
+               _title = title;
+               if (title.chr(title.length, '*') != null) {
+                       pattern = new PatternSpec (title);
+               } else {
+                       pattern = null;
+               }
+               try {
+                       // TODO - use google.de in Germany, also provides genres
+                       // NOTE(review): location is appended without URI escaping - confirm
+                       // whether callers pass it already escaped
+                       string uri = "http://google.com/movies";
+                       if (location != null && location != "")
+                               uri += "?near=" + location;
+
+                       stdout.printf ("GET: %s\n", uri);
+
+                       File file = File.new_for_uri (uri);
+                       // Opening the stream can be cancelled as well, not only reading from it
+                       InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, cancellable);
+
+                       // One byte more than is ever read, so that the terminating NUL
+                       // still fits when the page fills the buffer completely
+                       char[] buf = new char[256*1024 + 1];
+                       size_t nread;
+                       size_t total = 0;
+                       while (total < 256*1024) {
+                               nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable);
+                               total += nread;
+                               if (cancellable != null && cancellable.is_cancelled ())
+                                       return 0;
+                               // End of stream
+                               if (nread == 0)
+                                       break;
+                       }
+                       buf[total] = 0;
+                       return parse (ref buf);
+               } catch (Error e) {
+                       stderr.printf ("Error: %s\n", e.message);
+               }
+
+               return 0;
+       }
+}