Google plugin: if location is unset, parse and set it from the Google query
[cinaest] / src / plugins / google-parser.vala
index b9dc1d5..2bf3ef2 100644 (file)
@@ -32,6 +32,7 @@ public class GoogleParser : Object {
        private MovieSource.ReceiveMovieFunction _get_callback;
        char *current;
        string cinema_name;
+       public string location;
        MovieFilter _filter;
        PatternSpec pattern;
 
@@ -141,39 +142,34 @@ public class GoogleParser : Object {
        }
 
        public void parse_movie () throws Error {
-               expect_tag ("a"); // rating
-               expect_tag ("nobr");
-               expect_tag ("nobr");
-               weak string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0"
-               double rating = rating_string.to_double ();
-
-               expect_tag ("img");
-               expect_tag ("img");
-               expect_tag ("img");
-               expect_tag ("img");
-               expect_tag ("/nobr");
-               expect_tag ("/nobr");
-               expect_tag ("br");
-               expect_tag ("nobr");
-               expect_tag ("/nobr");
-               expect_tag ("/a");
-               expect_tag ("/font");
-               expect_tag ("/td");
-               expect_tag ("td");
-               expect_tag ("font");
-               expect_tag ("a"); // <a href="/movies?near=city&mid=hexnumber"> --> link
-               expect_tag ("b");
-               var title = convert (parse_text ().replace ("&#39;", "'"), -1, "utf-8", "iso-8859-1"); // FIXME
-               expect_tag ("/b");
+               expect_tag ("div"); // class=movie
+               expect_tag ("div"); // class=name
+               expect_tag ("a"); // href="/movies?near=city&amp;mid=..."
+               expect_tag ("span"); // dir=ltr
+               var title = convert (parse_text ().replace ("&#39;", "'").replace ("&amp;", "&"), -1, "utf-8", "iso-8859-1"); // FIXME
+               expect_tag ("/span");
                expect_tag ("/a");
-               expect_tag ("br");
-               var runtime_and_fsk = parse_text ().replace ("&nbsp;", " ").replace ("&#8206;", "").split (" - ");
-
-               var showtimes = "";
-               if (parse_tag () == "br") {
-                       showtimes = parse_text ().replace ("&nbsp;", ",");
-                       expect_tag ("/font");
+               expect_tag ("/div");
+               expect_tag ("span"); // class=info
+               string[] runtime_and_fsk = {};
+               double rating = 0.0;
+               if (parse_tag () == "nobr") {
+                       expect_tag ("nobr");
+                       weak string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0"
+                       rating = rating_string.to_double ();
+                       expect_tag ("img");
+                       expect_tag ("img");
+                       expect_tag ("img");
+                       expect_tag ("img");
+                       expect_tag ("/nobr");
+                       expect_tag ("/nobr");
+                       runtime_and_fsk = parse_text ().replace ("&#8206;", "").offset (3).split (" - ");
+                       expect_tag ("/span");
                }
+               expect_tag ("div"); // class=times
+               var showtimes = parse_text ().replace ("&nbsp;", ",");
+               expect_tag ("/div");
+               expect_tag ("/div");
 
                if (pattern == null) {
                        if (!title.has_prefix (_filter.title))
@@ -217,21 +213,25 @@ public class GoogleParser : Object {
        }
 
        public void parse_cinema () throws Error {
-               expect_tag ("tr");
-               expect_tag ("td");
-               expect_tag ("a"); // --> link
-               expect_tag ("b");
+               expect_tag ("div"); // class=theater
+               expect_tag ("div"); // class=desc id=theater_...
+               expect_tag ("h2"); // class=name
+               expect_tag ("a"); // href="/movies?near=city&amp;tid=..."
+               expect_tag ("span"); // dir=ltr
                string name = convert (parse_text ().replace ("&amp;", "&"), -1, "utf-8", "iso-8859-1"); // FIXME
-               expect_tag ("/b");
+               expect_tag ("/span");
                expect_tag ("/a");
-               expect_tag ("br");
-               expect_tag ("font");
-               string address = parse_text ().replace ("&nbsp;", " ");
-               expect_tag ("a"); // --> map
+               expect_tag ("/h2");
+               expect_tag ("div"); // class=info
+               var address_and_phone = parse_text ().replace ("&nbsp;", " ").split (" - ");
+               if (address_and_phone.length >= 2) {
+               //      string address = address_and_phone[0];
+               //      string phone = address_and_phone[1];
+               }
+               expect_tag ("a"); // target=_top
                expect_tag ("/a");
-               expect_tag ("/font");
-               expect_tag ("/td");
-               expect_tag ("/tr");
+               expect_tag ("/div");
+               expect_tag ("/div");
 
                cinema_name = name;
                // FIXME - store cinema address for movie detail window
@@ -240,20 +240,36 @@ public class GoogleParser : Object {
        public void parse (ref char[] buf) throws Error {
                current = buf;
                next_tag ();
+               while (location == null && current[0] != 0) { // first pass: runs only while no location is set; scans tags for a "near=" link
+                       int i = 1;
+                       while (current[i++] != '>'); // i ends up as the tag length including '>' — NOTE(review): stops only at '>', no end-of-buffer check
+                       if (((string) current).has_prefix ("<a href=\"/movies?near=")) { // link whose href carries the location
+                               string href = parse_tag_attribute ("a", "href"); // NOTE(review): helper not shown here; presumably returns the tag's href value
+                               char* p = (char*) href.offset (13); // skip "/movies?near=" (13 bytes); p points into href
+                               int j = -1; // pre-incremented below, so the scan starts at p[0]
+
+                               while (p[++j] != '&' && p[j] != 0); // j = number of bytes before the next '&' or the end of the string
+                               p[0] = p[0].toupper (); // upper-case the first byte in place
+                               location = ((string) p).ndup (j); // keep only the first j bytes; this also ends the outer loop
+                       }
+                       current += i; // advance by the tag length measured above — NOTE(review): assumes parse_tag_attribute left current untouched; confirm
+                       next_tag ();
+               }
                while (current[0] != 0) {
                        int i = 1;
                        while (current[i++] != '>');
-                       if (((string) current).has_prefix ("<a href=\"/movies/reviews?cid="))
+                       if (((string) current).has_prefix ("<div class=movie>")) { // start of a movie entry
                                parse_movie ();
-                       else if (((string) current).has_prefix("<tr valign=top><td colspan=4><a href=\"/movies?near="))
+                       } else if (((string) current).has_prefix("<div class=theater>")) { // start of a cinema entry
                                parse_cinema ();
-                       else
+                       } else { // any other tag: step over it
                                current += i;
+                       }
                        next_tag ();
                }
        }
 
-       public GoogleParser (MovieFilter filter, string? location, MovieSource.ReceiveMovieFunction callback, Cancellable? cancellable) {
+       public async void query (MovieFilter filter, string? location, MovieSource.ReceiveMovieFunction callback, Cancellable? cancellable) {
                _get_callback = callback;
                _filter = filter;
                if (filter.title.chr(filter.title.length, '*') != null) {
@@ -266,14 +282,24 @@ public class GoogleParser : Object {
                        string uri = "http://google.com/movies";
                        if (location != null && location != "")
                                uri += "?near=" + location;
+
+                       stdout.printf ("GET: %s\n", uri);
+
                        File file = File.new_for_uri (uri);
-                       InputStream stream = file.read (null);
+                       InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, null);
 
                        char[] buf = new char[256*1024];
                        size_t nread;
-                       bool ok = stream.read_all (buf, buf.length, out nread, cancellable);
-
-                       buf[nread] = 0;
+                       size_t total = 0;
+                       while (total < 256*1024) {
+                               nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable);
+                               total += nread;
+                               if (cancellable.is_cancelled ())
+                                       return;
+                               if (nread == 0)
+                                       break;
+                       }
+                       buf[total] = 0;
                        parse (ref buf);
                } catch (Error e) {
                        stderr.printf ("Error: %s\n", e.message);