/* This file is part of Cinaest.
 *
 * Copyright (C) 2009 Philipp Zabel
 *
 * Cinaest is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cinaest is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Errors raised while walking the downloaded HTML.
 */
errordomain ParserError {
    /** A tag other than the expected one was found. */
    WRONG_TAG,
    /** The buffer ended in the middle of a tag. */
    EOF
}

/**
 * A cinema as listed on the showtimes page.
 */
public class Cinema {
    public string name;
    public string address;
    public string phone;

    public Cinema (string _name) {
        name = _name;
    }
}

/**
 * A movie entry together with the cinema it is shown in and its showtimes.
 */
public class GoogleMovie : Movie {
    public Cinema cinema;
    public string runtime;
    public string fsk;
    public string showtimes;
}

/**
 * Minimal in-place tag scanner for the Google movie showtimes page.
 *
 * The parser walks a NUL-terminated character buffer through the
 * //current// cursor. Tag names are returned as pointers into that buffer,
 * which is modified (NUL bytes are written after tag names) while parsing.
 */
public class GoogleParser : Object {
    // Maximum number of payload bytes downloaded by query ().
    private const int BUFFER_SIZE = 256 * 1024;

    private MovieSource.ReceiveMovieFunction _get_callback;
    // Read cursor into the NUL-terminated buffer handed to parse ().
    char *current;
    // Cinema most recently seen by parse_cinema (); attached to following movies.
    Cinema last_cinema;
    // parse () only iterates while this is null; nothing in this file assigns it.
    public string location;
    MovieFilter _filter;
    // Glob pattern built from the filter title, or null for plain prefix matching.
    PatternSpec pattern;

    /**
     * Returns the distance from the cursor to the next '<' or to the
     * terminating NUL, whichever comes first.
     */
    public int next_tag_offset () {
        int i = -1;
        while (current[++i] != '<' && current[i] != 0);
        return i;
    }

    /**
     * Advances the cursor to the next '<' (or to the terminating NUL).
     */
    public void next_tag () {
        if (current[0] == 0) {
            return;
        }
        current += next_tag_offset ();
    }

    /**
     * Advances the cursor just past the next '>', or to the terminating NUL
     * if there is none.
     */
    public void finish_tag () {
        while (current[0] != '>' && current[0] != 0) {
            current++;
        }
        if (current[0] == '>') {
            current++;
        }
    }

    /**
     * Reads the name of the next tag.
     *
     * The character following the name is overwritten with NUL so that the
     * returned string can point straight into the buffer.
     *
     * @param finish when true, also skip the rest of the tag including '>'
     * @return the tag name (with a leading '/' for closing tags), borrowed
     *         from the parse buffer
     * @throws ParserError.EOF if the buffer ends before the tag is complete
     */
    public weak string parse_tag (bool finish = true) throws Error {
        weak string tag;

        next_tag ();
        // Without this check the scan below would start beyond the terminator.
        if (current[0] == 0 || current[1] == 0) {
            throw new ParserError.EOF ("EOF in tag");
        }

        // Index 1 is either '/' or the first letter of the name, so the
        // alphanumeric scan starts at index 2.
        int i = 1;
        while (current[++i].isalnum ());
        if (current[i] == 0) {
            throw new ParserError.EOF ("EOF in tag");
        }
        // The byte about to be overwritten is the closing '>' itself, so
        // there is nothing left to skip.
        if (current[i] == '>') {
            finish = false;
        }
        current[i] = 0;
        tag = (string) (current + 1);
        current += i + 1;
        if (finish) {
            finish_tag ();
        }
        return tag;
    }

    /**
     * Consumes the next tag and checks its name.
     *
     * @param tag expected tag name, e.g. "div" or "/a"
     * @throws ParserError.WRONG_TAG if a different tag is found
     * @throws ParserError.EOF if the buffer ends inside the tag
     */
    public void expect_tag (string tag) throws Error {
        var found = parse_tag (true);
        if (tag != found) {
            throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"", found, tag);
        }
    }

    /**
     * Returns a copy of the text between the cursor and the next tag and
     * moves the cursor to that tag.
     */
    public string parse_text () {
        string text = ((string) current).ndup (next_tag_offset ());
        next_tag ();
        return text;
    }

    /**
     * Parses one attribute at the cursor.
     *
     * The cursor is left after the attribute; a closing '>' is never
     * consumed. Attributes without a value (including the '/' of a
     * self-closing tag) are skipped.
     *
     * @param _attr name of the attribute the caller is interested in
     * @param value set to the attribute value (surrounding double quotes
     *              removed) if the parsed attribute is //_attr//, otherwise
     *              null
     */
    public void parse_attribute (string _attr, out string value) {
        value = null;

        skip_whitespace ();
        if (current[0] == 0) {
            return;
        }

        // Attribute name: up to '=', whitespace, '>' or end of buffer.
        int i = 0;
        while (current[i] != '=' && current[i] != '>' && current[i] != 0 && !current[i].isspace ()) {
            i++;
        }
        string attr = ((string) current).ndup (i);
        current += i;

        // No '=' means no value; leave '>' in place for the caller.
        if (current[0] != '=') {
            return;
        }
        current++;

        // Attribute value: up to whitespace or '>', treating double-quoted
        // sections as opaque so they may contain both.
        i = 0;
        while (current[i] != 0 && current[i] != '>' && !current[i].isspace ()) {
            if (current[i] == '"') {
                i++;
                while (current[i] != '"' && current[i] != 0) {
                    i++;
                }
                if (current[i] == 0) {
                    // Unterminated quote: stop on the terminator.
                    break;
                }
            }
            i++;
        }

        if (attr == _attr) {
            if (current[0] == '"' && i >= 2 && current[i - 1] == '"') {
                value = ((string) (current + 1)).ndup (i - 2);
            } else if (current[0] == '"') {
                // Opening quote without a closing one.
                value = ((string) (current + 1)).ndup (i - 1);
            } else {
                value = ((string) current).ndup (i);
            }
        }
        current += i;
    }

    /**
     * Advances the cursor past any whitespace.
     */
    public void skip_whitespace () {
        if (current[0] == 0) {
            return;
        }
        int i = -1;
        while (current[++i].isspace () && current[i] != 0);
        current += i;
    }

    /**
     * Consumes the next tag, which must be //tag//, and extracts one of its
     * attributes.
     *
     * @param tag expected tag name
     * @param attribute name of the attribute to return
     * @return the attribute value, or null if the tag does not carry it
     * @throws ParserError.WRONG_TAG if a different tag is found
     * @throws ParserError.EOF if the buffer ends inside the tag name
     */
    public string? parse_tag_attribute (string tag, string attribute) throws Error {
        var found = parse_tag (false);
        if (tag != found) {
            throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"", found, tag);
        }

        string? value = null;
        skip_whitespace ();
        while (current[0] != '>' && current[0] != 0) {
            // Use a scratch variable: parse_attribute () resets its out
            // argument, which would otherwise discard an earlier match.
            string? candidate;
            parse_attribute (attribute, out candidate);
            if (candidate != null) {
                value = candidate;
            }
            skip_whitespace ();
        }
        // Skip the closing '>' bracket
        if (current[0] != 0) {
            current++;
        }
        return value;
    }

    /**
     * Replaces decimal character references ("&#NNN;") as well as "&amp;"
     * and "&quot;" with the characters they stand for.
     *
     * References that are malformed or do not denote a valid code point are
     * copied through unchanged.
     *
     * @param s text possibly containing entities
     * @return a new string with the entities replaced
     */
    public string unescape_unicode (string s) {
        var result = new StringBuilder ();
        long l = s.length;

        for (int i = 0; i < l; i++) {
            if (s[i] == '&' && s[i + 1] == '#') {
                int j = i + 2;
                while (j < l && s[j].isdigit ()) {
                    j++;
                }
                if (j > i + 2 && s[j] == ';') {
                    int codepoint = s.substring (i + 2, j - i - 2).to_int ();
                    if (codepoint > 0 && ((unichar) codepoint).validate ()) {
                        result.append_unichar ((unichar) codepoint);
                        i = j;
                        continue;
                    }
                }
            }
            // NOTE(review): the two entity literals below were reconstructed
            // from a damaged copy; they match the skip counts (4 and 5).
            if (s.offset (i).has_prefix ("&amp;")) {
                result.append ("&");
                i += 4;
                continue;
            }
            if (s.offset (i).has_prefix ("&quot;")) {
                result.append ("\"");
                i += 5;
                continue;
            }
            result.append (s.substring (i, 1));
        }

        return result.str;
    }

    /**
     * Parses one movie entry starting at the cursor and, if its title passes
     * the current filter, reports it through the receive callback.
     *
     * @throws ParserError if the markup deviates from the expected structure
     * @throws ConvertError if the title cannot be converted to UTF-8
     */
    public void parse_movie () throws Error {
        expect_tag ("div"); // class=movie
        expect_tag ("div"); // class=name
        expect_tag ("a"); // href="/movies?near=city&mid=..."
        expect_tag ("span"); // dir=ltr
        var title = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
        expect_tag ("/span");
        expect_tag ("/a");
        expect_tag ("/div");
        expect_tag ("span"); // class=info

        string[] runtime_and_fsk = {};
        double rating = 0.0;

        var tag = parse_tag ();
        if (tag == "a") { // Trailer
            expect_tag ("/a");
            tag = parse_tag ();
        }
        if (tag == "a") { // IMDb
            expect_tag ("/a");
            tag = parse_tag ();
        }
        if (tag == "nobr") {
            expect_tag ("nobr");
            // "Rated " ->"0.0 out of 5.0"
            string? alt = parse_tag_attribute ("img", "alt");
            if (alt != null && alt.length > 6) {
                rating = alt.offset (6).to_double ();
            }
            expect_tag ("img");
            expect_tag ("img");
            expect_tag ("img");
            expect_tag ("img");
            expect_tag ("/nobr");
            expect_tag ("/nobr");
            // NOTE(review): the entity literal was reconstructed from a
            // damaged copy (left-to-right mark) - confirm against upstream.
            string info = parse_text ().replace ("&#8206;", "");
            // The first three characters are skipped before splitting.
            if (info.length > 3) {
                runtime_and_fsk = info.offset (3).split (" - ");
            }
            if (parse_tag () == "a") { // Trailer
                expect_tag ("/a");
                if (parse_tag () == "a") { // IMDb link
                    expect_tag ("/a");
                    expect_tag ("/span");
                }
            }
        }

        expect_tag ("div"); // class=times
        // NOTE(review): "&nbsp;" reconstructed from a damaged copy - confirm.
        var showtimes = parse_text ().replace ("&nbsp;", ",");
        while (parse_tag () == "a") {
            showtimes += parse_text () + ",";
            expect_tag ("/a");
        }

        // Filtering happens only after the whole entry has been consumed so
        // that the cursor ends up behind it either way.
        if (pattern == null) {
            if (!title.has_prefix (_filter.title)) {
                return;
            }
        } else {
            if (!pattern.match ((uint) title.length, title, null)) {
                return;
            }
        }

        var movie = new GoogleMovie ();
        movie.title = strip_tags (title);
        movie.year = 0;
        movie.rating = (int) (rating * 10);
        movie.cinema = last_cinema;
        if (runtime_and_fsk.length >= 2) {
            movie.runtime = runtime_and_fsk[0];
            movie.fsk = runtime_and_fsk[1];
        }
        movie.showtimes = showtimes;

        // TODO - could be configurable by settings
        // "runtime - cinema - showtimes"; parts that are unknown are left out.
        string secondary = "";
        if (movie.runtime != null) {
            secondary += movie.runtime + " - ";
        }
        if (last_cinema != null) {
            secondary += last_cinema.name + " - ";
        }
        secondary += showtimes;
        movie.secondary = secondary;

        _get_callback (movie);
    }

    // FIXME - this is specific for Germany
    /**
     * Returns //title// without a trailing " (OmU)" or " (OV)" marker.
     */
    private string strip_tags (string title) {
        string tag_suffix = " (OmU)"; // original audio with subtitles
        if (title.has_suffix (tag_suffix)) {
            return title.substring (0, title.length - tag_suffix.length);
        }
        tag_suffix = " (OV)"; // original audio
        if (title.has_suffix (tag_suffix)) {
            return title.substring (0, title.length - tag_suffix.length);
        }
        return title.dup ();
    }

    /**
     * Parses one cinema entry starting at the cursor and remembers it as the
     * cinema for the movie entries that follow.
     *
     * @throws ParserError if the markup deviates from the expected structure
     * @throws ConvertError if the name cannot be converted to UTF-8
     */
    public void parse_cinema () throws Error {
        expect_tag ("div"); // class=theater
        expect_tag ("div"); // class=desc id=theater_...
        expect_tag ("h2"); // class=name
        expect_tag ("a"); // href="/movies?near=city&tid=..."
        expect_tag ("span"); // dir=ltr
        var name = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
        expect_tag ("/span");
        expect_tag ("/a");
        expect_tag ("/h2");
        expect_tag ("div"); // class=info
        // NOTE(review): "&nbsp;" reconstructed from a damaged copy - confirm.
        var address_and_phone = parse_text ().replace ("&nbsp;", " ").split (" - ");
        string? address = null;
        string? phone = null;
        if (address_and_phone.length >= 2) {
            address = address_and_phone[0];
            phone = address_and_phone[1].replace (" ", "").replace ("-", "");
        }
        expect_tag ("a"); // target=_top
        expect_tag ("/a");
        expect_tag ("/div");
        expect_tag ("/div");

        last_cinema = new Cinema (name);
        last_cinema.address = address;
        last_cinema.phone = phone;
    }

    /**
     * Scans a downloaded page for cinema and movie entries.
     *
     * @param buf NUL-terminated page contents; modified while parsing
     * @return the number of movie entries encountered (including those
     *         rejected by the filter)
     * @throws Error as raised by parse_movie () and parse_cinema ()
     */
    public int parse (ref char[] buf) throws Error {
        int movies = 0;

        if (buf.length == 0) {
            return 0;
        }

        current = buf;
        next_tag ();
        while (location == null && current[0] != 0) {
            // Offset just past this tag's '>', or to the terminator if the
            // tag is never closed.
            int i = 1;
            while (current[i] != '>' && current[i] != 0) {
                i++;
            }
            if (current[i] == '>') {
                i++;
            }

            // NOTE(review): both prefixes were reconstructed from a damaged
            // copy, based on the "class=movie" / "class=theater" remarks in
            // parse_movie () and parse_cinema () - confirm against upstream.
            if (((string) current).has_prefix ("<div class=movie>")) {
                parse_movie ();
                movies++;
            } else if (((string) current).has_prefix ("<div class=theater>")) {
                parse_cinema ();
            } else {
                current += i;
            }
            next_tag ();
        }

        return movies;
    }

    /**
     * Downloads the showtimes page and reports matching movies.
     *
     * At most 256 KiB of the page are read. Errors are printed to stderr
     * and reported as a result of 0.
     *
     * @param filter movie filter; a title containing '*' is matched as a
     *               glob pattern, any other title as a prefix
     * @param location value for the "near" query parameter, or null/empty
     *                 to omit it
     * @param callback invoked once for every matching movie
     * @param cancellable optional cancellation handle
     * @return the number of movie entries parsed, or 0 on error or
     *         cancellation
     */
    public async int query (MovieFilter filter, string? location, MovieSource.ReceiveMovieFunction callback, Cancellable? cancellable) {
        _get_callback = callback;
        _filter = filter;
        if (filter.title.chr (filter.title.length, '*') != null) {
            pattern = new PatternSpec (filter.title);
        } else {
            pattern = null;
        }

        try {
            // TODO - use google.de in Germany, also provides genres
            string uri = "http://google.com/movies";
            // NOTE(review): location is appended verbatim; confirm whether
            // callers already URI-escape it before adding escaping here.
            if (location != null && location != "") {
                uri += "?near=" + location;
            }

            stdout.printf ("GET: %s\n", uri);

            File file = File.new_for_uri (uri);
            InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, cancellable);

            // One extra byte so the terminator always fits, even when the
            // page fills the whole buffer.
            char[] buf = new char[BUFFER_SIZE + 1];
            size_t nread;
            size_t total = 0;
            while (total < BUFFER_SIZE) {
                nread = yield stream.read_async ((char *) buf + total, BUFFER_SIZE - total, Priority.DEFAULT_IDLE, cancellable);
                total += nread;
                if (cancellable != null && cancellable.is_cancelled ()) {
                    return 0;
                }
                if (nread == 0) {
                    break;
                }
            }
            buf[total] = 0;

            return parse (ref buf);
        } catch (Error e) {
            stderr.printf ("Error: %s\n", e.message);
        }

        return 0;
    }
}