/* This file is part of Cinaest.
 *
 * Copyright (C) 2009 Philipp Zabel
 *
 * Cinaest is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cinaest is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * A cinema as listed on the showtimes page.
 */
public class Theater {
    public string name;
    public string address;
    public string phone;
}

/**
 * A single movie listing at one theater.
 *
 * Only title, rating, theater and showtimes are filled in by
 * GoogleParser; runtime and fsk are left at their defaults.
 */
public class GoogleMovie {
    public string title;
    /* Rating scaled by 10 (e.g. "Rated 3.5 out of 5.0" gives 35); -1 if a rating node yielded nothing. */
    public int rating;
    public Theater theater;
    public int runtime;
    public string fsk;
    /* Showtimes text with non-breaking spaces replaced by commas. */
    public string showtimes;
}

/**
 * Fetches a movie showtimes page and reports every movie whose title
 * matches the query through a callback.
 */
class GoogleParser : Object {
    /* Size of the download buffer, including room for a terminating NUL. */
    private const int BUFFER_SIZE = 256 * 1024;

    int movies;
    public string location;
    string _title;
    PatternSpec pattern;

    public delegate void ReceiveMovie (GoogleMovie movie);
    public ReceiveMovie _get_callback;

    /**
     * Parses the buffer as HTML, suppressing libxml2 errors and warnings.
     *
     * @param buf raw page contents
     * @return the parsed document (caller must free it), or null on failure
     */
    private Html.Doc* get_html_document (ref char[] buf) {
        return Html.Doc.read_memory (buf, (int) buf.length, "http://movies.google.de", null,
                                     Html.ParserOption.NOERROR | Html.ParserOption.NOWARNING);
    }

    /**
     * Parses a downloaded showtimes page and invokes the stored callback
     * for every matching movie.
     *
     * @param buf raw page contents
     * @return the number of movies reported through the callback
     */
    public int parse (ref char[] buf) throws Error {
        var doc = get_html_document (ref buf);
        if (doc == null) {
            stderr.printf ("Error: parsing failed\n");
            return 0;
        }

        // TODO: set up location
        location = "";

        movies = 0;

        // An empty document has no children; the search helper requires a
        // non-null start node.
        Xml.Node* theater = null;
        if (doc->children != null) {
            theater = search_tag_by_class (doc->children, "div", "theater");
        }
        if (theater == null) {
            stderr.printf ("Error: does not contain theater\n");
        }

        while (theater != null) {
            theater = parse_theater (theater);
        }

        // All strings stored in Theater/GoogleMovie are copies, so the
        // document can be released here instead of being leaked.
        delete doc;

        return movies;
    }

    /**
     * Parses one theater node and all movies listed below it.
     *
     * Nodes without children (e.g. whitespace text nodes between theater
     * blocks) are skipped.
     *
     * @param t the node to parse
     * @return the next sibling of t, or null at the end of the list
     */
    private Xml.Node* parse_theater (Xml.Node* t) {
        var theater = new Theater ();

        var desc = t->children;
        if (desc == null) {
            return t->next;
        }

        if (desc->name == "div" && desc->get_prop ("class") == "desc") {
            parse_theater_description (desc, theater);
        }

        var showtimes = desc->next;
        if (showtimes != null && showtimes->name == "div"
            && showtimes->get_prop ("class") == "showtimes"
            && showtimes->children != null) {
            var left = search_tag_by_class (showtimes->children, "div", "show_left");
            if (left != null && left->children != null) {
                print ("LEFT\n");
                parse_movies (left, theater);
            }

            // Continue after the left column when there is one, otherwise
            // search the whole showtimes block for the right column.
            Xml.Node* start = (left != null) ? left->next : showtimes->children;
            if (start != null) {
                var right = search_tag_by_class (start, "div", "show_right");
                if (right != null && right->children != null) {
                    print ("RIGHT\n");
                    parse_movies (right, theater);
                }
            }
        }

        return t->next;
    }

    /* Fills in name, address and phone of a theater from its "desc" div. */
    private void parse_theater_description (Xml.Node* desc, Theater theater) {
        var name = desc->children;
        if (name == null) {
            return;
        }

        if (name->name == "h2" && name->get_prop ("class") == "name") {
            var a = name->children;
            if (a != null && a->name == "a") {
                theater.name = get_child_text_content (a);
            }
            print ("THEATER \"%s\"\n", theater.name);
        }

        var info = name->next;
        if (info != null && info->name == "div" && info->get_prop ("class") == "info") {
            var text = info->children;
            if (text != null && text->name == "text" && text->content != null) {
                // The info line has the form "<address> - <phone>".
                var address_and_phone = text->content.split (" - ");
                if (address_and_phone.length >= 2) {
                    theater.address = address_and_phone[0];
                    theater.phone = address_and_phone[1].replace (" ", "").replace ("-", "");
                }
            }
        }
    }

    /* Parses every movie found below a showtimes column with non-null children. */
    private void parse_movies (Xml.Node* column, Theater theater) {
        var movie = search_tag_by_class (column->children, "div", "movie");
        while (movie != null) {
            movie = parse_movie (movie, theater);
        }
    }

    /**
     * Parses one movie node and reports it through the callback if its
     * title matches the current query.
     *
     * Nodes that yield no title (such as non-movie siblings) are skipped.
     *
     * @param m the node to parse
     * @param theater the theater the movie is shown at
     * @return the next sibling of m, or null at the end of the list
     */
    private Xml.Node* parse_movie (Xml.Node* m, Theater theater) {
        var movie = new GoogleMovie ();
        movie.theater = theater;

        for (Xml.Node* n = m->children; n != null; n = n->next) {
            var css_class = n->get_prop ("class");
            if (n->name == "div" && css_class == "name") {
                movie.title = parse_movie_name (n);
            }
            if (n->name == "span" && css_class == "info") {
                parse_movie_info (n, movie);
            }
            if (n->name == "div" && css_class == "times") {
                parse_movie_times (n, movie);
            }
        }

        if (movie.title == null) {
            return m->next;
        }

        if (pattern == null) {
            if (!movie.title.has_prefix (_title)) {
                return m->next;
            }
        } else {
            if (!pattern.match ((uint) movie.title.length, movie.title, null)) {
                return m->next;
            }
        }

        if (_get_callback != null) {
            _get_callback (movie);
        }
        movies++;

        return m->next;
    }

    /**
     * Extracts the movie title from a "name" div.
     *
     * @param n the "name" div
     * @return the title without language tags, or null if the expected
     *         link/text structure is missing
     */
    private string? parse_movie_name (Xml.Node* n) {
        var a = n->children;
        if (a == null || a->name != "a") {
            return null;
        }

        var text = a->children;
        if (text == null || text->name != "text" || text->content == null) {
            return null;
        }

        print ("\"%s\"\n", text->content);
        return strip_tags (text->content);
    }

    // FIXME - this is specific for Germany
    /* Removes a trailing " (OmU)" or " (OV)" language tag from a title. */
    private string strip_tags (string title) {
        // " (OmU)": original audio with subtitles, " (OV)": original audio
        string[] tag_suffixes = { " (OmU)", " (OV)" };

        foreach (string tag_suffix in tag_suffixes) {
            if (title.has_suffix (tag_suffix)) {
                return title.substring (0, title.length - tag_suffix.length);
            }
        }

        return title.dup ();
    }

    /**
     * Reads the rating from a movie "info" span.
     *
     * The rating is taken from the first "nobr" sibling after the first
     * child; a parsed value of 0 is stored as -1.
     */
    private void parse_movie_info (Xml.Node* i, GoogleMovie movie) {
        var text = i->children;
        if (text == null) {
            return;
        }

        if (text->name == "text") {
            print ("\t\"%s\"\n", text->content);
        }

        // movie.runtime

        for (var n = text->next; n != null; n = n->next) {
            if (n->name == "nobr") {
                movie.rating = parse_rating (n);
                if (movie.rating == 0) {
                    movie.rating = -1;
                }
                break;
            }
        }
    }

    /**
     * Extracts the rating from the alt text of an image nested inside
     * nobr/nobr/img.
     *
     * @param nobr the outer "nobr" node
     * @return ten times the number found after the first six characters
     *         of the alt text, or 0 if no usable alt text exists
     */
    private int parse_rating (Xml.Node* nobr) {
        for (var n = nobr->children; n != null; n = n->next) {
            if (n->name != "nobr") {
                continue;
            }
            for (var img = n->children; img != null; img = img->next) {
                if (img->name != "img") {
                    continue;
                }
                // "Rated 0.0 out of 5.0"
                //        ^ the number starts at offset 6
                var alt = img->get_prop ("alt");
                if (alt != null && alt.length > 6) {
                    return (int) (10 * alt.offset (6).to_double ());
                }
            }
        }

        return 0;
    }

    /* Stores the showtimes text of a "times" div, if it has any. */
    private void parse_movie_times (Xml.Node* node, GoogleMovie movie) {
        unowned string? times = get_child_text_content (node);
        if (times != null) {
            // U+00A0 = non-breaking space
            movie.showtimes = times.replace ("\xc2\xa0", ",");
        }
    }

    /**
     * Returns the content of the first child of n if that child is a text
     * node, otherwise null.
     */
    unowned string? get_child_text_content (Xml.Node *n) {
        if (n->children != null && n->children->name == "text") {
            return n->children->content;
        } else {
            return null;
        }
    }

    /**
     * Depth-first search through node, its following siblings and all of
     * their descendants for an element with the given tag name whose
     * property prop equals val.
     *
     * @return the first matching node, or null if there is none
     */
    Xml.Node* search_tag_by_property (Xml.Node* node, string tag, string prop, string val) requires (node != null) {
        for (var n = node; n != null; n = n->next) {
            if (n->name == tag && n->get_prop (prop) == val) {
                return n;
            }
            if (n->children != null) {
                var found = search_tag_by_property (n->children, tag, prop, val);
                if (found != null) {
                    return found;
                }
            }
        }
        return null;
    }

    /* Shorthand for search_tag_by_property () on the "class" property. */
    Xml.Node* search_tag_by_class (Xml.Node* node, string tag, string @class) requires (node != null) {
        return search_tag_by_property (node, tag, "class", @class);
    }

    /**
     * Downloads the showtimes page and reports matching movies.
     *
     * If title contains a '*', it is used as a glob pattern; otherwise
     * movies whose title starts with title are reported. Errors are
     * printed to stderr and result in a return value of 0.
     *
     * @param title title prefix or glob pattern to match
     * @param location optional location, passed as the "near" URI parameter
     * @param callback invoked once per matching movie
     * @param cancellable optional cancellable; may be null
     * @return the number of movies reported, or 0 on error or cancellation
     */
    public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
        _get_callback = callback;
        _title = title;
        if (title.chr (title.length, '*') != null) {
            pattern = new PatternSpec (title);
        } else {
            pattern = null;
        }

        try {
            // TODO - use google.de in Germany, also provides genres
            string uri = "http://google.com/movies";
            if (location != null && location != "") {
                // Escape the location so spaces, '&' etc. yield a valid URI.
                uri += "?near=" + Uri.escape_string (location, "", true);
            }

            stdout.printf ("GET: %s\n", uri);

            File file = File.new_for_uri (uri);
            InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, cancellable);

            char[] buf = new char[BUFFER_SIZE];
            size_t nread;
            size_t total = 0;
            // Leave the last byte free for the terminating NUL below.
            while (total < BUFFER_SIZE - 1) {
                nread = yield stream.read_async ((char *)buf + total, BUFFER_SIZE - 1 - total, Priority.DEFAULT_IDLE, cancellable);
                total += nread;
                if (cancellable != null && cancellable.is_cancelled ()) {
                    return 0;
                }
                if (nread == 0) {
                    break;
                }
            }
            buf[total] = 0;

            return parse (ref buf);
        } catch (Error e) {
            stderr.printf ("Error: %s\n", e.message);
        }

        return 0;
    }
}