1 /* This file is part of Cinaest.
3 * Copyright (C) 2009 Philipp Zabel
5 * Cinaest is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * Cinaest is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
/**
 * A cinema found on the showtimes page.
 *
 * The fields are filled in by GoogleParser.parse_theater and remain null
 * when the corresponding markup is not present.
 */
public class Theater {
    /** Display name, read from the link inside the theater heading. */
    public string name;
    /** Address: the part of the info line before the first " - ". */
    public string address;
    /** Phone number: the part after " - ", with spaces and dashes removed. */
    public string phone;
}
/**
 * One movie showing at one theater, as delivered to the ReceiveMovie callback.
 */
public class GoogleMovie {
    /** Movie title with a trailing language tag (" (OmU)", " (OV)") removed. */
    public string title;
    /** Ten times the score read from the rating image's alt text; 0 if none was found. */
    public int rating;
    /** The theater this showing belongs to. */
    public Theater theater;
    /** Text of the "times" element with non-breaking spaces replaced by commas. */
    public string showtimes;
}
34 class GoogleParser : Object {
/** Search location; nothing in the visible code assigns it yet (see the TODO in parse). */
public string location;

// Title prefix a movie has to start with; consulted only while `pattern` is null.
string? _title;
// Wildcard filter built by query () when the requested title contains '*'.
PatternSpec? pattern;

/** Callback type invoked once for every movie that passes the title filter. */
public delegate void ReceiveMovie (GoogleMovie movie);
/** Receiver installed by query () and invoked from parse_movie (). */
public ReceiveMovie _get_callback;
/**
 * Hands a raw page buffer to the libxml2 HTML parser.
 *
 * @param buf the downloaded page; its full length is passed to the parser
 * @return the result of Html.Doc.read_memory for //buf//
 */
private Html.Doc* get_html_document (ref char[] buf) {
    // Parser errors and warnings are both suppressed.
    var quiet = Html.ParserOption.NOERROR | Html.ParserOption.NOWARNING;
    int size = (int) buf.length;

    return Html.Doc.read_memory (buf, size, "http://movies.google.de", null, quiet);
}
48 public int parse (ref char[] buf) throws Error {
49 var doc = get_html_document (ref buf);
51 stderr.printf ("Error: parsing failed\n");
55 // TODO: set up location
58 var theater = search_tag_by_class (doc->children, "div", "theater");
59 if (theater == null) {
60 stderr.printf ("Error: does not contain theater\n");
64 while (theater != null) {
65 theater = parse_theater (theater);
/**
 * Parses one theater element: its description (name, address, phone) and
 * the movies listed in its left and right showtime columns.
 *
 * Missing pieces of markup are skipped instead of being dereferenced.
 *
 * @param t the theater element
 * @return the next sibling of //t//, or null after the last one
 */
private Xml.Node* parse_theater (Xml.Node* t) {
    var theater = new Theater ();

    Xml.Node* desc = t->children;
    if (desc != null && desc->name == "div" && desc->get_prop ("class") == "desc") {
        parse_theater_description (desc, theater);
    }

    // The showtimes block is expected to be the sibling right after the description.
    Xml.Node* showtimes = null;
    if (desc != null) {
        showtimes = desc->next;
    }

    if (showtimes != null && showtimes->name == "div" && showtimes->get_prop ("class") == "showtimes"
        && showtimes->children != null) {
        Xml.Node* left = search_tag_by_class (showtimes->children, "div", "show_left");
        parse_movie_column (left, theater);

        // The right column is searched for after the left one; without a left
        // column the search starts at the first child instead.
        Xml.Node* start = showtimes->children;
        if (left != null) {
            start = left->next;
        }
        if (start != null) {
            parse_movie_column (search_tag_by_class (start, "div", "show_right"), theater);
        }
    }

    return t->next;
}

// Fills name, address and phone of `theater` from a <div class="desc"> element.
private void parse_theater_description (Xml.Node* desc, Theater theater) {
    Xml.Node* name = desc->children;
    if (name == null) {
        return;
    }

    if (name->name == "h2" && name->get_prop ("class") == "name") {
        Xml.Node* a = name->children;
        if (a != null && a->name == "a") {
            theater.name = get_child_text_content (a);
        }
        print ("THEATER \"%s\"\n", theater.name);
    }

    Xml.Node* info = name->next;
    if (info == null || info->name != "div" || info->get_prop ("class") != "info") {
        return;
    }

    Xml.Node* text = info->children;
    if (text == null || text->name != "text" || text->content == null) {
        return;
    }

    // The info line reads "<address> - <phone>"; anything else is ignored.
    var address_and_phone = text->content.split (" - ");
    if (address_and_phone.length >= 2) {
        theater.address = address_and_phone[0];
        theater.phone = address_and_phone[1].replace (" ", "").replace ("-", "");
    }
}

// Parses every movie of one showtimes column; a null or empty column is a no-op.
private void parse_movie_column (Xml.Node* column, Theater theater) {
    if (column == null || column->children == null) {
        return;
    }

    Xml.Node* movie = search_tag_by_class (column->children, "div", "movie");
    while (movie != null) {
        movie = parse_movie (movie, theater);
    }
}
/**
 * Parses one movie element and reports it through _get_callback when its
 * title passes the active filter.
 *
 * The filter is the wildcard `pattern` when one is set, otherwise the
 * `_title` prefix. A node that yields no title never matches.
 *
 * @param m the movie element
 * @param theater the theater the movie is shown in
 * @return the next sibling of //m//, or null after the last one
 */
private Xml.Node* parse_movie (Xml.Node* m, Theater theater) {
    var movie = new GoogleMovie ();
    movie.theater = theater;

    for (Xml.Node* n = m->children; n != null; n = n->next) {
        if (n->name == "div" && n->get_prop ("class") == "name") {
            movie.title = parse_movie_name (n);
        }
        if (n->name == "span" && n->get_prop ("class") == "info") {
            parse_movie_info (n, movie);
        }
        if (n->name == "div" && n->get_prop ("class") == "times") {
            parse_movie_times (n, movie);
        }
    }

    // parse_movie_name () returns null for nodes without a title link; such
    // nodes must not reach title.length / has_prefix below.
    if (movie.title == null) {
        return m->next;
    }

    bool wanted;
    if (pattern == null) {
        wanted = _title != null && movie.title.has_prefix (_title);
    } else {
        wanted = pattern.match ((uint) movie.title.length, movie.title, null);
    }

    if (wanted) {
        _get_callback (movie);
    }

    return m->next;
}
/**
 * Extracts the movie title from a name element.
 *
 * The title is the text inside the element's leading <a> child, with a
 * trailing language tag removed by strip_tags ().
 *
 * @param n the name element
 * @return the cleaned title, or null when there is no link or no link text
 */
private string? parse_movie_name (Xml.Node* n) {
    Xml.Node* a = n->children;
    if (a == null || a->name != "a") {
        return null;
    }

    // Only a text child carries a usable title; any other child must not be
    // passed on to strip_tags ().
    unowned string? title = get_child_text_content (a);
    if (title == null) {
        return null;
    }

    print ("\"%s\"\n", title);
    return strip_tags (title);
}
// FIXME - this is specific for Germany
/**
 * Removes one trailing language tag from a movie title.
 *
 * @param title the title as shown on the page
 * @return //title// without the first matching suffix, or //title// unchanged
 */
private string strip_tags (string title) {
    // " (OmU)": original audio with subtitles, " (OV)": original audio
    string[] language_tags = { " (OmU)", " (OV)" };

    foreach (unowned string tag in language_tags) {
        if (title.has_suffix (tag)) {
            return title.substring (0, title.length - tag.length);
        }
    }

    return title;
}
162 private void parse_movie_info (Xml.Node* i, GoogleMovie movie) {
163 var text = i->children;
164 if (text != null && text->name == "text")
165 print ("\t\"%s\"\n", text->content);
167 for (var n = text->next; n != null; n = n->next) {
168 if (n->name == "nobr") {
169 movie.rating = parse_rating (n);
170 if (movie.rating == 0)
/**
 * Reads a movie rating from the alt text of a rating image.
 *
 * Looks through the <nobr> children of //nobr// for the first <img> whose
 * alt text is long enough to hold a score, e.g. "Rated 3.5 out of 5.0".
 *
 * @param nobr the outer <nobr> element
 * @return ten times the score found after the first six bytes of the alt
 *         text, or 0 when no usable image is found
 */
private int parse_rating (Xml.Node* nobr) {
    // The score starts right after the six-byte "Rated " prefix.
    const int SCORE_OFFSET = 6;

    for (Xml.Node* n = nobr->children; n != null; n = n->next) {
        if (n->name != "nobr") {
            continue;
        }
        for (Xml.Node* img = n->children; img != null; img = img->next) {
            if (img->name != "img") {
                continue;
            }
            var alt = img->get_prop ("alt"); // "Rated 0.0 out of 5.0"
            // A missing or too short alt text has no score to read; keep looking.
            if (alt == null || alt.length <= SCORE_OFFSET) {
                continue;
            }
            return (int) (10 * alt.substring (SCORE_OFFSET).to_double ());
        }
    }

    return 0;
}
/**
 * Stores the showtimes text of a times element in //movie//.
 *
 * @param node the times element
 * @param movie the movie to update; left untouched when //node// has no text child
 */
private void parse_movie_times (Xml.Node* node, GoogleMovie movie) {
    unowned string? times = get_child_text_content (node);
    if (times == null) {
        return;
    }

    // "\xc2\xa0" is the UTF-8 encoding of U+00A0 (no-break space).
    movie.showtimes = times.replace ("\xc2\xa0", ",");
}
/**
 * Returns the content of a node's first child if that child is a text node.
 *
 * @param n the parent node
 * @return the first child's content, or null when there is no first child
 *         or it is not named "text"
 */
unowned string? get_child_text_content (Xml.Node *n) {
    Xml.Node* first = n->children;

    if (first == null || first->name != "text") {
        return null;
    }

    return first->content;
}
/**
 * Depth-first search for an element with a given name and attribute value.
 *
 * Visits //node//, then its descendants, then each following sibling in turn.
 *
 * @param node where the search starts; must not be null
 * @param tag element name to look for
 * @param prop attribute name
 * @param val required attribute value
 * @return the first matching node, or null when none matches
 */
Xml.Node* search_tag_by_property (Xml.Node* node, string tag, string prop, string val) requires (node != null) {
    Xml.Node* cursor = node;

    while (cursor != null) {
        bool is_match = cursor->name == tag && cursor->get_prop (prop) == val;
        if (is_match) {
            return cursor;
        }

        // Descend before moving on to the next sibling.
        if (cursor->children != null) {
            Xml.Node* hit = search_tag_by_property (cursor->children, tag, prop, val);
            if (hit != null) {
                return hit;
            }
        }

        cursor = cursor->next;
    }

    return null;
}
/**
 * Convenience wrapper around search_tag_by_property () for the "class" attribute.
 *
 * @param node where the search starts; must not be null
 * @param tag element name to look for
 * @param class required value of the class attribute
 * @return the first matching node, or null when none matches
 */
Xml.Node* search_tag_by_class (Xml.Node* node, string tag, string @class) requires (node != null) {
    Xml.Node* match = search_tag_by_property (node, tag, "class", @class);
    return match;
}
221 public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
222 _get_callback = callback;
224 if (title.chr(title.length, '*') != null) {
225 pattern = new PatternSpec (title);
230 // TODO - use google.de in Germany, also provides genres
231 string uri = "http://google.com/movies";
232 if (location != null && location != "")
233 uri += "?near=" + location;
235 stdout.printf ("GET: %s\n", uri);
237 File file = File.new_for_uri (uri);
238 InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, null);
240 char[] buf = new char[256*1024];
243 while (total < 256*1024) {
244 nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable);
246 if (cancellable.is_cancelled ())
252 return parse (ref buf);
254 stderr.printf ("Error: %s\n", e.message);