1 /* This file is part of Cinaest.
3 * Copyright (C) 2009 Philipp Zabel
5 * Cinaest is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * Cinaest is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
// Error domain for the HTML parser below. The codes used in this file are
// EOF (thrown by parse_tag) and WRONG_TAG (thrown by expect_tag and
// parse_tag_attribute).
19 errordomain ParserError {
// Address text of the cinema; parse_cinema () assigns it right after
// constructing the object.
26 public string address;
// Creates a cinema from a name string.
// NOTE(review): the constructor body is not visible here - presumably it
// stores _name in a name field; confirm.
29 public Cinema (string _name) {
// Record describing one movie listing. parse_movie () creates an instance,
// fills in title, rating, cinema, runtime, fsk and showtimes, and passes it
// to the ReceiveMovie callback.
34 public class GoogleMovie {
// Showtimes as a single string; parse_movie () assembles it from text
// chunks joined with ','.
40 public string showtimes;
// Parser for the movie listing page downloaded by query (). Movies it
// constructs are reported through a ReceiveMovie callback.
43 public class GoogleParser : Object {
// Location name. While it is still null, parse () fills it from the part of
// a "/movies?near=" link that follows "near=".
46 public string location;
// Callback type that receives one parsed movie.
50 public delegate void ReceiveMovie (GoogleMovie movie);
// Callback stored by query () and invoked at the end of parse_movie ().
51 public ReceiveMovie _get_callback;
// Scans forward from the parse position for the next '<'.
// NOTE(review): the initialisation of i and the return statement are not
// visible - presumably the stopping index i is returned; confirm.
53 public int next_tag_offset () {
// Stops at '<' or at a NUL byte. Because of the pre-increment, the byte at
// the starting index itself is never tested.
55 while (current[++i] != '<' && current[i] != 0);
// Moves the parse position forward by the distance next_tag_offset () reports.
59 public void next_tag () {
62 current += next_tag_offset ();
// Moves towards the end of the tag at the parse position.
65 public void finish_tag () {
// Loops while the current byte is neither '>' nor NUL.
// NOTE(review): the loop body is not visible - presumably it advances
// current by one; confirm.
66 while (current[0] != '>' && current[0] != 0)
// Only acts when the scan stopped on '>' (not on NUL); the action itself is
// not visible here.
68 if (current[0] == '>')
// Reads the tag name at the parse position.
//
// finish: defaults to true; its use is not visible in this excerpt
//         (presumably it decides whether finish_tag () is called).
// Returns a weak string, so the caller does not own the result.
// Throws ParserError.EOF with the message "EOF in tag"; the triggering
// condition is not visible here.
72 public weak string parse_tag (bool finish = true) throws Error {
// Advance i across alphanumeric bytes (pre-increment: the byte at the
// starting index is not tested).
76 while (current[++i].isalnum ());
78 throw new ParserError.EOF ("EOF in tag");
79 if (current[i] == '>')
// The tag name is a view into the buffer starting one byte after the parse
// position (presumably skipping the leading '<').
82 tag = (string) (current + 1);
// Parses the next tag (with finish = true) and checks it against the
// expected name.
//
// tag: the tag name the caller expects.
// Throws ParserError.WRONG_TAG with a message naming the found and the
// expected tag; the comparison itself is not visible in this excerpt.
89 public void expect_tag (string tag) throws Error {
90 var found = parse_tag (true);
92 throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
// Copies the text at the parse position and returns it as a new string.
// NOTE(review): the rest of the body is not visible - presumably current is
// advanced past the text before returning; confirm.
97 public string parse_text () {
// Duplicate exactly next_tag_offset () bytes starting at current.
98 string text = ((string) current).ndup (next_tag_offset ());
// Parses one name=value attribute at the parse position.
//
// _attr: attribute name the caller is interested in.
// value: out parameter receiving the attribute value.
// NOTE(review): how _attr is compared with the parsed name, and what value
// is set to when they differ, is not visible in this excerpt.
103 public void parse_attribute (string _attr, out string value) {
// Scan the attribute name: stop at '=', at the end of the tag, or at NUL.
108 while (current[++i] != '=' && current[i] != '>' && current[i] != 0) {
// The name is the first i bytes at the parse position.
111 attr = ((string) current).ndup (i);
// Scan the value: stop at whitespace, at the end of the tag, or at NUL.
117 while (!current[++i].isspace () && current[i] != '>' && current[i] != 0) {
// Inside a double-quoted value, jump to the closing quote so that
// whitespace or '>' between the quotes does not end the scan.
118 if (current[i] == '"')
119 while (current[++i] != '"' && current[i] != 0);
// Quoted value: take i - 2 bytes starting at offset 1, i.e. drop the
// surrounding quotes.
122 if (current[0] == '"')
123 value = ((string) current).substring (1, i - 2);
// Otherwise (presumably the else branch): take the first i bytes as they are.
125 value = ((string) current).ndup (i);
// Skips whitespace at the parse position.
// NOTE(review): the initialisation of i and the update of current are not
// visible in this excerpt.
130 public void skip_whitespace () {
// Advance i while the byte is whitespace and not NUL.
134 while (current[++i].isspace () && current[i] != 0);
// Parses a tag and extracts the value of one of its attributes.
//
// tag:       tag name expected at the parse position.
// attribute: attribute name passed on to parse_attribute ().
// Returns a nullable string; value starts out as null.
// Throws ParserError.WRONG_TAG with a message naming the found and the
// expected tag; the comparison itself is not visible in this excerpt.
138 public string? parse_tag_attribute (string tag, string attribute) throws Error {
// finish = false, since the attributes of the tag still need to be read.
139 var found = parse_tag (false);
141 throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
145 string? value = null;
// Walk the attributes until the end of the tag or the end of the buffer.
147 while (current[0] != '>' && current[0] != 0) {
148 parse_attribute (attribute, out value);
151 // Skip the closing '>' bracket
// Returns a copy of s with numeric character references ("&#<digits>")
// replaced by the UTF-8 encoding of the referenced code point. Two further
// prefixes are special-cased (see the review note below); every other byte
// is copied unchanged.
158 public string unescape_unicode (string s) {
163 for (i = 0; i < l; i++) {
// Start of a numeric reference.
164 if (s[i] == '&' && s[i + 1] == '#') {
// Find the end of the digit run that follows "&#".
165 for (j = i + 2; j < l; j++) {
166 if (!s[j].isdigit ())
// Digits between "&#" and position j, parsed as a decimal code point.
172 int codepoint = s.substring (i + 2, j - i - 2).to_int ();
// Buffer for the UTF-8 encoding of the code point.
173 char[] buf = new char[6];
174 ((unichar) codepoint).to_utf8 ((string) buf);
175 result += (string) buf;
// NOTE(review): the literals "&" and """ in the next two checks look like
// HTML entity names that were decoded somewhere along the way (presumably
// the ampersand and double-quote entities) - verify against the upstream file.
180 if (s.offset (i).has_prefix ("&")) {
185 if (s.offset (i).has_prefix (""")) {
// Default case: copy the single byte at position i.
190 result += s.substring (i, 1);
// Parses one movie entry at the parse position, builds a GoogleMovie from
// it and hands that to _get_callback. The expected tag sequence is spelled
// out by the expect_tag () calls below.
// Throws whatever the tag helpers throw (ParserError.EOF / WRONG_TAG).
196 public void parse_movie () throws Error {
197 expect_tag ("div"); // class=movie
198 expect_tag ("div"); // class=name
199 expect_tag ("a"); // href="/movies?near=city&mid=..."
200 expect_tag ("span"); // dir=ltr
// Title text: converted from iso-8859-1 to utf-8, then numeric character
// references are resolved.
201 var title = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
202 expect_tag ("/span");
205 expect_tag ("span"); // class=info
206 string[] runtime_and_fsk = {};
208 var tag = parse_tag ();
// NOTE(review): parse_tag_attribute () returns a nullable string, and
// offset (6) is called on it without a visible null check - confirm.
221 string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0"
// to_double () parses the leading number of the remaining text.
222 rating = rating_string.to_double ();
227 expect_tag ("/nobr");
228 expect_tag ("/nobr");
// Info text with one substring removed and the first 3 bytes skipped, split
// at " - ". Element 0 is later read as the runtime and element 1 as fsk.
229 runtime_and_fsk = parse_text ().replace ("‎", "").offset (3).split (" - ");
230 if (parse_tag () == "a") {
233 if (parse_tag () == "a") {
236 expect_tag ("/span");
240 expect_tag ("div"); // class=times
// The leading showtimes text has one substring replaced by ','.
241 var showtimes = parse_text ().replace (" ", ",");
// Each following <a> contributes its text followed by ','.
242 while (parse_tag () == "a") {
243 showtimes += parse_text () + ",";
// Title filter: without a pattern the title must start with _title;
// otherwise it is matched against the PatternSpec. What happens on a
// mismatch is not visible here (presumably the movie is dropped).
247 if (pattern == null) {
248 if (!title.has_prefix (_title))
251 if (!pattern.match ((uint) title.length, title, null))
255 var movie = new GoogleMovie ();
// Remove a trailing language tag, then backslash-escape double quotes.
257 movie.title = strip_tags (title).replace ("\"", "\\\"");
// Rating is stored as an integer, ten times the parsed value.
258 movie.rating = (int) (rating * 10);
// Cinema most recently created by parse_cinema ().
260 movie.cinema = last_cinema;
262 if (runtime_and_fsk.length >= 2) {
263 unowned string runtime = runtime_and_fsk[0];
// The leading integer is taken as hours and stored as seconds.
264 movie.runtime = 3600 * runtime.to_int ();
// Look for "hr "; the integer after it is taken as minutes.
265 runtime = runtime.str ("hr ");
267 movie.runtime += 60 * runtime.offset (3).to_int ();
268 movie.fsk = runtime_and_fsk[1];
270 movie.showtimes = showtimes;
271 _get_callback (movie);
274 // FIXME - this is specific for Germany
// Removes a trailing " (OmU)" or " (OV)" marker from a movie title.
//
// title: movie title, possibly ending in one of the markers.
// Returns the title without the marker.
// NOTE(review): the final return is not visible - presumably the title is
// returned unchanged when no marker matches; confirm.
275 private string strip_tags (string title) {
276 string tag_suffix = " (OmU)"; // original audio with subtitles
277 if (title.has_suffix (tag_suffix))
278 return title.substring (0, title.length - tag_suffix.length);
279 tag_suffix = " (OV)"; // original audio
280 if (title.has_suffix (tag_suffix))
281 return title.substring (0, title.length - tag_suffix.length);
// Parses one cinema entry at the parse position and stores the result in
// last_cinema, which parse_movie () attaches to the movies it builds.
// Throws whatever the tag helpers throw (ParserError.EOF / WRONG_TAG).
285 public void parse_cinema () throws Error {
286 expect_tag ("div"); // class=theater
287 expect_tag ("div"); // class=desc id=theater_...
288 expect_tag ("h2"); // class=name
289 expect_tag ("a"); // href="/movies?near=city&tid=..."
290 expect_tag ("span"); // dir=ltr
// Cinema name: converted from iso-8859-1 to utf-8, then numeric character
// references are resolved.
291 var name = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
292 expect_tag ("/span");
295 expect_tag ("div"); // class=info
// Info text split at " - ": element 0 is used as the address, element 1 as
// the phone number.
296 var address_and_phone = parse_text ().replace (" ", " ").split (" - ");
297 string address = null;
// Both stay unset unless the split produced at least two parts.
299 if (address_and_phone.length >= 2) {
300 address = address_and_phone[0];
// The phone number is stored without spaces and dashes.
301 phone = address_and_phone[1].replace (" ", "").replace ("-", "");
303 expect_tag ("a"); // target=_top
308 last_cinema = new Cinema (name);
309 last_cinema.address = address;
310 last_cinema.phone = phone;
// Parses a downloaded page held in buf.
//
// buf: character buffer with the page content; the loops below rely on a
//      NUL byte marking the end of the data.
// Returns an int whose meaning is not visible in this excerpt.
// Throws whatever the tag helpers throw (ParserError.EOF / WRONG_TAG).
313 public int parse (ref char[] buf) throws Error {
// Phase 1: search for a "/movies?near=" link until location is set or the
// buffer ends.
318 while (location == null && current[0] != 0) {
// NOTE(review): this scan for '>' has no NUL check, unlike the other scans
// in this class - it would read past the terminator if no '>' remains.
320 while (current[i++] != '>');
321 if (((string) current).has_prefix ("<a href=\"/movies?near=")) {
322 string href = parse_tag_attribute ("a", "href");
323 char* p = (char*) href.offset (13); // skip "/movies?near="
// The location ends at the next '&' or at the end of the href value.
326 while (p[++j] != '&' && p[j] != 0);
// Upper-case the first character; this writes into href's own bytes.
327 p[0] = p[0].toupper ();
328 location = ((string) p).ndup (j);
// Phase 2: walk the rest of the buffer, dispatching on the two container
// tags; the branch bodies are not visible here (presumably they call
// parse_movie () and parse_cinema ()).
333 while (current[0] != 0) {
// NOTE(review): same unguarded '>' scan as above.
335 while (current[i++] != '>');
336 if (((string) current).has_prefix ("<div class=movie>")) {
339 } else if (((string) current).has_prefix("<div class=theater>")) {
// Downloads the movie listing page, reads it into a buffer and runs
// parse () on it.
//
// title:       title filter; treated as a glob pattern if it contains '*'.
// location:    optional location, appended to the URI as "?near=<location>".
// callback:    stored in _get_callback; parse_movie () invokes it.
// cancellable: optional cancellable, passed to the stream reads.
// Returns the result of parse (); the value returned on the error path is
// not visible in this excerpt.
350 public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
351 _get_callback = callback;
// A '*' anywhere in the title switches to PatternSpec matching.
353 if (title.chr(title.length, '*') != null) {
354 pattern = new PatternSpec (title);
359 // TODO - use google.de in Germany, also provides genres
360 string uri = "http://google.com/movies";
361 if (location != null && location != "")
362 uri += "?near=" + location;
364 stdout.printf ("GET: %s\n", uri);
366 File file = File.new_for_uri (uri);
// NOTE(review): null is passed here instead of the cancellable parameter,
// so opening the stream cannot be cancelled - confirm this is intended.
367 InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, null);
// Fixed 256 KiB buffer; the read loop below never reads beyond that size.
369 char[] buf = new char[256*1024];
372 while (total < 256*1024) {
// Read into the unused tail of the buffer.
373 nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable);
// NOTE(review): cancellable is nullable and defaults to null; no null
// check is visible before this call - confirm.
375 if (cancellable.is_cancelled ())
381 return parse (ref buf);
// Errors are only reported on stderr.
383 stderr.printf ("Error: %s\n", e.message);