1 /* This file is part of Cinaest.
3 * Copyright (C) 2009 Philipp Zabel
5 * Cinaest is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * Cinaest is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
// Error domain for the HTML parser in this file. The codes used by the visible
// code are ParserError.EOF (parse_tag) and ParserError.WRONG_TAG (expect_tag,
// parse_tag_attribute).
19 errordomain ParserError {
// Address of the cinema. GoogleParser.parse_cinema () assigns it from the text
// in front of the " - " separator, or leaves it null when there is no separator.
26 public string address;
// Creates a cinema from its display name.
// NOTE(review): presumably stores _name in a name field - body not visible here; confirm.
29 public Cinema (string _name) {
// Record for one movie listing. GoogleParser.parse_movie () creates an instance,
// fills it in and hands it to the ReceiveMovie callback.
34 public class GoogleMovie {
// Showtimes as one string; parse_movie () builds it by joining the text pieces
// of the "times" block with ','.
40 public string showtimes;
// Parser for the HTML of the Google movies page that query () downloads.
// The methods below walk the buffer through the `current` cursor and report
// each parsed movie through a callback.
43 public class GoogleParser : Object {
// Location name. While it is still null, parse () derives it from the first
// link whose href starts with "/movies?near=".
46 public string location;
// Signature of the callback that receives each GoogleMovie built by parse_movie ().
50 public delegate void ReceiveMovie (GoogleMovie movie);
// Callback stored by query () and invoked at the end of parse_movie ().
51 public ReceiveMovie _get_callback;
// Scans forward from the cursor, advancing i before each test, until it finds
// a '<' or the terminating NUL.
// NOTE(review): presumably returns the offset i reached by the loop - the
// initialisation of i and the return statement are not visible here; confirm.
53 public int next_tag_offset () {
55 while (current[++i] != '<' && current[i] != 0);
// Moves the cursor forward by the distance that next_tag_offset () reports,
// i.e. towards the next '<' (or the end of the buffer).
59 public void next_tag () {
62 current += next_tag_offset ();
// Deals with the remainder of the tag under the cursor: loops while the cursor
// is on neither '>' nor NUL, then takes an extra step if it stopped on '>'.
// NOTE(review): presumably the loop advances the cursor and the final branch
// steps past the '>' - neither body is visible here; confirm.
65 public void finish_tag () {
66 while (current[0] != '>' && current[0] != 0)
68 if (current[0] == '>')
// Reads the name of the tag under the cursor.
//
// finish: defaults to true. NOTE(review): presumably selects whether the rest
//         of the tag is consumed as well (cf. finish_tag ()) - not visible; confirm.
// Returns an unowned string; the name is taken from (string) (current + 1),
// which points into the parse buffer just after the first character instead of
// being a copy.
// Throws ParserError.EOF ("EOF in tag"). NOTE(review): presumably when the
// scan hits NUL - the guarding condition is not visible here; confirm.
72 public unowned string parse_tag (bool finish = true) throws Error {
// Skip over the alphanumeric characters that follow.
76 while (current[++i].isalnum ());
78 throw new ParserError.EOF ("EOF in tag");
79 if (current[i] == '>')
82 tag = (string) (current + 1);
// Parses the next tag with parse_tag (true) and reports a mismatch.
//
// tag: the tag name the caller expects, e.g. "div" or "/span".
// Throws ParserError.WRONG_TAG with a message naming both tags.
// NOTE(review): the comparison that guards the throw is not visible here;
// presumably found != tag - confirm.
89 public void expect_tag (string tag) throws Error {
90 var found = parse_tag (true);
92 throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
// Copies the text that lies between the cursor and the next tag: ndup ()
// duplicates next_tag_offset () bytes starting at the cursor.
// NOTE(review): presumably also advances the cursor and returns the copy -
// not visible here; confirm.
97 public string parse_text () {
98 string text = ((string) current).ndup (next_tag_offset ());
// Parses one name=value attribute at the cursor.
//
// _attr: name of the attribute the caller is interested in.
// value: out parameter that receives the attribute value.
// NOTE(review): how attr is compared with _attr, and what value holds when the
// names differ, is not visible here; confirm.
103 public void parse_attribute (string _attr, out string value) {
// Attribute name: scan up to '=', the end of the tag or the end of the buffer.
108 while (current[++i] != '=' && current[i] != '>' && current[i] != 0) {
111 attr = ((string) current).ndup (i);
// Attribute value: scan up to whitespace, '>' or NUL. A double-quoted section
// is skipped as a whole by the inner loop, so whitespace inside the quotes
// does not end the value.
117 while (!current[++i].isspace () && current[i] != '>' && current[i] != 0) {
118 if (current[i] == '"')
119 while (current[++i] != '"' && current[i] != 0);
// Quoted value: skip the first character and take i - 2 characters, which
// drops the surrounding quotes; otherwise copy the i scanned bytes as they are.
122 if (current[0] == '"')
123 value = ((string) current).substring (1, i - 2);
125 value = ((string) current).ndup (i);
// Scans over the whitespace characters that follow, stopping at the first
// non-space character or at NUL.
// NOTE(review): presumably the cursor is then advanced by i - not visible
// here; confirm.
130 public void skip_whitespace () {
134 while (current[++i].isspace () && current[i] != 0);
// Parses a tag and looks up one of its attributes.
//
// tag:       tag name the caller expects at the cursor.
// attribute: name of the attribute whose value is wanted.
// Returns a nullable string; value starts out as null and is filled in by
// parse_attribute ().
// Throws ParserError.WRONG_TAG. NOTE(review): presumably when found != tag -
// the guarding condition is not visible here; confirm.
138 public string? parse_tag_attribute (string tag, string attribute) throws Error {
// finish = false: the attributes are handled by the loop below.
139 var found = parse_tag (false);
141 throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
145 string? value = null;
// Walk the attributes until the end of the tag or the end of the buffer.
147 while (current[0] != '>' && current[0] != 0) {
148 parse_attribute (attribute, out value);
151 // Skip the closing '>' bracket
// Builds a copy of s in which decimal numeric character references
// ('&', '#', digits) are replaced by the UTF-8 encoding of the code point.
// Two further prefixes are special-cased; everything else is appended
// unchanged, one byte at a time.
//
// s: text that may contain character references.
// NOTE(review): the declarations of result, i, j and l and the final return
// are not visible here; presumably result is returned - confirm.
158 public string unescape_unicode (string s) {
163 for (i = 0; i < l; i++) {
164 if (s[i] == '&' && s[i + 1] == '#') {
// Find the end of the digit run that follows the '#'.
165 for (j = i + 2; j < l; j++) {
166 if (!s[j].isdigit ())
// Digits between the '#' and position j form the decimal code point; it is
// encoded into a 6-byte scratch buffer and appended to the result.
172 int codepoint = s.substring (i + 2, j - i - 2).to_int ();
173 char[] buf = new char[6];
174 ((unichar) codepoint).to_utf8 ((string) buf);
175 result += (string) buf;
// NOTE(review): the literals in the next two tests look as if an HTML entity
// was decoded in transit (probably the named entities for ampersand and double
// quote); as written the second one is not a valid string literal - verify
// against the upstream file.
180 if (s.offset (i).has_prefix ("&")) {
185 if (s.offset (i).has_prefix (""")) {
// Default case: copy the current byte.
190 result += s.substring (i, 1);
// Parses one movie block at the cursor and reports it through _get_callback.
//
// Reads, in this order: the title, the info text (runtime and rating label,
// optionally preceded by a star rating), and the showtimes. The result is
// attached to last_cinema, the cinema most recently set by parse_cinema ().
// Throws whatever expect_tag () / parse_tag () / parse_tag_attribute () throw
// when the markup differs from the expected tag sequence.
196 public void parse_movie () throws Error {
197 expect_tag ("div"); // class=movie
198 expect_tag ("div"); // class=name
199 expect_tag ("a"); // href="/movies?near=city&mid=..."
200 expect_tag ("span"); // dir=ltr
// The title text is converted from ISO-8859-1 to UTF-8 and then has its
// numeric character references expanded.
201 var title = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
202 expect_tag ("/span");
205 expect_tag ("span"); // class=info
206 string info_text = parse_text ().replace ("‎", "");
207 string[] runtime_and_fsk = {};
209 var tag = parse_tag ();
// NOTE(review): parse_tag_attribute () returns a nullable string; .offset (6)
// would be applied to null if the img tag had no alt attribute - confirm.
222 string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0"
223 rating = rating_string.to_double ();
228 expect_tag ("/nobr");
229 expect_tag ("/nobr");
230 info_text = parse_text ().replace ("‎", "").offset (3);
231 if (parse_tag () == "a") {
234 if (parse_tag () == "a") {
237 expect_tag ("/span");
// The info text is split on " - "; the first two parts are used below as
// runtime and rating label (fsk).
241 runtime_and_fsk = info_text.split (" - ");
242 expect_tag ("div"); // class=times
// Showtimes: the leading text plus the text after each following <a> tag, each
// piece terminated by ','.
243 var showtimes = parse_text ().replace (" ", ",");
244 while (parse_tag () == "a") {
245 showtimes += parse_text () + ",";
// Title filter: a plain prefix test against _title when no glob pattern is
// set, otherwise a PatternSpec match.
// NOTE(review): presumably a non-matching title returns early - the branch
// bodies are not visible here; confirm.
249 if (pattern == null) {
250 if (!title.has_prefix (_title))
253 if (!pattern.match ((uint) title.length, title, null))
257 var movie = new GoogleMovie ();
// Double quotes in the title are backslash-escaped after strip_tags ().
259 movie.title = strip_tags (title).replace ("\"", "\\\"");
// Rating is stored as an integer: the parsed value times 10, truncated.
260 movie.rating = (int) (rating * 10);
262 movie.cinema = last_cinema;
264 if (runtime_and_fsk.length >= 2) {
// Runtime in seconds: the leading integer counts as hours, the integer after
// "hr " as minutes.
// NOTE(review): runtime.str ("hr ") yields null when the text has no "hr ";
// a guard may sit on the line not shown between 267 and 269 - confirm.
265 unowned string runtime = runtime_and_fsk[0];
266 movie.runtime = 3600 * runtime.to_int ();
267 runtime = runtime.str ("hr ");
269 movie.runtime += 60 * runtime.offset (3).to_int ();
270 movie.fsk = runtime_and_fsk[1];
272 movie.showtimes = showtimes;
273 _get_callback (movie);
276 // FIXME - this is specific for Germany
// Removes a trailing language marker from a movie title: " (OmU)" is tried
// first, then " (OV)". When a marker matches, the title without that suffix is
// returned.
// NOTE(review): presumably the title is returned unchanged when neither
// suffix matches - the final return is not visible here; confirm.
277 private string strip_tags (string title) {
278 string tag_suffix = " (OmU)"; // original audio with subtitles
279 if (title.has_suffix (tag_suffix))
280 return title.substring (0, title.length - tag_suffix.length);
281 tag_suffix = " (OV)"; // original audio
282 if (title.has_suffix (tag_suffix))
283 return title.substring (0, title.length - tag_suffix.length);
// Parses one theater block at the cursor and stores the result in last_cinema,
// which parse_movie () attaches to the movies it parses afterwards.
//
// Reads the cinema name, then an info text that is split on " - " into address
// and phone number.
// Throws whatever expect_tag () throws when the markup differs from the
// expected tag sequence.
287 public void parse_cinema () throws Error {
288 expect_tag ("div"); // class=theater
289 expect_tag ("div"); // class=desc id=theater_...
290 expect_tag ("h2"); // class=name
291 expect_tag ("a"); // href="/movies?near=city&tid=..."
292 expect_tag ("span"); // dir=ltr
// The name text is converted from ISO-8859-1 to UTF-8 and then has its numeric
// character references expanded.
293 var name = unescape_unicode (convert (parse_text (), -1, "utf-8", "iso-8859-1")); // FIXME
294 expect_tag ("/span");
297 expect_tag ("div"); // class=info
298 var address_and_phone = parse_text ().replace (" ", " ").split (" - ");
299 string address = null;
// Address and phone are only taken when the separator was present; spaces and
// dashes are removed from the phone number.
301 if (address_and_phone.length >= 2) {
302 address = address_and_phone[0];
303 phone = address_and_phone[1].replace (" ", "").replace ("-", "");
305 expect_tag ("a"); // target=_top
310 last_cinema = new Cinema (name);
311 last_cinema.address = address;
312 last_cinema.phone = phone;
// Parses the downloaded page held in buf.
//
// First loop (only while location is still null): looks for a link whose href
// starts with "/movies?near=" and derives location from that href.
// Second loop: walks the rest of the buffer and recognises the
// "<div class=movie>" and "<div class=theater>" blocks.
// NOTE(review): presumably those branches call parse_movie () / parse_cinema ()
// and the method returns a count - neither is visible here; confirm.
315 public int parse (ref char[] buf) throws Error {
320 while (location == null && current[0] != 0) {
// NOTE(review): unlike the other scanners in this class, this loop has no NUL
// check; without a following '>' it would read past the terminator - confirm.
322 while (current[i++] != '>');
323 if (((string) current).has_prefix ("<a href=\"/movies?near=")) {
// NOTE(review): parse_tag_attribute () returns a nullable string; href is used
// without a visible null check - confirm.
324 string href = parse_tag_attribute ("a", "href");
325 char* p = (char*) href.offset (13); // skip "/movies?near="
// Find the end of the location (first '&' or end of string), upper-case its
// first character in place, and copy those j bytes.
328 while (p[++j] != '&' && p[j] != 0);
329 p[0] = p[0].toupper ();
330 location = ((string) p).ndup (j);
335 while (current[0] != 0) {
// NOTE(review): same unchecked scan for '>' as in the first loop.
337 while (current[i++] != '>');
338 if (((string) current).has_prefix ("<div class=movie>")) {
341 } else if (((string) current).has_prefix("<div class=theater>")) {
// Downloads the Google movies page and parses it.
//
// title:       title filter; if it contains a '*' it is compiled into a
//              PatternSpec and matched as a glob in parse_movie ().
// location:    optional place name; when non-empty it is appended to the URI
//              as "?near=<location>".
// callback:    stored in _get_callback and invoked for each parsed movie.
// cancellable: passed to the stream reads.
// Returns the result of parse () on the downloaded buffer.
// NOTE(review): on error the message is printed to stderr; the value returned
// on that path is not visible here; confirm.
352 public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
353 _get_callback = callback;
355 if (title.chr(title.length, '*') != null) {
356 pattern = new PatternSpec (title);
361 // TODO - use google.de in Germany, also provides genres
362 string uri = "http://google.com/movies";
363 if (location != null && location != "")
364 uri += "?near=" + location;
366 stdout.printf ("GET: %s\n", uri);
368 File file = File.new_for_uri (uri);
// NOTE(review): the open is issued with a null cancellable rather than the
// caller's one - confirm that this is intended.
369 InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, null);
// The page is read into a fixed 256 KiB buffer; reading stops once it is full.
371 char[] buf = new char[256*1024];
374 while (total < 256*1024) {
375 nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable);
// NOTE(review): cancellable is nullable and defaults to null, yet it is
// dereferenced here without a visible null check - confirm.
377 if (cancellable.is_cancelled ())
383 return parse (ref buf);
385 stderr.printf ("Error: %s\n", e.message);