Poster downloading update
[cinaest] / src / poster / imdb-poster-downloader.vala
1 /* This file is part of Cinaest.
2  *
3  * Copyright (C) 2010 Philipp Zabel
4  *
5  * Cinaest is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Cinaest is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
17  */
18
// Margin constants kept under a Hildon namespace.
// NOTE(review): the names suggest they mirror the Hildon toolkit's
// HILDON_MARGIN_* values — confirm against the Hildon headers.
namespace Hildon {
        // Smaller of the two margins.
        public const int MARGIN_HALF = 4;

        // Larger of the two margins.
        public const int MARGIN_DOUBLE = 16;
}
23
// Upper bounds for the dimensions of a stored poster image; downloaded
// images exceeding either bound are scaled down before being stored.
namespace Poster {
        // Maximum poster height.
        public const int HEIGHT = 400;

        // Maximum poster width.
        public const int WIDTH = 288;
}
28
/**
 * A single IMDb poster image search.
 *
 * The message starts out as a GET request for the IMDb title search page
 * and carries the searched title and year plus the requested poster size,
 * so that the response callbacks can parse the result pages and retrieve
 * the poster image URI.
 */
public class IMDbMessage : Soup.Message {
        // Movie title as passed by the caller (UTF-8).
        public string title;
        // Release year used to disambiguate search results.
        public int year;

        public string poster_uri = null;
        // Requested poster dimensions.
        public int width;
        public int height;

        /**
         * Creates the search request for "title_ (year_)".
         *
         * @param title_ movie title, UTF-8 encoded
         * @param year_ release year
         * @param width_ requested poster width
         * @param height_ requested poster height
         */
        public IMDbMessage (string title_, int year_, int width_, int height_) {
                Object (method: "GET");

                title = title_;
                year = year_;
                width = width_;
                height = height_;

                // The query is sent in ISO 8859-1. If the title cannot be
                // represented in that charset, fall back to the UTF-8 title
                // instead of building the URL from a null string.
                string encoded_title;
                try {
                        encoded_title = convert (title, -1, "iso8859-1", "utf-8");
                } catch (ConvertError e) {
                        warning ("Cannot convert \"%s\" to ISO 8859-1: %s", title, e.message);
                        encoded_title = title;
                }

                // Percent-encode the whole query value byte by byte, so that
                // spaces and characters such as '&', '#', '+' or '%' in the
                // title cannot break or truncate the query string.
                string query = "%s (%d)".printf (encoded_title, year);
                string url = "http://www.imdb.com/find?s=tt&q=" + GLib.Uri.escape_string (query, null, false);
                uri = new Soup.URI (url);
        }
}
49
/**
 * Encapsulation of a single poster download.
 *
 * A download walks through a chain of asynchronous requests on the
 * downloader's session: IMDb title search, title page, optionally the photo
 * page, and finally the image file itself. Every path through the chain ends
 * with exactly one call to IMDbPosterDownloader.finished (), either with the
 * path of the stored poster or with null on failure.
 */
public class IMDbPosterDownload : Object {
        // Search page: title page link. Compiled on first use and then shared.
        private static Regex re_sp_tpl = null;

        private IMDbPosterDownloader downloader;
        private Soup.Session session;
        private string poster_path;
        private bool cancelled = false;

        // Handle identifying this download towards the D-Bus client.
        public int handle;

        /**
         * Starts a poster download by queueing the IMDb title search.
         *
         * @param title movie title
         * @param year release year as a decimal string
         * @param width requested poster width
         * @param height requested poster height
         * @param _handle handle reported back through the downloader
         * @param _downloader owner providing the session and the page regexes
         */
        public IMDbPosterDownload (string title, string year, int width, int height, int _handle, IMDbPosterDownloader _downloader) {
                handle = _handle;
                downloader = _downloader;
                session = downloader.session;

                print ("[%d] LOOKING FOR \"%s (%s)\"\n", handle, title, year);

                // Define poster path according to the Media Art Storage Spec (http://live.gnome.org/MediaArtStorageSpec)

        //      poster_path = Hildon.albumart_get_path (title, year, "movie");
                poster_path = Path.build_filename (Environment.get_user_cache_dir (),
                                                   "media-art",
                                                   "movie-%s-%s.jpeg".printf (
                              Checksum.compute_for_string (ChecksumType.MD5, title.down ()),
                              Checksum.compute_for_string (ChecksumType.MD5, year)));

                var message = new IMDbMessage (title, year.to_int (), width, height);
                session.queue_message (message, title_page_callback);
        }

        /**
         * Scans an IMDb search result page for the entry whose title and year
         * equal the ones stored in the request.
         *
         * @param msg the finished search request
         * @return absolute URL of the matching title page, or null if there
         *         is no exact match or the page could not be scanned
         */
        private string? find_title_page_url (IMDbMessage msg) {
                try {
                        if (re_sp_tpl == null) {
                                re_sp_tpl = new Regex ("<a href=\"(/title/[^\"]*/)\"[^>]*>([^<]*)</a> *\\(([0-9]*)");
                        }

                        MatchInfo match;
                        if (!re_sp_tpl.match ((string) msg.response_body.data, 0, out match)) {
                                return null;
                        }

                        string wanted_title = msg.title.down ();
                        do {
                                string found_title = match.fetch (2);
                                string found_year = match.fetch (3);
                                print ("[%d] POTENTIAL RESULT: %s (%s)\n", handle, found_title, found_year);
                                if (wanted_title == found_title.down () &&
                                    msg.year == found_year.to_int ()) {
                                        return "http://www.imdb.com" + match.fetch (1);
                                }
                        } while (match.next ());
                } catch (RegexError e) {
                        warning ("[%d] Failed to scan search results: %s", handle, e.message);
                }
                return null;
        }

        /**
         * Handles the response to the title search or to a title page request.
         *
         * A response still located at "/find" is a result list; the exactly
         * matching entry, if any, is requested next. A title page either
         * yields the thumbnail URL (for small requested sizes) or the link to
         * the photo page.
         */
        private void title_page_callback (Soup.Session session, Soup.Message message) {
                if (cancelled ||
                    message.status_code != Soup.KnownStatusCode.OK) {
                        print ("[%d] NO POSTER FOR %s (CODE %u)\n", handle, message.uri.to_string (false), message.status_code);
                        downloader.finished (this, null);
                        return;
                }

                var msg = (IMDbMessage) message;

                if (message.uri.path == "/find") {
                        print ("[%d] AMBIGUOUS RESULTS: %s\n", handle, message.uri.to_string (false));

                        string? title_url = find_title_page_url (msg);
                        if (title_url != null) {
                                print ("[%d] CHOSE RESULT URL: %s\n", handle, title_url);
                                message.uri = new Soup.URI (title_url);
                                session.queue_message (message, this.title_page_callback);
                        } else {
                                print ("[%d] NO MATCH\n", handle);
                                downloader.finished (this, null);
                        }
                        return;
                }

                print ("[%d] GOT TITLE PAGE FOR %s\n", handle, message.uri.path);

                // Can we get away with the low-res poster thumbnail on the title page?
                if (msg.width <= 95 && msg.height <= 140) {
                        MatchInfo match;
                        if (downloader.re_tp_pth.match ((string) message.response_body.data, 0, out match)) {
                                string url = match.fetch (1);
                                print ("[%d] POSTER THUMBNAIL URL: %s\n", handle, url);
                                message.uri = new Soup.URI (url);
                                session.queue_message (message, this.image_callback);
                        } else {
                                print ("[%d] NO POSTER THUMBNAIL AVAILABLE\n", handle);
                                downloader.finished (this, null);
                        }
                } else {
                        MatchInfo match;
                        if (downloader.re_tp_ppl.match ((string) message.response_body.data, 0, out match)) {
                                string url = "http://www.imdb.com" + match.fetch (1);
                                print ("[%d] FOUND PHOTO PAGE URL: %s\n", handle, url);
                                message.uri = new Soup.URI (url);
                                session.queue_message (message, this.photo_page_callback);
                        } else {
                                print ("[%d] NO POSTER AVAILABLE\n", handle);
                                downloader.finished (this, null);
                        }
                }
        }

        /**
         * Handles the photo page response and requests the poster image
         * referenced on it.
         */
        private void photo_page_callback (Soup.Session session, Soup.Message message) {
                if (cancelled ||
                    message.status_code != Soup.KnownStatusCode.OK) {
                        downloader.finished (this, null);
                        return;
                }

                print ("[%d] GOT PHOTO PAGE %s\n", handle, message.uri.path);

                MatchInfo match;
                if (downloader.re_pp_pim.match ((string) message.response_body.data, 0, out match)) {
                        string url = match.fetch (1);
                        print ("[%d] FOUND IMAGE URL: %s\n", handle, url);
                        message.uri = new Soup.URI (url);
                        session.queue_message (message, this.image_callback);
                } else {
                        // Without this the download would stay queued forever and
                        // the client would never be told that it failed.
                        print ("[%d] NO POSTER IMAGE ON PHOTO PAGE\n", handle);
                        downloader.finished (this, null);
                }
        }

        /**
         * Computes the largest size with the aspect ratio of the source image
         * that fits into Poster.WIDTH x Poster.HEIGHT.
         *
         * Only meant to be called for images exceeding the box in at least
         * one dimension.
         */
        private static void fit_to_poster_box (int src_width, int src_height, out int width, out int height) {
                if (src_width * Poster.HEIGHT > src_height * Poster.WIDTH) {
                        // Relatively wider than the box: the width is the limit
                        width = Poster.WIDTH;
                        height = (src_height * width + src_width / 2) / src_width;
                } else {
                        // Relatively taller than (or as tall as) the box: the height is the limit
                        height = Poster.HEIGHT;
                        width = (src_width * height + src_height / 2) / src_height;
                }
                // Extreme aspect ratios must not round a dimension down to zero
                width = int.max (width, 1);
                height = int.max (height, 1);
        }

        /**
         * Writes the response body to a temporary ".part" file, scales the
         * image down if it exceeds the poster box, and moves the result to
         * poster_path.
         *
         * @param message finished image request
         * @param width set to the width of the stored image
         * @param height set to the height of the stored image
         * @throws Error if the file cannot be written, is not a loadable
         *         image, cannot be saved or cannot be renamed
         */
        private void store_poster (Soup.Message message, out int width, out int height) throws Error {
                string part_path = poster_path + ".part";

                FileUtils.set_contents (part_path,
                                        (string) message.response_body.data,
                                        (ssize_t) message.response_body.length);

                var pixbuf = new Gdk.Pixbuf.from_file (part_path);
                width = pixbuf.width;
                height = pixbuf.height;

                if (width > Poster.WIDTH || height > Poster.HEIGHT) {
                        // Scale down, keeping the aspect ratio
                        fit_to_poster_box (pixbuf.width, pixbuf.height, out width, out height);
                        pixbuf = pixbuf.scale_simple (width, height, Gdk.InterpType.BILINEAR);
                        pixbuf.save (part_path, "jpeg", null);
                }

                if (FileUtils.rename (part_path, poster_path) != 0) {
                        throw new FileError.FAILED ("cannot rename %s to %s".printf (part_path, poster_path));
                }
        }

        /**
         * Handles the image response: stores the poster and reports the
         * outcome to the downloader.
         */
        private void image_callback (Soup.Session session, Soup.Message message) {
                if (cancelled ||
                    message.status_code != Soup.KnownStatusCode.OK) {
                        downloader.finished (this, null);
                        return;
                }

                print ("[%d] Downloaded poster: %s\n", handle, message.uri.to_string (false));

                // Make sure the media-art directory is available
                DirUtils.create_with_parents (Path.get_dirname (poster_path), 0770);

                int width;
                int height;
                try {
                        store_poster (message, out width, out height);
                } catch (Error e) {
                        // Do not leave a partial or unreadable file behind
                        FileUtils.unlink (poster_path + ".part");
                        print ("[%d] Failed to store poster: %s\n", handle, e.message);
                        downloader.finished (this, null);
                        return;
                }

                print ("[%d] Stored as: %s (%dx%d)\n", handle, poster_path, width, height);
                downloader.finished (this, poster_path);
        }

        /**
         * Marks the download as cancelled. The request already in flight is
         * not aborted; the next callback sees the flag and reports a failure.
         */
        public void cancel () {
                print ("[%d] Cancelled\n", handle);
                cancelled = true;
        }
}
204
/**
 * The D-Bus service to manage poster downloads.
 *
 * Owns the HTTP session and the page regexes shared by all downloads, keeps
 * the list of downloads in progress, and quits its main loop after three
 * minutes without activity.
 */
public class IMDbPosterDownloader : Object, PosterDownloader {
        private MainLoop loop;
        private int fetch_handle = 1;
        private List<IMDbPosterDownload> downloads = null;
        // Id of the pending quit timeout, 0 while none is scheduled.
        private uint source_id;

        public Soup.SessionAsync session;
        public Regex re_tp_ppl;
        public Regex re_tp_pth;
        public Regex re_pp_pim;

        public IMDbPosterDownloader () {
                loop = new MainLoop (null);

                session = new Soup.SessionAsync ();
                session.max_conns = 40;
                session.max_conns_per_host = 20;
                try {
                        // Title page: photo page link
                        re_tp_ppl = new Regex ("\"(/rg/action-box-title/primary-photo/media/[^\"]*)\"");
                        // Title page: poster thumbnail
                        re_tp_pth = new Regex ("<img[^>]*id=\"primary-poster\"[^>]*src=\"(http://ia.media-imdb.com/images[^\"]*)\"");
                        // Photo page: poster image
                        re_pp_pim = new Regex ("\"(http://ia.media-imdb.com/images[^\"]*)\"");
                } catch (RegexError e) {
                        // The patterns are constants, so this indicates a programming
                        // error; make it visible instead of failing silently later.
                        critical ("Failed to compile page regex: %s", e.message);
                }
        }

        /**
         * (Re)starts the inactivity timer that makes the service quit.
         */
        public void timeout_quit () {
                // With every change we reset the timer to 3min
                if (source_id != 0) {
                        Source.remove (source_id);
                }
                source_id = Timeout.add_seconds (180, quit);
        }

        /**
         * Timeout handler: stops the main loop and reports every download
         * still in the list as failed.
         */
        private bool quit () {
                // This source is removed once we return false, so forget its id;
                // otherwise a later timeout_quit () would try to remove it again.
                source_id = 0;

                loop.quit ();

                print ("Timeout. Quitting with %u remaining downloads.\n", downloads.length ());
                foreach (IMDbPosterDownload download in downloads)
                        failed (download.handle);

                // One-shot only
                return false;
        }

        /**
         * Runs the main loop until the inactivity timer fires.
         */
        public void run () {
                loop.run ();
        }

        /**
         * Called by a download when it is done.
         *
         * @param download the finished download
         * @param poster_path path of the stored poster, or null on failure
         */
        public void finished (IMDbPosterDownload download, string? poster_path) {
                if (poster_path != null)
                        fetched (download.handle, poster_path);
                else
                        failed (download.handle);
                downloads.remove (download);
                timeout_quit ();
        }

        // Implement the PosterDownloader interface

        /**
         * Starts a poster download.
         *
         * @return the handle identifying the download in later
         *         notifications and in Unqueue ()
         */
        public int Fetch (string title, string year, string kind, int width, int height) throws DBus.Error {
                int handle = ++fetch_handle;
                print ("Fetch (\"%s\", \"%s\", \"%s\", %d, %d) = %d\n", title, year, kind, width, height, handle);
                var download = new IMDbPosterDownload (title, year, width, height, handle, this);

                downloads.append (download);

                // A new request is activity, too: do not let a timer that is about
                // to expire kill the download right after it was started.
                timeout_quit ();

                return handle;
        }

        /**
         * Cancels the download with the given handle, if it is still queued.
         */
        public void Unqueue (int handle) throws DBus.Error {
                print ("Unqueue (%d)\n", handle);
                foreach (IMDbPosterDownload d in downloads) {
                        if (d.handle == handle) {
                                // Only flag the download here. Its HTTP request is still
                                // in flight and the session will call back into the
                                // download object, so the list must keep its reference
                                // until that callback hands the download to finished (),
                                // which removes it.
                                d.cancel ();
                                break;
                        }
                }
        }

        static void main () {
                try {
                        var conn = DBus.Bus.get (DBus.BusType.SESSION);
                        dynamic DBus.Object bus = conn.get_object ("org.freedesktop.DBus",
                                                                   "/org/freedesktop/DBus",
                                                                   "org.freedesktop.DBus");

                        // Try to register service in session bus
                        uint res = bus.request_name ("org.maemo.movieposter.IMDb", (uint) 0);
                        if (res == DBus.RequestNameReply.PRIMARY_OWNER) {
                                // Start server
                                var server = new IMDbPosterDownloader ();
                                conn.register_object ("/org/maemo/movieposter/IMDb", server);

                                server.timeout_quit ();
                                server.run ();
                        }
                } catch (Error e) {
                        error ("Oops: %s\n", e.message);
                }
        }
}