Get only JPEG images for thumbnails
[mussorgsky] / src / album_art.py
1 #!/usr/bin/env python2.5
2 import urllib2, urllib
3 import os
4 from album_art_spec import getCoverArtFileName, getCoverArtThumbFileName, get_thumb_filename_for_path
5 import dbus, time
6 import string
7
# libxml2 is optional. When it cannot be imported, libxml_available is False
# and MussorgskyAlbumArt.__last_fm returns None without doing any lookup.
try:
    import libxml2
    libxml_available = True
except ImportError:
    libxml_available = False

# PIL is optional. When it can be imported, MussorgskyAlbumArt uses the
# in-process LocalThumbnailer; otherwise it tries the D-Bus thumbnailer.
try:
    import PIL
    import Image
    pil_available = True
except ImportError:
    pil_available = False
20     
21
# Key and endpoint used to build the Last.fm "album.getinfo" request.
# NOTE(review): the API key is hard-coded in the source; confirm it is meant
# to be public before redistributing.
LASTFM_APIKEY = "1e1d53528c86406757a6887addef0ace"
BASE_LASTFM = "http://ws.audioscrobbler.com/2.0/?method=album.getinfo"


# Base URL and query suffixes appended to the search terms when building a
# Bing image search URL.
BASE_MSN = "http://www.bing.com/images/search?q="
MSN_MEDIUM = "+filterui:imagesize-medium"
# NOTE(review): MSN_SMALL has the same value as MSN_MEDIUM; presumably
# "imagesize-small" was intended -- confirm before changing.
MSN_SMALL = "+filterui:imagesize-medium"
MSN_SQUARE = "+filterui:aspect-square"
MSN_PHOTO = "+filterui:photo-graphics"
31
32 # LastFM:
33 # http://www.lastfm.es/api/show?service=290
34 #
class MussorgskyAlbumArt:
    """
    Download the cover art of an album and produce its thumbnail.

    Covers are looked up through Bing image search and saved at the paths
    given by getCoverArtFileName / getCoverArtThumbFileName. Thumbnails are
    made by LocalThumbnailer when PIL could be imported, otherwise by the
    'org.freedesktop.thumbnailer' D-Bus service when it can be reached.
    """

    def __init__ (self):
        """
        Pick the thumbnailer; self.thumbnailer is None when none is usable.
        """
        if (pil_available):
            self.thumbnailer = LocalThumbnailer ()
            return

        # The session bus is only needed for the D-Bus thumbnailer, so it is
        # opened here and a failure to connect is handled like a missing
        # service.
        try:
            bus = dbus.SessionBus ()
            self.thumbnailer = bus.get_object ('org.freedesktop.thumbnailer',
                                               '/org/freedesktop/thumbnailer/Generic')
        except dbus.exceptions.DBusException:
            print ("No thumbnailer available")
            self.thumbnailer = None

    def get_album_art (self, artist, album):
        """
        Return a tuple (album_art, thumbnail_album_art)

        An existing cover or thumbnail file is reused. Otherwise the cover is
        searched online and downloaded, and a thumbnail is requested for it.
        Returns (None, None) when nothing could be downloaded or when the
        thumbnail request fails; in the latter case the cover file is
        removed as well.
        """
        filename = getCoverArtFileName (album)
        thumbnail = getCoverArtThumbFileName (album)

        if (os.path.exists (filename)):
            print ("Album art already there " + filename)
        else:
            online_resource = self.__msn_images (artist, album)
            if (not online_resource):
                return (None, None)
            print ("Choosed: %s" % (online_resource,))
            content = self.__get_url (online_resource)
            if (not content):
                return (None, None)
            print ("Albumart: %s " % (filename))
            self.__save_content_into_file (content, filename)

        if (os.path.exists (thumbnail)):
            print ("Thumbnail exists " + thumbnail)
        elif (not self.__request_thumbnail (filename)):
            print ("Failed doing thumbnail. Probably album art is not an image!")
            os.remove (filename)
            return (None, None)

        return (filename, thumbnail)

    def __last_fm (self, artist, album):
        """
        Return the URL of the 'large' image Last.fm reports for the album.

        Returns None when libxml2 is not available, when no album is given,
        when the request fails or when the answer has no such image.
        """
        if (not libxml_available):
            return None

        if (not album):
            return None

        URL = BASE_LASTFM + "&api_key=" + LASTFM_APIKEY
        if (artist and len (artist) > 1):
            URL += "&artist=" + urllib.quote (artist)
        URL += "&album=" + urllib.quote (album)

        print ("Retrieving: %s" % (URL))
        result = self.__get_url (URL)
        if (not result):
            return None

        doc = libxml2.parseDoc (result)
        try:
            image_nodes = doc.xpathEval ("//image[@size='large']")
            if (len (image_nodes) < 1):
                return None
            return image_nodes[0].content
        finally:
            # Documents of the libxml2 bindings are not garbage collected.
            doc.freeDoc ()

    def __msn_images (self, artist, album):
        """
        Return the URL of a JPEG cover found with Bing image search, or None.

        Three queries are tried in order: album + artist, album alone
        (skipped for "greatest hit(s)" titles, which are too generic) and
        artist + "CD music". A query whose result page has no usable link
        does not stop the search; the next query is tried.
        """
        good_artist = self.__clean_string_for_search (artist)
        good_album = self.__clean_string_for_search (album)

        if (good_album and good_artist):
            found = self.__search_msn ("album + artist",
                                       BASE_MSN + good_album + "+" + good_artist
                                       + MSN_MEDIUM + MSN_SQUARE)
            if (found):
                return found

        if (good_album):
            if ("greatest hit" in album.lower ()):
                print ("Ignoring '%s': too generic" % (album))
            else:
                found = self.__search_msn ("album",
                                           BASE_MSN + good_album
                                           + MSN_MEDIUM + MSN_SQUARE)
                if (found):
                    return found

        if (good_artist):
            found = self.__search_msn ("artist CD",
                                       BASE_MSN + good_artist + "+CD+music"
                                       + MSN_SMALL + MSN_SQUARE + MSN_PHOTO)
            if (found):
                return found

        return None

    def __search_msn (self, label, url):
        """
        Fetch one search page and return its first JPEG link, or None.
        """
        print ("Searching (%s): %s" % (label, url))
        result = self.__get_url (url)
        if (not result):
            return None
        return self.__get_first_url_from_msn_results_page (result)

    def __get_first_url_from_msn_results_page (self, page):
        """
        Return the first 'furl=' value in page ending in .jpg or .jpeg.

        Every 'amp;' is removed from the value. At most 20 candidates are
        examined. Returns None when no candidate matches or when a value is
        not terminated by a double quote.
        """
        marker = "furl="
        starting_at = 0

        # Security limit: never inspect more than 20 candidates.
        for attempt in range (20):
            start = page.find (marker, starting_at)
            if (start == -1):
                return None
            start += len (marker)
            end = page.find ("\"", start)
            if (end == -1):
                # Unterminated value: slicing with -1 would cut the URL short.
                return None
            current_option = page [start:end].replace ("amp;", "")
            if (current_option.lower ().endswith ((".jpg", ".jpeg"))):
                return current_option
            starting_at = end
        return None

    def __clean_string_for_search (self, text):
        """
        Return text prepared for use in a search URL, or None if it is empty.

        The characters _ : ? \\ - ~ become spaces, the " CD1" / " CD2"
        markers are dropped and the result is percent-encoded, "/" included.
        """
        if (not text):
            return None

        clean = text
        for c in "_:?\\-~":
            clean = clean.replace (c, " ")
        clean = clean.replace (" CD1", "").replace (" CD2", "")

        # urllib.quote raises KeyError on non-ASCII unicode objects, so those
        # are encoded first.
        if (not isinstance (clean, str)):
            clean = clean.encode ("utf-8")

        # safe="" makes quote escape "/" as %2F too (it is kept by default).
        return urllib.quote (clean, safe="")

    def __save_content_into_file (self, content, filename):
        """
        Write content to filename, replacing any previous file.
        """
        # Binary mode: the content is image data, not text.
        output = open (filename, 'wb')
        try:
            output.write (content)
        finally:
            output.close ()

    def __get_url (self, url):
        """
        Return the body fetched from url, or None if the request fails.
        """
        request = urllib2.Request (url)
        request.add_header ('User-Agent', 'Mussorgsky/0.1 Test')
        opener = urllib2.build_opener ()
        try:
            response = opener.open (request)
            try:
                return response.read ()
            finally:
                response.close ()
        except Exception:
            # Best effort: any failure means "no content". KeyboardInterrupt
            # and SystemExit are not Exception subclasses and still propagate.
            return None

    def __request_thumbnail (self, filename):
        """
        Ask the thumbnailer for a thumbnail of filename.

        Returns False when there is no thumbnailer, otherwise whatever the
        thumbnailer's Queue call returns.
        """
        if (not self.thumbnailer):
            print ("No thumbnailer available")
            return False
        uri = "file://" + filename
        handle = time.time ()
        return self.thumbnailer.Queue ([uri], ["image/jpeg"], dbus.UInt32 (handle))
196             
197
198
199 class LocalThumbnailer:
200     def __init__ (self):
201         self.THUMBNAIL_SIZE = (124,124)
202
203     def Queue (self, uris, mimes, handle):
204         for i in range (0, len(uris)):
205             uri = uris[i]
206             fullCoverFileName = uri[7:]
207             if (os.path.exists (fullCoverFileName)):
208                 thumbFile = get_thumb_filename_for_path (fullCoverFileName)
209                 try:
210                     image = Image.open (fullCoverFileName)
211                     image = image.resize (self.THUMBNAIL_SIZE, Image.ANTIALIAS )
212                     image.save( thumbFile, "JPEG" )
213                     print "Thumbnail: " + thumbFile
214                 except IOError, e:
215                     print e
216                     return False
217         return True
218             
219
220
if __name__ == "__main__":
    # Command line entry point: print the destination paths of an album's
    # cover art or, with --retrieve, try to download it.
    import sys
    from optparse import OptionParser

    cli = OptionParser()
    cli.add_option ("-p", "--print",
                    dest="print_paths", action="store_true", default=True,
                    help="Print the destination paths")
    cli.add_option ("-r", "--retrieve",
                    dest="retrieve", action="store_true", default=False,
                    help="Try to retrieve the online content")
    cli.add_option ("-a", "--artist",
                    dest="artist", type="string", metavar="ARTIST",
                    help="ARTIST to look for")
    cli.add_option ("-b", "--album",
                    dest="album", type="string", metavar="ALBUM",
                    help="ALBUM to look for")

    (options, args) = cli.parse_args ()
    print (options)

    # At least one of artist / album is required.
    if (not (options.artist or options.album)):
        cli.print_help ()
        sys.exit (-1)

    if options.retrieve:
        fetcher = MussorgskyAlbumArt ()
        fetcher.get_album_art (options.artist, options.album)
    elif options.print_paths:
        print ("Album art: %s" % (getCoverArtFileName (options.album),))
        print ("Thumbnail: %s" % (getCoverArtThumbFileName (options.album),))