1 import urllib, threading, os, gzip, time, json, re
# NOTE(review): this chunk is an extraction with embedded original line
# numbers and missing lines; code kept byte-identical, comments only.
# Remote Jamendo database dump (gzipped XML of artists/albums/tracks).
2 _DUMP_URL = '''http://img.jamendo.com/data/dbdump_artistalbumtrack.xml.gz'''
# Local cache path for the downloaded dump.
3 _DUMP = os.path.expanduser('''~/.cache/jamaendo/dbdump.xml.gz''')
# Create the cache directory; presumably guarded by an isdir/exists check
# on a missing line (4-5) -- otherwise makedirs raises OSError when the
# directory already exists. TODO confirm against the full source.
6 os.makedirs(os.path.dirname(_DUMP))
# Fragment of has_dump(): True when the cached dump file exists
# (the enclosing `def` line is missing from this view).
11 return os.path.isfile(_DUMP)
13 def _file_is_old(fil, old_age):
14 return os.path.getmtime(fil) < (time.time() - old_age)
# Fragment of _dump_is_old(): the dump is stale when absent or older than
# 24 hours (the enclosing `def` line is missing from this view).
17 return not has_dump() or _file_is_old(_DUMP, 60*60*24) # 1 day
19 def refresh_dump(complete_callback, progress_callback=None, force=False):
# Re-download the dump when forced or when the cached copy is stale.
# complete_callback: invoked when the download finishes.
# progress_callback: optional, invoked with an integer percentage (0-100).
# NOTE(review): original lines 22-25 are missing from this view --
# presumably they start the Downloader thread and/or call
# complete_callback directly when no refresh is needed; confirm.
20 if force or _dump_is_old():
21 downloader = Downloader(complete_callback, progress_callback)
# Background thread that fetches the dump with urllib.urlretrieve
# (Python 2 API) and reports download progress via a callback.
26 class Downloader(threading.Thread):
27 def __init__(self, complete_callback, progress_callback):
28 threading.Thread.__init__(self)
# Stored callbacks: complete_callback(), progress_callback(percent).
29 self.complete_callback = complete_callback
30 self.progress_callback = progress_callback
# urlretrieve reporthook: converts block count/size into a percentage.
32 def actual_callback(self, numblocks, blocksize, filesize):
33 if self.progress_callback:
# Clamped at 100; Python 2 integer division. NOTE(review): original
# lines 34 and 36-37 are missing -- likely a try/except guarding an
# unknown/zero filesize from urlretrieve; confirm against full source.
35 percent = min((numblocks*blocksize*100)/filesize, 100)
38 self.progress_callback(percent)
# Fragment of run(): the `def run(self):` line (original ~39-40) is
# missing from this view. Downloads to _DUMP, then signals completion.
41 urllib.urlretrieve(_DUMP_URL, _DUMP, self.actual_callback)
42 self.complete_callback()
44 def fast_iter(context, func):
# Walk an lxml iterparse context, applying func to each element while
# freeing already-processed XML to bound memory use (classic lxml idiom).
45 for event, elem in context:
# NOTE(review): original lines 46-47 are missing from this view --
# presumably func(elem) and elem.clear(); confirm against full source.
# Drop processed preceding siblings from the parent element.
48 while elem.getprevious() is not None:
49 del elem.getparent()[0]
52 from lxml import etree
# Fragment of a printable()/formatting helper: encodes unicode text as
# UTF-8 bytes for display (Python 2 `basestring`). The enclosing def and
# class header lines are missing from this view.
57 if isinstance(v, basestring):
58 return v.encode('utf-8')
# __repr__ fragment: renders non-private instance attributes as
# "{k=v, ...}" using Python 2 dict.iteritems().
61 return "{%s}" % (", ".join("%s=%s"%(k.encode('utf-8'), printable(v)) \
62 for k,v in self.__dict__.iteritems() if not k.startswith('_')))
# __init__ fragment: open the gzipped dump for streaming XML parsing.
69 self.fil = gzip.open(_DUMP)
# Recursively convert an XML element into an attribute-bearing object.
74 def make_obj(self, element):
# Leaf element with non-empty text. NOTE(review): the branch returning
# the text and the construction of `ret` (original lines 76-79, 81-82)
# are missing from this view; confirm against the full source.
75 if element.text is not None and element.text != "":
80 setattr(ret, child.tag, self.make_obj(child))
# Generator over <artist> elements of the dump, yielding converted
# objects while freeing parsed XML as it goes (same idiom as fast_iter).
83 def artist_walker(self):
84 for event, element in etree.iterparse(self.fil, tag="artist"):
85 yield self.make_obj(element)
# NOTE(review): original line 86 (presumably element.clear()) is
# missing from this view; confirm against the full source.
87 while element.getprevious() is not None:
88 del element.getparent()[0]
91 def search_artists(self, substr):
92 substr = substr.lower()
93 #return [dir(artist) for artist in self.artist_walker() if artist.name.find(substr) > -1]
94 artist = self.artist_walker().next()