Refactor: retrieve the album art inside the dialog
[mussorgsky] / src / parsefilename.py
1 # This file came originally from
2 # Picard, the next-generation MusicBrainz tagger
3 #   This specific file didn't contain a Copyright notice
4 #   but the whole project is under GPL 2 or later
5 #
6 # Original project URL:
7 # https://code.launchpad.net/picard
8 #
9 # Original file URL:
10 # http://bazaar.launchpad.net/~garyvdm/picard/keep_copy/annotate/head%3A/picard/parsefilename.py
11 #
12 # Modifications by Ivan Frade <ivan.frade@gmail.com>
13 #   Added a new regular expression
14 #   Added a function to clean up the filename a little before processing
15 #
16
17 # ParseFilename - Infer metadata from filepath
18 # Copyright (C) 2008?, Picard 
19 #
20 # This program is free software; you can redistribute it and/or modify
21 # it under the terms of the GNU General Public License as published by
22 # the Free Software Foundation; either version 2 of the License, or
23 # (at your option) any later version.
24 #
25 # This program is distributed in the hope that it will be useful,
26 # but WITHOUT ANY WARRANTY; without even the implied warranty of
27 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28 # GNU General Public License for more details.
29 #
30 # You should have received a copy of the GNU General Public License
31 # along with this program; if not, write to the Free Software Foundation,
32 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
33 #
34 import re
35
36 _patterns = [
37     # AlbumArtist/1999 - Album/01-TrackTitle.ext
38     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)((?P<year>\d{4}) - )(?P<album>.*)(/|\\)(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
39     # AlbumArtist - Album/01 - TrackTitle.ext
40     re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
41     # AlbumArtist - Album/01-TrackTitle.ext
42     re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
43     # AlbumArtist - Album/01. TrackTitle.ext
44     re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<title>.*)\.(?:\w{2,5})$"),
45     # AlbumArtist - Album/01 TrackTitle.ext
46     re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2}) (?P<title>.*)\.(?:\w{2,5})$"),
47     # AlbumArtist - Album/01_Artist_-_TrackTitle.ext
48     re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})_(?P<artist>.*)_-_(?P<title>.*)\.(?:\w{2,5})$"),
49     # Album/Artist - Album - 01 - TrackTitle.ext
50     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P=artist) - (?P<album>.*) - (?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
51     # AlbumArtist/Album/Artist - 01 - TrackTitle.ext
52     re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<artist>.*) - (?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
53     # AlbumArtist/Album/01. Artist - TrackTitle.ext
54     re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<artist>.*) - (?P<title>.*)\.(?:\w{2,5})$"),
55     # AlbumArtist/Album/01 - Artist - TrackTitle.ext
56     re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<artist>.*) - (?P<title>.*)\.(?:\w{2,5})$"),
57     # AlbumArtist/Album/01 - TrackTitle.ext
58     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
59     # AlbumArtist/Album/01. TrackTitle.ext
60     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<title>.*)\.(?:\w{2,5})$"),
61     # AlbumArtist/Album/01 TrackTitle.ext
62     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) (?P<title>.*)\.(?:\w{2,5})$"),
63     # AlbumArtist/Album/Album-01-TrackTitle.ext
64     re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P=album)-(?P<tracknum>\d{2})-(?P<artist>.*)-(?P<title>.*)\.(?:\w{2,5})$"),
65     # AlbumArtist/Album/Album-01-Artist-TrackTitle.ext
66     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P=album)-(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
67     # AlbumArtist/Album/Artist-01-TrackTitle.ext
68     re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<artist>.*)-(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
69     # Artist/Album/TrackTitle.ext
70     re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<title>.*)\.(?:\w{2,5})$"),
71 ]
72
def parseFileName(filename):
    """
    Guess basic metadata from a file path.

    The path is matched against every expression in _patterns, in order.
    For the first expression that matches, a dictionary with the keys
    "artist", "title" and "album" is returned. If no expression matches
    the result is None.
    """
    wanted = ("artist", "title", "album")
    for regex in _patterns:
        found = regex.match(filename)
        if not found:
            continue
        # Only these three groups are reported; any other group the
        # expression captures (track number, year, ...) is ignored.
        return dict((key, found.group(key)) for key in wanted)
    return None
84         
def clean (filename):
    """
    Some brute force normalization of a file path before parsing it.

    Removes disc markers ("[Disc 1]", "Disc 1", "CD1", "CD 1"), splits
    CamelCase words ("PinkFloyd" -> "Pink Floyd"), turns "_", "[", "]"
    and "-" into spaces and collapses every run of spaces into one.

    Returns the normalized string.
    """
    # [Disc 1] -- the bracket content may not contain another bracket, so
    # two bracketed tags never swallow the text between them.
    f = re.sub (r"\[[^\[\]]*\d\]", r"", filename)
    f = re.sub (r"[Dd]isc \d", r"", f)  # Disc 1
    f = re.sub (r"-?[Cc][Dd] ?\d", r"", f) # CD1, CD 1 <- Maybe useful!
    f = re.sub (r"([a-z0-9])([A-Z])([a-z0-9])", r"\1 \2\3", f) # PinkFloyd -> Pink Floyd
    for separator in ("_", "[", "]", "-"):
        f = f.replace (separator, " ")
    # A single replace ("  ", " ") pass leaves doubles behind for runs of
    # three or more spaces (e.g. what " - " becomes), so collapse whole runs.
    return re.sub (r" {2,}", " ", f)
94
95
def crawl (path):
    """
    Test function. Crawl a directory trying to guess the metadata from the filepath
    for all music files.

    Every file below *path* whose name ends in .mp3, .m4a or .ogg (in any
    letter case) is cleaned with clean() and parsed with parseFileName().
    The guessed values are printed, followed by a final success count.
    Nothing is returned.
    """
    import os
    music_extensions = (".mp3", ".m4a", ".ogg")
    # Single-argument parenthesized prints behave the same on Python 2 and 3.
    print(path)
    fail = 0
    ok = 0
    for (root, _dirs, files) in os.walk (path):
        for f in files:
            # Lower-case first so that "SONG.MP3" is not skipped.
            if not f.lower ().endswith (music_extensions):
                continue
            filename = os.path.join (root, f)
            print(filename)
            mdata = parseFileName (clean (filename))
            if (not mdata):
                fail += 1
                continue
            print("  Artist:  %s" % (mdata['artist'],))
            print("  Album : %s" % (mdata['album'],))
            print("  Title : %s" % (mdata['title'],))
            ok += 1
    print("aprox %d success out of %d" % (ok, ok + fail))
121         
122
if __name__ == "__main__":
    # Crawl the directory given as the first command-line argument
    # (for example "/media/IPOD/"); without an argument the original
    # default directory is used.
    import sys
    if len (sys.argv) > 1:
        crawl (sys.argv[1])
    else:
        crawl ("/home/ivan/Desktop")