1 # This file came originally from
2 # Picard, the next-generation MusicBrainz tagger
3 # This specific file didn't contain a Copyright notice
4 # but the whole project is under GPL 2 or later
6 # Original project URL:
7 # https://code.launchpad.net/picard
10 # http://bazaar.launchpad.net/~garyvdm/picard/keep_copy/annotate/head%3A/picard/parsefilename.py
12 # Modifications by Ivan Frade <ivan.frade@gmail.com>
13 # Added new regular expression
14 # Added a function to clean up the filename a little before processing
17 # ParseFilename - Infer metadata from filepath
18 # Copyright (C) 2008?, Picard
20 # This program is free software; you can redistribute it and/or modify
21 # it under the terms of the GNU General Public License as published by
22 # the Free Software Foundation; either version 2 of the License, or
23 # (at your option) any later version.
25 # This program is distributed in the hope that it will be useful,
26 # but WITHOUT ANY WARRANTY; without even the implied warranty of
27 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 # GNU General Public License for more details.
30 # You should have received a copy of the GNU General Public License
31 # along with this program; if not, write to the Free Software Foundation,
32 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
37 # AlbumArtist/1999 - Album/01-TrackTitle.ext
38 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)((?P<year>\d{4}) - )(?P<album>.*)(/|\\)(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
39 # AlbumArtist - Album/01 - TrackTitle.ext
40 re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
41 # AlbumArtist - Album/01-TrackTitle.ext
42 re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
43 # AlbumArtist - Album/01. TrackTitle.ext
44 re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<title>.*)\.(?:\w{2,5})$"),
45 # AlbumArtist - Album/01 TrackTitle.ext
46 re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2}) (?P<title>.*)\.(?:\w{2,5})$"),
47 # AlbumArtist - Album/01_Artist_-_TrackTitle.ext
48 re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})_(?P<artist>.*)_-_(?P<title>.*)\.(?:\w{2,5})$"),
49 # Album/Artist - Album - 01 - TrackTitle.ext
50 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P=artist) - (?P<album>.*) - (?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
51 # AlbumArtist/Album/Artist - 01 - TrackTitle.ext
52 re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<artist>.*) - (?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
53 # AlbumArtist/Album/01. Artist - TrackTitle.ext
54 re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<artist>.*) - (?P<title>.*)\.(?:\w{2,5})$"),
55 # AlbumArtist/Album/01 - Artist - TrackTitle.ext
56 re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<artist>.*) - (?P<title>.*)\.(?:\w{2,5})$"),
57 # AlbumArtist/Album/01 - TrackTitle.ext
58 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
59 # AlbumArtist/Album/01. TrackTitle.ext
60 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<title>.*)\.(?:\w{2,5})$"),
61 # AlbumArtist/Album/01 TrackTitle.ext
62 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) (?P<title>.*)\.(?:\w{2,5})$"),
63 # AlbumArtist/Album/Album-01-TrackTitle.ext
64 re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P=album)-(?P<tracknum>\d{2})-(?P<artist>.*)-(?P<title>.*)\.(?:\w{2,5})$"),
65 # AlbumArtist/Album/Album-01-Artist-TrackTitle.ext
66 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P=album)-(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
67 # AlbumArtist/Album/Artist-01-TrackTitle.ext
68 re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<artist>.*)-(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
69 # Artist/Album/TrackTitle.ext
70 re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<title>.*)\.(?:\w{2,5})$"),
def parseFileName(filename, patterns=None):
    """Guess artist, title and album from a file path.

    The path is matched against each compiled pattern in order and the
    first pattern that matches wins.

    Parameters:
        filename: the (ideally already cleaned) file path to analyse.
        patterns: optional iterable of compiled regular expressions, each
            providing the named groups "artist", "title" and "album".
            Defaults to the module-level ``_patterns`` list.

    Returns:
        A dict with the keys "artist", "title" and "album" taken from the
        first matching pattern, or None when no pattern matches.
    """
    if patterns is None:
        patterns = _patterns

    for pattern in patterns:
        match = pattern.match(filename)
        if match is None:
            # This layout does not fit; try the next, more generic one.
            continue
        return {
            "artist": match.group("artist"),
            "title": match.group("title"),
            "album": match.group("album"),
        }

    return None
def clean(filename):
    """Apply some brute-force normalization to a file path.

    Removes disc markers ("[Disc 1]", "Disc 1", "CD1", "-CD 1"), splits
    run-together capitalized words ("PinkFloyd" -> "Pink Floyd"), turns
    "_", "[", "]" and "-" into spaces and finally collapses every run of
    spaces into a single space.

    Parameters:
        filename: the file path to normalize.

    Returns:
        The normalized path as a new string.
    """
    f = re.sub(r"\[.*\d\]", r"", filename)  # [Disc 1]
    f = re.sub(r"[Dd]isc \d", r"", f)  # Disc 1
    f = re.sub(r"-?[Cc][Dd]\ ?\d", r"", f)  # CD1, CD 1 <- Maybe useful!
    f = re.sub(r"([a-z0-9])([A-Z])([a-z0-9])", r"\1 \2\3", f)  # PinkFloyd -> Pink Floyd
    f = f.replace("_", " ").replace("[", " ").replace("]", " ").replace("-", " ")
    # The substitutions above can leave several spaces in a row (for example
    # " - " becomes three spaces); squeeze them so that separators are
    # always exactly one space wide.
    return re.sub(r" {2,}", " ", f)
def crawl(path):
    """Test function: walk a directory and try to guess metadata from paths.

    Every file under ``path`` whose name ends in ".mp3", ".m4a" or ".ogg"
    is cleaned with ``clean`` and handed to ``parseFileName``.  The guessed
    artist, album and title are printed for each file that could be parsed,
    followed by a final success count.

    Parameters:
        path: root directory to walk recursively.
    """
    ok = 0
    fail = 0

    for (root, _dirs, files) in os.walk(path):
        for f in files:
            # str.endswith accepts a tuple of candidate suffixes.
            if not f.endswith((".mp3", ".m4a", ".ogg")):
                continue

            filename = os.path.join(root, f)
            mdata = parseFileName(clean(filename))
            if not mdata:
                fail += 1
                continue

            # Single-argument print calls produce the same output under
            # both Python 2 and Python 3.
            print(" Artist:  %s" % mdata['artist'])
            print(" Album : %s" % mdata['album'])
            print(" Title : %s" % mdata['title'])
            ok += 1

    print("aprox %d success out of %d" % (ok, ok + fail))
if __name__ == "__main__":
    # The directory to scan can be given as the first command-line argument;
    # without one, fall back to the original hard-coded test directory.
    # Another root previously used for testing: "/media/IPOD/"
    import sys

    crawl(sys.argv[1] if len(sys.argv) > 1 else "/home/ivan/Desktop")