From: Ivan Frade Date: Thu, 20 Aug 2009 20:49:38 +0000 (+0300) Subject: Code from pickar to infer metadata from file path X-Git-Tag: mussorgsky-0.2-1~17 X-Git-Url: https://vcs.maemo.org/git/?a=commitdiff_plain;h=852fffc61c69e1236b09b53471265f8c1fbd4635;p=mussorgsky Code from pickar to infer metadata from file path Pack of regular expressions to get artist/album/title from the path. Still not used in the code, but can be helpful. --- diff --git a/src/parsefilename.py b/src/parsefilename.py new file mode 100644 index 0000000..a4b7524 --- /dev/null +++ b/src/parsefilename.py @@ -0,0 +1,125 @@ +# This file came originally from +# Picard, the next-generation MusicBrainz tagger +# This specific file didn't contain a Copyright notice +# but the whole project is under GPL 2 or later +# +# Original project URL: +# https://code.launchpad.net/picard +# +# Original file URL: +# http://bazaar.launchpad.net/~garyvdm/picard/keep_copy/annotate/head%3A/picard/parsefilename.py +# +# Modifications by Ivan Frade +# Added new regular expression +# Added function to clean up the filename a little bit before processing +# + +# ParseFilename - Infer metadata from filepath +# Copyright (C) 2008?, Picard +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#
import os
import re
import sys

# Path layouts understood by parseFileName, tried in order: the FIRST pattern
# that matches wins, so the order below is significant (most specific first,
# the generic Artist/Album/Title fallback last).
#
# Every pattern accepts both "/" and "\" as directory separator, an optional
# leading directory prefix, and a 2-5 character file extension.  Every
# pattern defines at least the named groups "artist", "album" and "title",
# which is what parseFileName relies on.
_patterns = [
    # AlbumArtist/1999 - Album/01-TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)((?P<year>\d{4}) - )(?P<album>.*)(/|\\)(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist - Album/01 - TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist - Album/01-TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist - Album/01. TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist - Album/01 TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2}) (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist - Album/01_Artist_-_TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*) - (?P<album>.*)(/|\\)(?P<tracknum>\d{2})_(?P<artist>.*)_-_(?P<title>.*)\.(?:\w{2,5})$"),
    # Artist/Artist - Album - 01 - TrackTitle.ext
    # (the directory name must be repeated at the start of the file name)
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P=artist) - (?P<album>.*) - (?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/Artist - 01 - TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<artist>.*) - (?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/01. Artist - TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<artist>.*) - (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/01 - Artist - TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<artist>.*) - (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/01 - TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) - (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/01. TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2})\. (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/01 TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<tracknum>\d{2}) (?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/Album-01-Artist-TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P=album)-(?P<tracknum>\d{2})-(?P<artist>.*)-(?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/Album-01-TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P=album)-(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
    # AlbumArtist/Album/Artist-01-TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<albumartist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<artist>.*)-(?P<tracknum>\d{2})-(?P<title>.*)\.(?:\w{2,5})$"),
    # Artist/Album/TrackTitle.ext
    re.compile(r"(?:.*(/|\\))?(?P<artist>.*)(/|\\)(?P<album>.*)(/|\\)(?P<title>.*)\.(?:\w{2,5})$"),
]


def parseFileName(filename):
    """
    Guess artist, title and album from a file path.

    The path is tried against each entry of _patterns in order and the
    first matching pattern decides the result.

    filename -- path of the file (string), "/" or "\\" separated.

    Returns a dict with the keys "artist", "title" and "album", or None
    when no pattern matches.  Other groups a pattern may capture (year,
    track number, album artist) are not reported.
    """
    for pattern in _patterns:
        match = pattern.match(filename)
        if match:
            return {
                "artist": match.group("artist"),
                "title": match.group("title"),
                "album": match.group("album"),
            }
    return None


def clean(filename):
    """
    Some brute force normalization of a path before parsing it.

    Removes disc markers ("[Disc 1]", "Disc 1", "CD1", "CD 1", "-CD1"),
    splits CamelCase words ("PinkFloyd" -> "Pink Floyd") and turns
    "_", "[", "]" and "-" into spaces.

    filename -- path of the file (string).

    Returns the normalized string.
    """
    f = re.sub(r"\[.*\d\]", r"", filename)  # [Disc 1]
    f = re.sub(r"[Dd]isc \d", r"", f)  # Disc 1
    f = re.sub(r"-?[Cc][Dd]\ ?\d", r"", f)  # CD1, CD 1 <- Maybe useful!
    f = re.sub(r"([a-z0-9])([A-Z])([a-z0-9])", r"\1 \2\3", f)  # PinkFloyd -> Pink Floyd
    for char in ("_", "[", "]", "-"):
        f = f.replace(char, " ")
    # The substitutions above can leave doubled spaces behind; squeeze each
    # pair into one (single pass, so longer runs are only shortened).
    return f.replace("  ", " ")


def crawl(path):
    """
    Test function. Crawl a directory trying to guess the metadata from the
    filepath for all music files (.mp3, .m4a, .ogg).

    Prints every candidate file, the guessed artist/album/title when a
    pattern matches, and a final success count.

    path -- directory to walk recursively.
    """
    # Single-argument print(...) is used throughout so that this runs
    # unchanged on both Python 2 and Python 3.
    print(path)
    fail = 0
    ok = 0
    for (root, dirs, files) in os.walk(path):
        for f in files:
            if not f.endswith((".mp3", ".m4a", ".ogg")):
                continue
            filename = os.path.join(root, f)
            print(filename)
            mdata = parseFileName(clean(filename))
            if not mdata:
                fail += 1
                continue
            print(" Artist:  %s" % mdata['artist'])
            print(" Album : %s" % mdata['album'])
            print(" Title : %s" % mdata['title'])
            ok += 1
    print("aprox %d success out of %d" % (ok, ok + fail))


if __name__ == "__main__":
    # Optional first argument: directory to crawl (defaults to the
    # original hard-coded test location).
    crawl(sys.argv[1] if len(sys.argv) > 1 else "/home/ivan/Desktop")
    #crawl ("/media/IPOD/")