# funkwhale/api/funkwhale_api/music/metadata.py
#
# Source-viewer header (translated from Polish): 330 lines, 9.3 KiB, Python.
# Viewer links: Raw / Normal view / History.

import datetime
import logging
2018-04-21 14:01:42 +00:00
import mutagen
import pendulum
2018-06-10 08:55:16 +00:00
from django import forms
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Unique sentinel used as the default value of Metadata.get()'s ``default``
# parameter, so that "no fallback supplied" can be told apart from an explicit
# ``None`` fallback.
NODEFAULT = object()
class TagNotFound(KeyError):
    """Raised when a supported tag is absent from the file being read."""
class UnsupportedTag(KeyError):
    """Raised when a tag is not declared for the file's format."""
class ParseError(ValueError):
    """Raised when a tag value cannot be converted (e.g. an unparsable date)."""
def get_id3_tag(f, k):
    """Return the value of ID3 tag ``k`` read from the file object ``f``.

    Lookup order:

    1. ``k == "pictures"`` returns every ``APIC`` frame of the file.
    2. The frame stored under the standard key ``k``; the first entry of its
       ``text`` list is returned.
    3. The user-defined ``TXXX`` frames; the first one whose description
       equals ``k`` (case-insensitively) is used.

    :param f: object exposing ``tags`` (mapping with a ``getall`` method,
        or ``None`` when the file carries no tag header).
    :param k: frame id, ``TXXX`` description, or the string ``"pictures"``.
    :raises TagNotFound: if the file has no tags or no lookup yields a value.
    """
    tags = f.tags
    if tags is None:
        # No tag container at all: report it like any other missing tag
        # instead of failing with AttributeError on ``None``.
        raise TagNotFound(k)
    if k == "pictures":
        return tags.getall("APIC")
    # First we try to grab the standard key
    try:
        return tags[k].text[0]
    except (KeyError, IndexError, AttributeError):
        # Frame missing, frame present with an empty text list, or a frame
        # type that has no ``text`` attribute: try the non standard tags.
        pass
    # then we fall back on parsing non standard tags
    wanted = k.lower()
    matches = [t for t in tags.getall("TXXX") if t.desc.lower() == wanted]
    try:
        return matches[0].text[0]
    except (KeyError, IndexError):
        raise TagNotFound(k)
def clean_id3_pictures(apic):
    """Convert an iterable of picture frames into plain dictionaries.

    Each item of ``apic`` must expose ``mime``, ``data``, ``desc`` and
    ``type`` attributes.

    :param apic: iterable of picture frames (may be empty).
    :returns: list of dicts with the keys ``mimetype``, ``content``,
        ``description`` and ``type`` (``type`` is taken from ``p.type.real``).
    """
    return [
        {
            "mimetype": p.mime,
            "content": p.data,
            "description": p.desc,
            "type": p.type.real,
        }
        for p in apic
    ]
def get_flac_tag(f, k):
    """Return the first value stored under tag ``k`` of the file object ``f``.

    The special key ``"pictures"`` returns ``f.pictures`` unchanged.

    :raises TagNotFound: if ``k`` has no value in ``f``.
    """
    if k != "pictures":
        try:
            values = f.get(k, [])
            first = values[0]
        except (KeyError, IndexError):
            raise TagNotFound(k)
        return first
    return f.pictures
def clean_flac_pictures(apic):
    """Convert an iterable of picture objects into plain dictionaries.

    Each item of ``apic`` must expose ``mime``, ``data``, ``desc`` and
    ``type`` attributes.

    :param apic: iterable of picture objects (may be empty).
    :returns: list of dicts with the keys ``mimetype``, ``content``,
        ``description`` and ``type`` (``type`` is taken from ``p.type.real``).
    """
    return [
        {
            "mimetype": p.mime,
            "content": p.data,
            "description": p.desc,
            "type": p.type.real,
        }
        for p in apic
    ]
def get_mp3_recording_id(f, k):
    """Return the identifier stored in the file's musicbrainz ``UFID`` frame.

    The first ``UFID`` frame whose ``owner`` contains ``"musicbrainz.org"``
    is selected and its ``data`` bytes are decoded as UTF-8.

    :raises TagNotFound: if no ``UFID`` frame has such an owner.
    """
    try:
        ufid_frames = f.tags.getall("UFID")
        candidates = [
            frame for frame in ufid_frames if "musicbrainz.org" in frame.owner
        ]
        raw = candidates[0].data
        return raw.decode("utf-8")
    except IndexError:
        raise TagNotFound(k)
def convert_track_number(v):
    """Convert a raw track-number tag value to an ``int``.

    Accepts plain numbers (``"3"``, ``3``) as well as positions of the form
    ``"1/4"``, in which case the part before the slash is used.

    :param v: raw tag value; anything that is not convertible is tolerated.
    :returns: the track number, or ``None`` when ``v`` cannot be converted
        (including ``None`` and other non-string, non-numeric values).
    """
    try:
        return int(v)
    except (ValueError, TypeError):
        # maybe the position is of the form "1/4"
        pass
    try:
        return int(v.split("/")[0])
    except (ValueError, TypeError, AttributeError, IndexError):
        # Not a number in any supported form: signal "unknown" explicitly.
        return None
2018-04-21 14:01:42 +00:00
class FirstUUIDField(forms.UUIDField):
    """UUID form field that only considers the first UUID of its input."""

    def to_python(self, value):
        """Trim ``value`` down to its first UUID, then delegate to the parent.

        Sometimes, Picard leaves two uuids in the field, separated by a
        slash or a ``;``. Only the part before the first separator is kept.
        """
        try:
            before_semicolon = value.split(";")[0]
            before_slash = before_semicolon.split("/")[0]
            value = before_slash.strip()
        except (AttributeError, IndexError, TypeError):
            # Input could not be split this way: pass it on unchanged.
            pass
        return super().to_python(value)
def get_date(value):
    """Parse a raw date tag into a ``datetime.date``.

    ``value`` is converted to ``str`` first, so non-string tag objects are
    accepted. Parsing is attempted with ``pendulum.parse`` and then with each
    entry of ``ADDITIONAL_FORMATS``.

    :param value: raw tag value (string or any object whose ``str()`` is the
        date text).
    :returns: ``datetime.date`` built from the parsed year, month and day.
    :raises ParseError: if no parser accepts the value.
    """
    ADDITIONAL_FORMATS = ["%Y-%d-%m %H:%M"]  # deezer date format
    # Both parsers need text; converting once also keeps strptime from
    # raising TypeError on non-str tag objects.
    text = str(value)
    try:
        parsed = pendulum.parse(text)
        return datetime.date(parsed.year, parsed.month, parsed.day)
    except pendulum.exceptions.ParserError:
        pass
    for date_format in ADDITIONAL_FORMATS:
        try:
            parsed = datetime.datetime.strptime(text, date_format)
        except ValueError:
            continue
        else:
            return datetime.date(parsed.year, parsed.month, parsed.day)
    raise ParseError("{} cannot be parsed as a date".format(value))
2018-09-23 12:38:42 +00:00
def split_and_return_first(separator):
    """Build a converter that keeps the first ``separator``-delimited item.

    :param separator: string passed to ``str.split``.
    :returns: a one-argument callable returning the first piece of its
        argument, stripped of surrounding whitespace.
    """

    def inner(v):
        pieces = v.split(separator)
        head = pieces[0]
        return head.strip()

    return inner
2018-04-21 14:01:42 +00:00
# Application keys whose values are validated after conversion; every one of
# them gets its own FirstUUIDField instance.
VALIDATION = {
    tag: FirstUUIDField()
    for tag in (
        "musicbrainz_artistid",
        "musicbrainz_albumid",
        "musicbrainz_recordingid",
        "musicbrainz_albumartistid",
    )
}
# Per-format tag configuration, keyed by the class name of the parsed file
# object (see Metadata.get_file_type).
#
# For each format:
#   "getter"         -- callable(file, real_key) returning the raw tag value
#   "clean_pictures" -- optional callable normalising the "pictures" value
#   "fields"         -- application key -> field options:
#       "field"          -- real tag name (defaults to the application key)
#       "to_application" -- optional converter applied to the raw value
#       "getter"         -- optional getter overriding the format-level one
CONF = {
    "OggOpus": {
        "getter": lambda f, k: f[k][0],
        "fields": {
            "track_number": {"field": "TRACKNUMBER", "to_application": convert_track_number},
            "title": {},
            "artist": {},
            "album_artist": {"field": "albumartist", "to_application": split_and_return_first(";")},
            "album": {},
            "date": {"field": "date", "to_application": get_date},
            "musicbrainz_albumid": {},
            "musicbrainz_artistid": {},
            "musicbrainz_albumartistid": {},
            "musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
        },
    },
    "OggVorbis": {
        "getter": lambda f, k: f[k][0],
        "fields": {
            "track_number": {"field": "TRACKNUMBER", "to_application": convert_track_number},
            "title": {},
            "artist": {},
            "album_artist": {"field": "albumartist", "to_application": split_and_return_first(";")},
            "album": {},
            "date": {"field": "date", "to_application": get_date},
            "musicbrainz_albumid": {},
            "musicbrainz_artistid": {},
            "musicbrainz_albumartistid": {},
            "musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
        },
    },
    "OggTheora": {
        "getter": lambda f, k: f[k][0],
        "fields": {
            "track_number": {"field": "TRACKNUMBER", "to_application": convert_track_number},
            "title": {},
            "artist": {},
            "album_artist": {"field": "albumartist"},
            "album": {},
            "date": {"field": "date", "to_application": get_date},
            "musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
            "musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
            "musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
            "musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
        },
    },
    "MP3": {
        "getter": get_id3_tag,
        "clean_pictures": clean_id3_pictures,
        "fields": {
            "track_number": {"field": "TRCK", "to_application": convert_track_number},
            "title": {"field": "TIT2"},
            "artist": {"field": "TPE1"},
            "album_artist": {"field": "TPE2"},
            "album": {"field": "TALB"},
            "date": {"field": "TDRC", "to_application": get_date},
            "musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
            "musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
            "musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
            # The recording id is not a text frame: it has a dedicated getter.
            "musicbrainz_recordingid": {"field": "UFID", "getter": get_mp3_recording_id},
            "pictures": {},
        },
    },
    "FLAC": {
        "getter": get_flac_tag,
        "clean_pictures": clean_flac_pictures,
        "fields": {
            "track_number": {"field": "tracknumber", "to_application": convert_track_number},
            "title": {},
            "artist": {},
            "album_artist": {"field": "albumartist"},
            "album": {},
            "date": {"field": "date", "to_application": get_date},
            "musicbrainz_albumid": {},
            "musicbrainz_artistid": {},
            "musicbrainz_albumartistid": {},
            "musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
            "test": {},
            "pictures": {},
        },
    },
}
2018-09-23 12:38:42 +00:00
# Application keys collected by Metadata.all(), in output order.
ALL_FIELDS = [
    # descriptive tags
    "track_number", "title", "artist", "album_artist", "album", "date",
    # musicbrainz identifiers
    "musicbrainz_albumid", "musicbrainz_artistid",
    "musicbrainz_albumartistid", "musicbrainz_recordingid",
]
class Metadata(object):
    """Format-independent access to the tags of one audio file.

    The file is opened with ``mutagen.File`` and the class name of the
    resulting object selects the matching entry of ``CONF``, which describes
    how each application-level key is read and converted for that format.
    """

    def __init__(self, path):
        """Open ``path`` and select the configuration for its format.

        :raises ValueError: if ``mutagen.File`` returns ``None`` for the path
            or the detected format has no entry in ``CONF``.
        """
        self._file = mutagen.File(path)
        if self._file is None:
            raise ValueError("Cannot parse metadata from {}".format(path))
        ft = self.get_file_type(self._file)
        try:
            self._conf = CONF[ft]
        except KeyError:
            raise ValueError("Unsupported format {}".format(ft))

    def get_file_type(self, f):
        """Return the class name of ``f``, used as the key into ``CONF``."""
        return f.__class__.__name__

    def get(self, key, default=NODEFAULT):
        """Return the converted and validated value of tag ``key``.

        :param key: application-level key (e.g. ``"title"``).
        :param default: value returned when the tag is missing from the file;
            when omitted, a missing tag raises instead.
        :raises UnsupportedTag: if ``key`` is not declared for this format.
        :raises TagNotFound: if the tag is missing and no default was given.

        Errors raised by the field's converter or validator are not caught
        here, and ``default`` is returned as-is (no conversion/validation).
        """
        try:
            field_conf = self._conf["fields"][key]
        except KeyError:
            raise UnsupportedTag("{} is not supported for this file format".format(key))
        real_key = field_conf.get("field", key)
        try:
            # A field may override the format-level getter.
            getter = field_conf.get("getter", self._conf["getter"])
            v = getter(self._file, real_key)
        except KeyError:
            # TagNotFound is a KeyError subclass, so it is handled here too.
            # The sentinel is compared by identity so that arbitrary default
            # objects never have their __eq__ invoked.
            if default is NODEFAULT:
                raise TagNotFound(real_key)
            return default

        converter = field_conf.get("to_application")
        if converter:
            v = converter(v)
        field = VALIDATION.get(key)
        if field:
            v = field.to_python(v)
        return v

    def all(self, ignore_parse_errors=True):
        """
        Return a dict containing all metadata of the file

        Every key of ``ALL_FIELDS`` is present in the result; the value is
        ``None`` when the tag is missing or fails validation.

        :param ignore_parse_errors: when true (the default) a ``ParseError``
            is logged as a warning and the field is set to ``None``;
            otherwise the error is re-raised.
        """
        data = {}
        for field in ALL_FIELDS:
            try:
                data[field] = self.get(field, None)
            except (TagNotFound, forms.ValidationError):
                data[field] = None
            except ParseError as e:
                if not ignore_parse_errors:
                    raise
                # Lazy %-formatting: the message is only built if emitted.
                logger.warning("Unparsable field %s: %s", field, e)
                data[field] = None
        return data

    def get_picture(self, picture_type="cover_front"):
        """Return the first embedded picture of the requested type.

        :param picture_type: name of a ``mutagen.id3.PictureType`` member,
            case-insensitive (it is upper-cased before lookup).
        :returns: the matching picture, or ``None`` if the format does not
            support pictures, the tag is missing, or no picture matches.
        :raises AttributeError: if ``picture_type`` names no such member.
        """
        ptype = getattr(mutagen.id3.PictureType, picture_type.upper())
        try:
            pictures = self.get("pictures")
        except (UnsupportedTag, TagNotFound):
            return None
        # Formats without a cleaner keep the raw picture objects.
        cleaner = self._conf.get("clean_pictures", lambda v: v)
        pictures = cleaner(pictures)
        for p in pictures:
            if p["type"] == ptype:
                return p
        return None