Merge branch '288-cli-perf' into 'develop'

Resolve "Improve CLI import performance by removing duplicated MBID queries"

Closes #288 and #237

See merge request funkwhale/funkwhale!234
merge-requests/237/head
Eliot Berriot 2018-06-06 19:50:13 +00:00
commit a4eda7efad
6 zmienionych plików z 180 dodań i 27 usunięć

Wyświetl plik

@ -334,6 +334,11 @@ class TrackQuerySet(models.QuerySet):
.prefetch_related('files'))
def get_artist(release_list):
return Artist.get_or_create_from_api(
mbid=release_list[0]['artist-credits'][0]['artists']['id'])[0]
class Track(APIModelMixin):
title = models.CharField(max_length=255)
artist = models.ForeignKey(
@ -363,8 +368,9 @@ class Track(APIModelMixin):
'musicbrainz_field_name': 'title'
},
'artist': {
'musicbrainz_field_name': 'artist-credit',
'converter': lambda v: Artist.get_or_create_from_api(mbid=v[0]['artist']['id'])[0],
# we use the artist from the release to avoid #237
'musicbrainz_field_name': 'release-list',
'converter': get_artist,
},
'album': {
'musicbrainz_field_name': 'release-list',
@ -431,7 +437,40 @@ class Track(APIModelMixin):
title__iexact=title,
defaults=kwargs)
@classmethod
def get_or_create_from_release(cls, release_mbid, mbid):
release_mbid = str(release_mbid)
mbid = str(mbid)
try:
return cls.objects.get(mbid=mbid), False
except cls.DoesNotExist:
pass
album = Album.get_or_create_from_api(release_mbid)[0]
data = musicbrainz.client.api.releases.get(
str(album.mbid), includes=Album.api_includes)
tracks = [
t
for m in data['release']['medium-list']
for t in m['track-list']
]
track_data = None
for track in tracks:
if track['recording']['id'] == mbid:
track_data = track
break
if not track_data:
raise ValueError('No track found matching this ID')
return cls.objects.update_or_create(
mbid=mbid,
defaults={
'position': int(track['position']),
'title': track['recording']['title'],
'album': album,
'artist': album.artist,
}
)
class TrackFile(models.Model):
uuid = models.UUIDField(
unique=True, db_index=True, default=uuid.uuid4)

Wyświetl plik

@ -12,25 +12,43 @@ from funkwhale_api.music import models, metadata
@transaction.atomic
def import_track_data_from_path(path):
data = metadata.Metadata(path)
artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'),
defaults={
'name': data.get('artist'),
'mbid': data.get('musicbrainz_artistid', None),
},
)[0]
album = None
track_mbid = data.get('musicbrainz_recordingid', None)
album_mbid = data.get('musicbrainz_albumid', None)
if album_mbid and track_mbid:
# to gain performance and avoid additional mb lookups,
# we import from the release data, which is already cached
return models.Track.get_or_create_from_release(
album_mbid, track_mbid)[0]
elif track_mbid:
return models.Track.get_or_create_from_api(track_mbid)[0]
elif album_mbid:
album = models.Album.get_or_create_from_api(album_mbid)[0]
artist = album.artist if album else None
artist_mbid = data.get('musicbrainz_artistid', None)
if not artist:
if artist_mbid:
artist = models.Artist.get_or_create_from_api(artist_mbid)[0]
else:
artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'),
defaults={
'name': data.get('artist'),
},
)[0]
release_date = data.get('date', default=None)
album = models.Album.objects.get_or_create(
title__iexact=data.get('album'),
artist=artist,
defaults={
'title': data.get('album'),
'release_date': release_date,
'mbid': data.get('musicbrainz_albumid', None),
},
)[0]
if not album:
album = models.Album.objects.get_or_create(
title__iexact=data.get('album'),
artist=artist,
defaults={
'title': data.get('album'),
'release_date': release_date,
},
)[0]
position = data.get('track_number', default=None)
track = models.Track.objects.get_or_create(
title__iexact=data.get('title'),
@ -38,7 +56,6 @@ def import_track_data_from_path(path):
defaults={
'title': data.get('title'),
'position': position,
'mbid': data.get('musicbrainz_recordingid', None),
},
)[0]
return track

Wyświetl plik

@ -43,6 +43,53 @@ def test_import_album_stores_release_group(factories):
assert album.artist == artist
def test_import_track_from_release(factories, mocker):
album = factories['music.Album'](
mbid='430347cb-0879-3113-9fde-c75b658c298e')
album_data = {
'release': {
'id': album.mbid,
'title': 'Daydream Nation',
'status': 'Official',
'medium-count': 1,
'medium-list': [
{
'position': '1',
'format': 'CD',
'track-list': [
{
'id': '03baca8b-855a-3c05-8f3d-d3235287d84d',
'position': '4',
'number': '4',
'length': '417973',
'recording': {
'id': '2109e376-132b-40ad-b993-2bb6812e19d4',
'title': 'Teen Age Riot',
'length': '417973'},
'track_or_recording_length': '417973'
}
],
'track-count': 1
}
],
}
}
mocked_get = mocker.patch(
'funkwhale_api.musicbrainz.api.releases.get',
return_value=album_data)
track_data = album_data['release']['medium-list'][0]['track-list'][0]
track = models.Track.get_or_create_from_release(
'430347cb-0879-3113-9fde-c75b658c298e',
track_data['recording']['id'],
)[0]
mocked_get.assert_called_once_with(
album.mbid, includes=models.Album.api_includes)
assert track.title == track_data['recording']['title']
assert track.mbid == track_data['recording']['id']
assert track.album == album
assert track.artist == album.artist
assert track.position == int(track_data['position'])
def test_import_job_is_bound_to_track_file(factories, mocker):
track = factories['music.Track']()
job = factories['music.ImportJob'](mbid=track.mbid)

Wyświetl plik

@ -15,16 +15,13 @@ DATA_DIR = os.path.join(
)
def test_can_create_track_from_file_metadata(db, mocker):
def test_can_create_track_from_file_metadata_no_mbid(db, mocker):
metadata = {
'artist': ['Test artist'],
'album': ['Test album'],
'title': ['Test track'],
'TRACKNUMBER': ['4'],
'date': ['2012-08-15'],
'musicbrainz_albumid': ['a766da8b-8336-47aa-a3ee-371cc41ccc75'],
'musicbrainz_trackid': ['bd21ac48-46d8-4e78-925f-d9cc2a294656'],
'musicbrainz_artistid': ['013c8e5b-d72a-4cd3-8dee-6c64d6125823'],
}
m1 = mocker.patch('mutagen.File', return_value=metadata)
m2 = mocker.patch(
@ -35,13 +32,64 @@ def test_can_create_track_from_file_metadata(db, mocker):
os.path.join(DATA_DIR, 'dummy_file.ogg'))
assert track.title == metadata['title'][0]
assert track.mbid == uuid.UUID(metadata['musicbrainz_trackid'][0])
assert track.mbid is None
assert track.position == 4
assert track.album.title == metadata['album'][0]
assert track.album.mbid == uuid.UUID(metadata['musicbrainz_albumid'][0])
assert track.album.mbid is None
assert track.album.release_date == datetime.date(2012, 8, 15)
assert track.artist.name == metadata['artist'][0]
assert track.artist.mbid == uuid.UUID(metadata['musicbrainz_artistid'][0])
assert track.artist.mbid is None
def test_can_create_track_from_file_metadata_mbid(factories, mocker):
album = factories['music.Album']()
mocker.patch(
'funkwhale_api.music.models.Album.get_or_create_from_api',
return_value=(album, True),
)
album_data = {
'release': {
'id': album.mbid,
'medium-list': [
{
'track-list': [
{
'id': '03baca8b-855a-3c05-8f3d-d3235287d84d',
'position': '4',
'number': '4',
'recording': {
'id': '2109e376-132b-40ad-b993-2bb6812e19d4',
'title': 'Teen Age Riot',
},
}
],
'track-count': 1
}
],
}
}
mocker.patch(
'funkwhale_api.musicbrainz.api.releases.get',
return_value=album_data)
track_data = album_data['release']['medium-list'][0]['track-list'][0]
metadata = {
'musicbrainz_albumid': [album.mbid],
'musicbrainz_trackid': [track_data['recording']['id']],
}
m1 = mocker.patch('mutagen.File', return_value=metadata)
m2 = mocker.patch(
'funkwhale_api.music.metadata.Metadata.get_file_type',
return_value='OggVorbis',
)
track = tasks.import_track_data_from_path(
os.path.join(DATA_DIR, 'dummy_file.ogg'))
assert track.title == track_data['recording']['title']
assert track.mbid == track_data['recording']['id']
assert track.position == 4
assert track.album == album
assert track.artist == album.artist
def test_management_command_requires_a_valid_username(factories, mocker):

Wyświetl plik

@ -0,0 +1 @@
Do not crash when importing track with an artist that do not match the release artist (#237)

Wyświetl plik

@ -0,0 +1 @@
Huge performance boost (~x5 to x7) during CLI import that queries MusicBrainz (#288)