Merge branch '988-artist-album-tags' into 'develop'

Resolve "Tagging artists/albums genres when importing music files"

Closes #988

See merge request funkwhale/funkwhale!973
environments/review-front-serv-f1ybnc/deployments/3672
Eliot Berriot 2019-12-18 07:52:09 +01:00
commit c2cb510eb9
12 zmienionych plików z 333 dodań i 0 usunięć

Wyświetl plik

@ -599,6 +599,20 @@ CELERY_BEAT_SCHEDULE = {
},
}
# Optional periodic jobs copying tags shared by all tracks onto their album or
# artist. Both are enabled unless the matching environment flag is set to a
# false value, and both fire once a week (day_of_week 4, at 04:00).
if env.bool("ADD_ALBUM_TAGS_FROM_TRACKS", default=True):
    CELERY_BEAT_SCHEDULE.update(
        {
            "music.albums_set_tags_from_tracks": {
                "task": "music.albums_set_tags_from_tracks",
                "schedule": crontab(minute="0", hour="4", day_of_week="4"),
                # NOTE(review): presumably a two-hour expiry expressed in
                # seconds -- confirm against the Celery `expires` option docs.
                "options": {"expires": 60 * 60 * 2},
            }
        }
    )
if env.bool("ADD_ARTIST_TAGS_FROM_TRACKS", default=True):
    CELERY_BEAT_SCHEDULE.update(
        {
            "music.artists_set_tags_from_tracks": {
                "task": "music.artists_set_tags_from_tracks",
                "schedule": crontab(minute="0", hour="4", day_of_week="4"),
                "options": {"expires": 60 * 60 * 2},
            }
        }
    )
NODEINFO_REFRESH_DELAY = env.int("NODEINFO_REFRESH_DELAY", default=3600 * 24)

Wyświetl plik

@ -0,0 +1,50 @@
import click
from funkwhale_api.music import tasks
from . import base
def handler_add_tags_from_tracks(
    artists=False, albums=False,
):
    """
    Run the "set tags from tracks" task for artists or albums and report the result.

    :param artists: when truthy, run the artists task (takes precedence over
        ``albums``).
    :param albums: when truthy (and ``artists`` is falsy), run the albums task.
    :raises click.UsageError: if neither ``artists`` nor ``albums`` is truthy.
    """
    if artists:
        result = tasks.artists_set_tags_from_tracks()
    elif albums:
        result = tasks.albums_set_tags_from_tracks()
    else:
        # BadOptionUsage expects an option name as first argument since click 7,
        # so calling it with the message alone fails with a TypeError. No single
        # option is at fault here anyway: UsageError (its parent class) fits and
        # accepts the message on its own.
        raise click.UsageError("You must specify artists or albums")

    # The tasks return a (possibly empty) mapping of object id -> tag ids, so
    # test for emptiness rather than only for None.
    if not result:
        click.echo(" No relevant tags found")
    else:
        click.echo(" Relevant tags added to {} objects".format(len(result)))
@base.cli.group()
def albums():
    # The docstring below doubles as the help text displayed by click.
    """Manage albums"""
@base.cli.group()
def artists():
    # The docstring below doubles as the help text displayed by click.
    """Manage artists"""
@albums.command(name="add-tags-from-tracks")
def albums_add_tags_from_tracks():
    """
    Associate tags to album with no genre tags,
    assuming identical tags are found on the album tracks
    """
    # Thin CLI wrapper: the shared handler does the work and prints the outcome.
    handler_add_tags_from_tracks(albums=True)
@artists.command(name="add-tags-from-tracks")
def artists_add_tags_from_tracks():
    """
    Associate tags to artists with no genre tags,
    assuming identical tags are found on the artist tracks
    """
    # Thin CLI wrapper: the shared handler does the work and prints the outcome.
    handler_add_tags_from_tracks(artists=True)

Wyświetl plik

@ -2,6 +2,7 @@ import click
import sys
from . import base
from . import library # noqa
from . import users # noqa
from rest_framework.exceptions import ValidationError

Wyświetl plik

@ -118,6 +118,15 @@ def get_domain_query_from_url(domain, url_field="fid"):
return query
def local_qs(queryset, url_field="fid", include=True):
    """
    Restrict ``queryset`` using the domain query built for our federation hostname.

    The query is built by ``get_domain_query_from_url`` from
    ``settings.FEDERATION_HOSTNAME`` and ``url_field``. With ``include`` truthy
    the matching rows are kept; otherwise the query is negated so that only
    non-matching rows remain.
    """
    domain_query = get_domain_query_from_url(
        domain=settings.FEDERATION_HOSTNAME, url_field=url_field
    )
    return queryset.filter(domain_query if include else ~domain_query)
def is_local(url):
if not url:
return True

Wyświetl plik

@ -14,7 +14,9 @@ from requests.exceptions import RequestException
from funkwhale_api.common import channels, preferences
from funkwhale_api.federation import routes
from funkwhale_api.federation import library as lb
from funkwhale_api.federation import utils as federation_utils
from funkwhale_api.tags import models as tags_models
from funkwhale_api.tags import tasks as tags_tasks
from funkwhale_api.taskapp import celery
from . import licenses
@ -668,6 +670,50 @@ def clean_transcoding_cache():
return candidates.delete()
@celery.app.task(name="music.albums_set_tags_from_tracks")
@transaction.atomic
def albums_set_tags_from_tracks(ids=None, dry_run=False):
    """
    Tag local albums that have no tags yet, using tags found on their tracks.

    :param ids: optional album primary keys further restricting the candidates.
    :param dry_run: when truthy, only log what was found and return ``None``.
    :return: the data computed by ``tags_tasks.get_tags_from_foreign_key``
        (also handed to ``tags_tasks.add_tags_batch``), or ``None`` in dry-run mode.
    """
    untagged = models.Album.objects.filter(tagged_items__isnull=True).order_by("id")
    album_ids = federation_utils.local_qs(untagged).values_list("id", flat=True)
    if ids is not None:
        album_ids = album_ids.filter(pk__in=ids)

    tags_by_album = tags_tasks.get_tags_from_foreign_key(
        ids=album_ids, foreign_key_model=models.Track, foreign_key_attr="album",
    )
    logger.info("Found automatic tags for %s albums…", len(tags_by_album))

    if not dry_run:
        tags_tasks.add_tags_batch(
            tags_by_album, model=models.Album,
        )
        return tags_by_album

    logger.info("Running in dry-run mode, not commiting")
    return None
@celery.app.task(name="music.artists_set_tags_from_tracks")
@transaction.atomic
def artists_set_tags_from_tracks(ids=None, dry_run=False):
    """
    Tag local artists that have no tags yet, using tags found on their tracks.

    :param ids: optional artist primary keys further restricting the candidates.
    :param dry_run: when truthy, only log what was found and return ``None``.
    :return: the data computed by ``tags_tasks.get_tags_from_foreign_key``
        (also handed to ``tags_tasks.add_tags_batch``), or ``None`` in dry-run mode.
    """
    untagged = models.Artist.objects.filter(tagged_items__isnull=True).order_by("id")
    artist_ids = federation_utils.local_qs(untagged).values_list("id", flat=True)
    if ids is not None:
        artist_ids = artist_ids.filter(pk__in=ids)

    tags_by_artist = tags_tasks.get_tags_from_foreign_key(
        ids=artist_ids, foreign_key_model=models.Track, foreign_key_attr="artist",
    )
    logger.info("Found automatic tags for %s artists…", len(tags_by_artist))

    if not dry_run:
        tags_tasks.add_tags_batch(
            tags_by_artist, model=models.Artist,
        )
        return tags_by_artist

    logger.info("Running in dry-run mode, not commiting")
    return None
def get_prunable_tracks(
exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
):

Wyświetl plik

@ -0,0 +1,54 @@
import collections
from django.contrib.contenttypes.models import ContentType
from . import models
def get_tags_from_foreign_key(
    ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items"
):
    """
    Cf #988, this is useful to tag an artist with #Rock if all its tracks are tagged with
    #Rock, for instance.

    :param ids: ids (list or queryset) of the parent objects to compute tags for.
    :param foreign_key_model: model whose rows point to the parents (e.g. Track).
    :param foreign_key_attr: name of the foreign key on ``foreign_key_model``
        (e.g. ``"artist"``).
    :param tagged_items_attr: name of the tagged-items relation on
        ``foreign_key_model``.
    :return: dict mapping a parent id to the sorted list of tag ids present on
        every one of its related objects. Parents without any common tag are
        left out.
    """
    fk_id_attr = "{}_id".format(foreign_key_attr)
    objs = (
        foreign_key_model.objects.filter(
            **{"{}__pk__in".format(foreign_key_attr): ids}
        )
        .order_by("-id")
        .only("id", fk_id_attr)
        .prefetch_related(tagged_items_attr)
    )

    # Per parent id: number of related objects seen, and occurrences of each tag
    total_objs = collections.Counter()
    tag_counts = collections.defaultdict(collections.Counter)

    # An explicit chunk_size is needed for iterator() to honor prefetch_related()
    # on recent Django versions (omitting it is deprecated in 4.1 and an error
    # in 5.0). 2000 is Django's own default chunk size.
    for obj in objs.iterator(chunk_size=2000):
        key = getattr(obj, fk_id_attr)
        total_objs[key] += 1
        tag_counts[key].update(
            ti.tag_id for ti in getattr(obj, tagged_items_attr).all()
        )

    # now, keep only tags that are present on all objects, i.e tags where the count
    # matches total_objs
    final_data = {}
    for key, total in total_objs.items():
        tags_to_keep = sorted(t for t, c in tag_counts[key].items() if c >= total)
        if tags_to_keep:
            final_data[key] = tags_to_keep
    return final_data
def add_tags_batch(data, model, tagged_items_attr="tagged_items"):
    """
    Bulk-create one ``TaggedItem`` per (object id, tag id) pair found in ``data``.

    :param data: dict mapping object ids to iterables of tag ids.
    :param model: model class the object ids belong to; its content type is
        stored on each created row.
    :param tagged_items_attr: accepted but not used by this function.
    :return: whatever ``TaggedItem.objects.bulk_create`` returns.
    """
    content_type = ContentType.objects.get_for_model(model)
    to_create = []
    for obj_id, tag_ids in data.items():
        to_create.extend(
            models.TaggedItem(
                tag_id=tag_id, content_type=content_type, object_id=obj_id
            )
            for tag_id in tag_ids
        )
    # Inserts are sent to the database in batches of 2000 rows
    return models.TaggedItem.objects.bulk_create(to_create, batch_size=2000)

Wyświetl plik

@ -3,6 +3,7 @@ import pytest
from click.testing import CliRunner
from funkwhale_api.cli import main
from funkwhale_api.cli import library
from funkwhale_api.cli import users
@ -102,6 +103,16 @@ from funkwhale_api.cli import users
)
],
),
(
("albums", "add-tags-from-tracks"),
tuple(),
[(library, "handler_add_tags_from_tracks", {"albums": True})],
),
(
("artists", "add-tags-from-tracks"),
tuple(),
[(library, "handler_add_tags_from_tracks", {"artists": True})],
),
],
)
def test_cli(cmd, args, handlers, mocker):

Wyświetl plik

@ -138,3 +138,37 @@ def test_retrieve_with_serializer(db, r_mock):
result = utils.retrieve_ap_object(fid, actor=None, serializer_class=S)
assert result == {"persisted": "object"}
@pytest.mark.parametrize(
    "factory_name, fids, kwargs, expected_indexes",
    [
        (
            "music.Artist",
            ["https://local.domain/test", "http://local.domain/"],
            {},
            [0, 1],
        ),
        (
            "music.Artist",
            ["https://local.domain/test", "http://notlocal.domain/"],
            {},
            [0],
        ),
        (
            "music.Artist",
            ["https://local.domain/test", "http://notlocal.domain/"],
            {"include": False},
            [1],
        ),
    ],
)
def test_local_qs(factory_name, fids, kwargs, expected_indexes, factories, settings):
    """local_qs keeps objects on our domain, or drops them with include=False."""
    settings.FEDERATION_HOSTNAME = "local.domain"
    factory = factories[factory_name]
    objs = []
    for fid in fids:
        objs.append(factory(fid=fid))

    model = type(objs[0])
    result = utils.local_qs(model.objects.all().order_by("id"), **kwargs)

    expected_objs = [obj for idx, obj in enumerate(objs) if idx in expected_indexes]
    assert list(result) == expected_objs

Wyświetl plik

@ -9,6 +9,7 @@ from django.utils import timezone
from funkwhale_api.federation import serializers as federation_serializers
from funkwhale_api.federation import jsonld
from funkwhale_api.federation import utils as federation_utils
from funkwhale_api.music import licenses, metadata, models, signals, tasks
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
@ -1049,3 +1050,53 @@ def test_process_upload_skips_import_metadata_if_invalid(factories, mocker):
get_track_from_import_metadata.assert_called_once_with(
expected_final_metadata, attributed_to=upload.library.actor
)
def test_tag_albums_from_tracks(queryset_equal_queries, factories, mocker):
    """The albums task feeds local, untagged album ids to the tags helpers."""
    patched_get_tags = mocker.patch(
        "funkwhale_api.tags.tasks.get_tags_from_foreign_key"
    )
    patched_add_batch = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch")

    untagged_albums = models.Album.objects.filter(tagged_items__isnull=True)
    expected_queryset = (
        federation_utils.local_qs(untagged_albums)
        .values_list("id", flat=True)
        .order_by("id")
    )

    tasks.albums_set_tags_from_tracks(ids=[1, 2])

    patched_get_tags.assert_called_once_with(
        ids=expected_queryset.filter(pk__in=[1, 2]),
        foreign_key_model=models.Track,
        foreign_key_attr="album",
    )
    patched_add_batch.assert_called_once_with(
        patched_get_tags.return_value, model=models.Album,
    )
def test_tag_artists_from_tracks(queryset_equal_queries, factories, mocker):
    """The artists task feeds local, untagged artist ids to the tags helpers."""
    patched_get_tags = mocker.patch(
        "funkwhale_api.tags.tasks.get_tags_from_foreign_key"
    )
    patched_add_batch = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch")

    untagged_artists = models.Artist.objects.filter(tagged_items__isnull=True)
    expected_queryset = (
        federation_utils.local_qs(untagged_artists)
        .values_list("id", flat=True)
        .order_by("id")
    )

    tasks.artists_set_tags_from_tracks(ids=[1, 2])

    patched_get_tags.assert_called_once_with(
        ids=expected_queryset.filter(pk__in=[1, 2]),
        foreign_key_model=models.Track,
        foreign_key_attr="artist",
    )
    patched_add_batch.assert_called_once_with(
        patched_get_tags.return_value, model=models.Artist,
    )

Wyświetl plik

@ -0,0 +1,35 @@
from funkwhale_api.music import models as music_models
from funkwhale_api.tags import tasks
def test_get_tags_from_foreign_key(factories):
    """Only the tags carried by every track of the artist are returned."""
    tag_factory = factories["tags.Tag"]
    track_factory = factories["music.Track"]

    rock_tag = tag_factory(name="Rock")
    rap_tag = tag_factory(name="Rap")
    artist = factories["music.Artist"]()
    # "techno" is only set on half of the tracks, so it must not be returned
    track_factory.create_batch(3, artist=artist, set_tags=["rock", "rap"])
    track_factory.create_batch(3, artist=artist, set_tags=["rock", "rap", "techno"])

    result = tasks.get_tags_from_foreign_key(
        ids=[artist.pk],
        foreign_key_model=music_models.Track,
        foreign_key_attr="artist",
    )

    expected = {artist.pk: [rock_tag.pk, rap_tag.pk]}
    assert result == expected
def test_add_tags_batch(factories):
    """Tags listed in the batch data end up attached to the artist."""
    tag_factory = factories["tags.Tag"]
    rock_tag = tag_factory(name="Rock")
    rap_tag = tag_factory(name="Rap")
    # extra tag that is not part of the batch data
    tag_factory()
    artist = factories["music.Artist"]()

    data = {artist.pk: [rock_tag.pk, rap_tag.pk]}
    tasks.add_tags_batch(
        data, model=type(artist),
    )

    assert artist.get_tags() == ["Rap", "Rock"]

Wyświetl plik

@ -0,0 +1 @@
Added periodic background task and CLI command to associate genre tags to artists and albums based on identical tags found on corresponding tracks (#988)

Wyświetl plik

@ -168,3 +168,30 @@ database objects.
Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup,
there will be no way to retrieve the deleted data.
Adding tags from tracks
-----------------------
By default, genre tags found in imported files are associated with the corresponding track.
While you can always associate genre information with an artist or album through the web UI,
it may be tedious to do so by hand for a large number of objects.
We offer a command you can run after an import to do this for you. It will:
1. Find all local artists or albums with no tags
2. Get all the tags associated with the corresponding tracks
3. Associate tags that are found on all tracks to the corresponding artist or album
.. note::
A periodic task also runs in the background every few days to perform the same process.
Usage:
.. code-block:: sh
# For albums
python manage.py fw albums add-tags-from-tracks --help
# For artists
python manage.py fw artists add-tags-from-tracks --help