microblog.pub/app/actor.py

456 wiersze
13 KiB
Python
Czysty Zwykły widok Historia

2022-07-06 19:13:55 +00:00
import hashlib
2022-06-22 18:11:22 +00:00
import typing
from dataclasses import dataclass
2022-10-07 06:55:05 +00:00
from datetime import timedelta
2022-07-31 08:03:45 +00:00
from functools import cached_property
2022-06-22 19:15:07 +00:00
from typing import Union
2022-06-22 18:11:22 +00:00
from urllib.parse import urlparse
import httpx
2022-09-07 19:29:09 +00:00
from loguru import logger
2022-06-29 06:56:39 +00:00
from sqlalchemy import select
2022-06-22 18:11:22 +00:00
from sqlalchemy.orm import joinedload
from app import activitypub as ap
2022-06-25 06:23:28 +00:00
from app import media
from app.config import BASE_URL
from app.config import USER_AGENT
from app.config import USERNAME
from app.config import WEBFINGER_DOMAIN
2022-06-29 18:43:17 +00:00
from app.database import AsyncSession
2022-10-07 06:55:05 +00:00
from app.utils.datetime import as_utc
from app.utils.datetime import now
2022-06-22 18:11:22 +00:00
if typing.TYPE_CHECKING:
from app.models import Actor as ActorModel
def _handle(raw_actor: ap.RawObject) -> str:
    """Return the ``@user@domain`` handle for a raw actor document.

    A default handle is built from the actor's ``preferredUsername`` and the
    hostname of its ID. The actor's server is then queried over webfinger; if
    the response carries an ``acct:`` subject, that subject is used instead,
    which is how custom webfinger domains are picked up.

    The webfinger lookup is a blocking ``httpx.get`` call. Any failure there
    is logged and the default handle is returned.

    Raises:
        ValueError: if the actor ID has no hostname.
    """
    ap_id = ap.get_id(raw_actor["id"])
    domain = urlparse(ap_id)
    if not domain.hostname:
        raise ValueError(f"Invalid actor ID {ap_id}")

    handle = f'@{raw_actor["preferredUsername"]}@{domain.hostname}'  # type: ignore

    # TODO: cleanup this
    # Next, check for custom webfinger domains.
    # NOTE: the previous code iterated a set holding the same URL twice, which
    # collapses to this single entry.
    webfinger_urls = (f"https://{domain.hostname}/.well-known/webfinger",)

    resp: httpx.Response | None = None
    for url in webfinger_urls:
        try:
            logger.info(f"Webfinger {handle} at {url}")
            resp = httpx.get(
                url,
                params={"resource": f"acct:{handle[1:]}"},
                headers={
                    "User-Agent": USER_AGENT,
                },
                follow_redirects=True,
            )
            resp.raise_for_status()
            break
        except Exception:
            logger.exception(f"Failed to webfinger {handle}")
            # Do not let an error response be parsed below as if it succeeded
            resp = None

    if resp is not None:
        try:
            json_resp = resp.json()
            if json_resp.get("subject", "").startswith("acct:"):
                # Keep the leading "@" so the result has the same shape as the
                # default handle built above
                return "@" + json_resp["subject"].removeprefix("acct:")
        except Exception:
            logger.exception(f"Failed to parse webfinger response for {handle}")

    return handle
2022-06-22 18:11:22 +00:00
class Actor:
    """Accessors over a raw ActivityPub actor document.

    Subclasses must provide ``ap_actor`` (the raw document) and ``ap_type``;
    every other attribute here is derived from ``ap_actor``.
    """

    @property
    def ap_actor(self) -> ap.RawObject:
        """The raw actor document; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def ap_id(self) -> str:
        """The actor ID, extracted from the document's ``id``."""
        return ap.get_id(self.ap_actor["id"])

    @property
    def name(self) -> str | None:
        return self.ap_actor.get("name")

    @property
    def summary(self) -> str | None:
        return self.ap_actor.get("summary")

    @property
    def url(self) -> str | None:
        """The document's ``url``, falling back to its ``id`` when unset."""
        declared_url = self.ap_actor.get("url")
        if declared_url:
            return declared_url
        return self.ap_actor["id"]

    @property
    def preferred_username(self) -> str:
        return self.ap_actor["preferredUsername"]

    @property
    def display_name(self) -> str:
        """The ``name`` when it is set, the preferred username otherwise."""
        return self.name or self.preferred_username

    @cached_property
    def handle(self) -> str:
        """The actor handle, computed once via ``_handle``."""
        return _handle(self.ap_actor)

    @property
    def ap_type(self) -> str:
        """The actor type; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def inbox_url(self) -> str:
        return self.ap_actor["inbox"]

    @property
    def outbox_url(self) -> str:
        return self.ap_actor["outbox"]

    @property
    def shared_inbox_url(self) -> str:
        """The ``endpoints.sharedInbox`` URL, or the inbox URL when unset."""
        endpoints = self.ap_actor.get("endpoints", {})
        return endpoints.get("sharedInbox") or self.inbox_url

    @property
    def icon_url(self) -> str | None:
        icon = self.ap_actor.get("icon")
        return icon.get("url") if icon else None

    @property
    def icon_media_type(self) -> str | None:
        icon = self.ap_actor.get("icon")
        return icon.get("mediaType") if icon else None

    @property
    def image_url(self) -> str | None:
        image = self.ap_actor.get("image")
        return image.get("url") if image else None

    @property
    def public_key_as_pem(self) -> str:
        return self.ap_actor["publicKey"]["publicKeyPem"]

    @property
    def public_key_id(self) -> str:
        return self.ap_actor["publicKey"]["id"]

    @property
    def proxied_icon_url(self) -> str:
        """Proxied URL of the icon, or the static placeholder without icon."""
        icon_url = self.icon_url
        if not icon_url:
            return BASE_URL + "/static/nopic.png"
        return media.proxied_media_url(icon_url)

    @property
    def resized_icon_url(self) -> str:
        """Resized (50) URL of the icon, or the static placeholder without icon."""
        icon_url = self.icon_url
        if not icon_url:
            return BASE_URL + "/static/nopic.png"
        return media.resized_media_url(icon_url, 50)

    @property
    def tags(self) -> list[ap.RawObject]:
        raw_tags = self.ap_actor.get("tag", [])
        return ap.as_list(raw_tags)

    @property
    def followers_collection_id(self) -> str | None:
        return self.ap_actor.get("followers")

    @cached_property
    def attachments(self) -> list[ap.RawObject]:
        raw_attachments = self.ap_actor.get("attachment", [])
        return ap.as_list(raw_attachments)

    @cached_property
    def moved_to(self) -> str | None:
        return self.ap_actor.get("movedTo")

    @cached_property
    def server(self) -> str:
        """Hostname part of the actor ID."""
        parsed_id = urlparse(self.ap_id)
        return parsed_id.hostname  # type: ignore
2022-08-15 08:15:00 +00:00
2022-06-22 18:11:22 +00:00
class RemoteActor(Actor):
    """An ``Actor`` backed by an in-memory raw actor document."""

    def __init__(self, ap_actor: ap.RawObject, handle: str | None = None) -> None:
        """Wrap ``ap_actor``, resolving the handle via ``_handle`` if not given.

        Raises:
            ValueError: if the document's ``type`` is not in ``ap.ACTOR_TYPES``.
        """
        ap_type = ap_actor.get("type")
        if ap_type not in ap.ACTOR_TYPES:
            raise ValueError(f"Unexpected actor type: {ap_type}")

        self._ap_actor = ap_actor
        self._ap_type = ap_type
        # Only look the handle up when the caller did not supply one
        self._handle = _handle(ap_actor) if handle is None else handle

    @property
    def ap_actor(self) -> ap.RawObject:
        return self._ap_actor

    @property
    def ap_type(self) -> str:
        return self._ap_type

    @property
    def is_from_db(self) -> bool:
        # This wrapper always reports that it is not a database row
        return False

    @property
    def handle(self) -> str:
        return self._handle
2022-06-22 18:11:22 +00:00
# Module-level actor built from `ap.ME`. The handle is passed explicitly, so
# `RemoteActor.__init__` does not call `_handle` (and its webfinger request)
# when this module is imported.
LOCAL_ACTOR = RemoteActor(ap_actor=ap.ME, handle=f"@{USERNAME}@{WEBFINGER_DOMAIN}")
2022-06-22 18:11:22 +00:00
2022-06-29 18:43:17 +00:00
async def save_actor(db_session: AsyncSession, ap_actor: ap.RawObject) -> "ActorModel":
    """Create a new actor row from a raw actor document and return it.

    The row is added to the session, flushed and refreshed; this function does
    not commit. The handle is resolved with ``_handle``.

    Raises:
        ValueError: if the document's ``type`` is not in ``ap.ACTOR_TYPES``.
    """
    from app import models

    # The parentheses matter: without them the walrus binds the boolean result
    # of the `not in` test, and the error message reports "True" as the type.
    if (ap_type := ap_actor.get("type")) not in ap.ACTOR_TYPES:
        raise ValueError(f"Invalid type {ap_type} for actor {ap_actor}")

    actor = models.Actor(
        ap_id=ap.get_id(ap_actor["id"]),
        ap_actor=ap_actor,
        ap_type=ap.as_list(ap_actor["type"])[0],
        handle=_handle(ap_actor),
    )
    db_session.add(actor)
    await db_session.flush()
    await db_session.refresh(actor)
    return actor
async def fetch_actor(
    db_session: AsyncSession,
    actor_id: str,
    save_if_not_found: bool = True,
) -> "ActorModel":
    """Return the stored actor for ``actor_id``, fetching it when needed.

    A stored actor last updated more than 24 hours ago is refreshed from the
    remote server; if that refresh fails, the stored copy is returned as is.
    An unknown actor is fetched and saved, unless ``save_if_not_found`` is
    false.

    Raises:
        ValueError: if ``actor_id`` is the local actor.
        ap.ObjectNotFoundError: if the stored actor is marked deleted, or if
            the actor is unknown and ``save_if_not_found`` is false.
    """
    if actor_id == LOCAL_ACTOR.ap_id:
        raise ValueError("local actor should not be fetched")

    from app import models

    lookup_by_id = select(models.Actor).where(
        models.Actor.ap_id == actor_id,
    )
    existing_actor = (await db_session.scalars(lookup_by_id)).one_or_none()

    if existing_actor:
        if existing_actor.is_deleted:
            raise ap.ObjectNotFoundError(f"{actor_id} was deleted")

        age = now() - as_utc(existing_actor.updated_at)
        if age > timedelta(hours=24):
            logger.info(
                f"Refreshing {actor_id=} last updated {existing_actor.updated_at}"
            )
            try:
                ap_actor = await ap.fetch(actor_id)
                await update_actor_if_needed(
                    db_session,
                    existing_actor,
                    RemoteActor(ap_actor),
                )
            except Exception:
                # A failed refresh is not fatal: fall back to the cached actor
                logger.exception(f"Failed to refresh {actor_id}")

        return existing_actor

    if not save_if_not_found:
        raise ap.ObjectNotFoundError(actor_id)

    ap_actor = await ap.fetch(actor_id)

    # Some software uses the URL where we expect the ID, or uses a different
    # casing (like Birdsite LIVE), which means the actor may already be in DB
    lookup_by_fetched_id = select(models.Actor).where(
        models.Actor.ap_id == ap.get_id(ap_actor),
    )
    existing_actor_by_url = (
        await db_session.scalars(lookup_by_fetched_id)
    ).one_or_none()

    if not existing_actor_by_url:
        return await save_actor(db_session, ap_actor)

    # Update the actor, as it had to be fetched anyway
    await update_actor_if_needed(
        db_session,
        existing_actor_by_url,
        RemoteActor(ap_actor),
    )
    return existing_actor_by_url
2022-06-22 18:11:22 +00:00
2022-10-09 09:36:00 +00:00
async def update_actor_if_needed(
    db_session: AsyncSession,
    actor_in_db: "ActorModel",
    ra: RemoteActor,
) -> None:
    """Copy ``ra`` onto ``actor_in_db`` when their ``_actor_hash`` differ.

    When the hashes match nothing is written. Otherwise the raw document,
    handle, type and ``updated_at`` are replaced and the session is flushed
    (not committed).
    """
    # Nothing to do when the stored actor already matches the remote one
    if _actor_hash(ra) == _actor_hash(actor_in_db):
        return

    actor_in_db.ap_actor = ra.ap_actor
    actor_in_db.handle = ra.handle
    actor_in_db.ap_type = ra.ap_type
    actor_in_db.updated_at = now()
    await db_session.flush()
2022-10-09 09:36:00 +00:00
2022-06-22 18:11:22 +00:00
@dataclass
class ActorMetadata:
    """Relationship data between the local instance and one actor.

    Instances are built by ``get_actors_metadata``.
    """

    # ID of the actor this record describes
    ap_actor_id: str
    # Whether a Following row exists for the actor
    is_following: bool
    # Whether a Follower row exists for the actor
    is_follower: bool
    # Whether a Follow that has not been undone exists in the outbox
    is_follow_request_sent: bool
    # Whether the actor sent a Reject for that outbox Follow
    is_follow_request_rejected: bool
    # ID of the outbox Follow, if any
    outbox_follow_ap_id: str | None
    # ID of the inbox object tied to the Follower row, if any
    inbox_follow_ap_id: str | None
    # Stored actor this one moved to, when it is known locally
    moved_to: typing.Optional["ActorModel"]
    # Whether a Block that has not been undone was received from the actor
    has_blocked_local_actor: bool


# Metadata records keyed by actor ID
ActorsMetadata = dict[str, ActorMetadata]
2022-06-29 18:43:17 +00:00
async def get_actors_metadata(
    db_session: AsyncSession,
    actors: list[Union["ActorModel", "RemoteActor"]],
) -> ActorsMetadata:
    """Build an ``ActorMetadata`` record for each actor, keyed by actor ID.

    Follower, following, sent-follow, reject and block data are each loaded
    with one query covering all the given actors. An actor's ``moved_to``
    target is resolved with ``fetch_actor`` without saving unknown actors;
    failures there leave ``moved_to`` as ``None``.

    Raises:
        ValueError: if an actor has an empty ``ap_id``.
    """
    from app import models

    ap_actor_ids = [actor.ap_id for actor in actors]

    # Actor ID -> ID of the inbox object attached to the Follower row
    followers_query = (
        select(models.Follower)
        .where(models.Follower.ap_actor_id.in_(ap_actor_ids))
        .options(joinedload(models.Follower.inbox_object))
    )
    follower_rows = (await db_session.scalars(followers_query)).unique().all()
    followers = {
        follower.ap_actor_id: follower.inbox_object.ap_id
        for follower in follower_rows
    }

    # IDs of the actors that have a Following row
    following_query = select(models.Following.ap_actor_id).where(
        models.Following.ap_actor_id.in_(ap_actor_ids)
    )
    following = {
        row.ap_actor_id for row in await db_session.execute(following_query)
    }

    # Followed actor ID -> ID of the outbox Follow that has not been undone
    sent_follows_query = select(
        models.OutboxObject.ap_object, models.OutboxObject.ap_id
    ).where(
        models.OutboxObject.ap_type == "Follow",
        models.OutboxObject.undone_by_outbox_object_id.is_(None),
        models.OutboxObject.activity_object_ap_id.in_(ap_actor_ids),
    )
    sent_follow_requests = {
        row.ap_object["object"]: row.ap_id
        for row in await db_session.execute(sent_follows_query)
    }

    # Object IDs targeted by the Reject activities these actors sent
    rejects_query = select(models.InboxObject.activity_object_ap_id).where(
        models.InboxObject.ap_type == "Reject",
        models.InboxObject.ap_actor_id.in_(ap_actor_ids),
    )
    rejected_follow_requests = {
        row.activity_object_ap_id
        for row in await db_session.execute(rejects_query)
    }

    # IDs of the actors that sent a Block which has not been undone
    blocks_query = select(models.InboxObject.ap_actor_id).where(
        models.InboxObject.ap_type == "Block",
        models.InboxObject.undone_by_inbox_object_id.is_(None),
        models.InboxObject.ap_actor_id.in_(ap_actor_ids),
    )
    blocks = {row.ap_actor_id for row in await db_session.execute(blocks_query)}

    idx: ActorsMetadata = {}
    for actor in actors:
        if not actor.ap_id:
            raise ValueError("Should never happen")

        moved_to = None
        if actor.moved_to:
            try:
                moved_to = await fetch_actor(
                    db_session,
                    actor.moved_to,
                    save_if_not_found=False,
                )
            except ap.ObjectNotFoundError:
                # The target is unknown locally or deleted
                pass
            except Exception:
                logger.exception(f"Failed to fetch {actor.moved_to=}")

        current_id = actor.ap_id
        follow_was_sent = current_id in sent_follow_requests
        follow_was_rejected = (
            follow_was_sent
            and sent_follow_requests[current_id] in rejected_follow_requests
        )

        idx[current_id] = ActorMetadata(
            ap_actor_id=current_id,
            is_following=current_id in following,
            is_follower=current_id in followers,
            is_follow_request_sent=follow_was_sent,
            is_follow_request_rejected=follow_was_rejected,
            outbox_follow_ap_id=sent_follow_requests.get(current_id),
            inbox_follow_ap_id=followers.get(current_id),
            moved_to=moved_to,
            has_blocked_local_actor=current_id in blocks,
        )

    return idx
2022-07-06 19:13:55 +00:00
def _actor_hash(actor: Actor) -> bytes:
    """Return a 32-byte BLAKE2b digest of the fields that mark an actor update.

    The ID, handle, display name and public key are always hashed. The name,
    summary, URL, icon URL, image URL and ``moved_to`` are hashed only when
    set, as are the name/value pairs of ``PropertyValue`` attachments.
    """
    digest = hashlib.blake2b(digest_size=32)

    def feed(text: str) -> None:
        digest.update(text.encode())

    def feed_if_set(text: str | None) -> None:
        if text:
            feed(text)

    feed(actor.ap_id)
    feed(actor.handle)
    feed_if_set(actor.name)
    feed_if_set(actor.summary)
    feed_if_set(actor.url)
    feed(actor.display_name)
    feed_if_set(actor.icon_url)
    feed_if_set(actor.image_url)

    for attachment in actor.attachments or ():
        # Only PropertyValue attachments take part in the hash
        if attachment.get("type") == "PropertyValue":
            feed(attachment["name"])
            feed(attachment["value"])

    feed(actor.public_key_id)
    feed(actor.public_key_as_pem)
    feed_if_set(actor.moved_to)

    return digest.digest()