kopia lustrzana https://gitlab.com/marnanel/chapeau
340 wiersze
9.0 KiB
Python
340 wiersze
9.0 KiB
Python
# fetch.py
|
|
#
|
|
# Part of kepi.
|
|
# Copyright (c) 2018-2020 Marnanel Thurman.
|
|
# Licensed under the GNU Public License v2.
|
|
|
|
import logging
|
|
logger = logging.getLogger(name="kepi")
|
|
|
|
import requests
|
|
import django.db.utils
|
|
from urllib.parse import urlparse
|
|
from django.http.request import HttpRequest
|
|
from django.conf import settings
|
|
from kepi.trilby_api.models import *
|
|
from kepi.bowler_pub.utils import log_one_message
|
|
from kepi.bowler_pub.activityresponse import ActivityResponse
|
|
from kepi.sombrero_sendpub.webfinger import get_webfinger
|
|
import kepi.sombrero_sendpub.models as sombrero_models
|
|
import kepi.bowler_pub.create as bowler_create
|
|
from django.http import HttpResponse, JsonResponse, Http404
|
|
|
|
def fetch(address,
        expected_type,
        ):

    """
    Find remote or local objects.

    For remote objects, if we already know about them,
    return the existing object. If we don't, fetch it
    over the network and store it, then return the value.

    For local objects-- that is, ones where the hostname
    is listed in this project's ALLOWED_HOSTS setting--
    look it up locally and return it. If the address is
    a URL, this will pass through the dispatcher.

    "address" is the address of the thing we're looking for.
    It's usually a URL. For Persons it can also be atstyle
    ("username@hostname"), which will result in a webfinger
    lookup to find the actual URL. URLs should not contain
    an "@" sign. If "address" is None, we return None
    without doing anything else.

    "expected_type" is the type we're looking for.

    "expected_type" should contain at least the fields
       - remote_url (read/write)
       - local_form and remote_form, which may be identity functions

    Particular types of object may define handlers which
    can initialise other fields. The handler is looked up
    using the "type" field in the retrieved object,
    rather than the "expected_type" passed to this function.

    This function returns the requested object if it can.
    If you didn't specify a type, raises ValueError.
    On all other errors, which are logged, returns None.
    """

    if address is None:
        return None

    # Check the type before touching the address, so that a missing
    # type always produces the documented ValueError rather than
    # whatever parsing the address happens to raise or log.
    if expected_type is None:
        raise ValueError(
                "fetch() requires some sort of type to be specified")

    wanted = _parse_address(address)
    wanted['type'] = expected_type

    if wanted['is_local']:
        handler = _fetch_local
    else:
        handler = _fetch_remote

    return handler(address, wanted)
|
|
|
|
def _parse_address(address):
    """
    Break an address down into the details fetch() needs.

    Returns a dict which always contains:
       - is_atstyle: True if the address contains an "@"
       - hostname: the host part of the address
       - is_local: True if hostname is listed in
                   settings.ALLOWED_HOSTS

    For atstyle addresses it also contains "username";
    the username and hostname are the last two "@"-separated
    fields. For everything else the address is parsed
    as a URL, and the dict also contains "path".
    """

    if '@' in address:
        pieces = address.split('@')

        details = {
                'is_atstyle': True,
                'username': pieces[-2],
                'hostname': pieces[-1],
                }
    else:
        url = urlparse(address)

        details = {
                'is_atstyle': False,
                'hostname': url.netloc,
                'path': url.path,
                }

    details['is_local'] = details['hostname'] in settings.ALLOWED_HOSTS

    logger.debug("%s: wanted: %s", address, details)

    return details
|
|
|
|
def _fetch_local_by_atstyle(address, wanted):
    """
    Find a local Person given an atstyle address.

    "wanted" is the dict built by _parse_address(), with
    "type" added; we use its "type" and "username" fields.

    Returns the LocalPerson whose local user has that username.
    Returns None, after logging, if the wanted type isn't a
    subclass of Person or if there's no such user.
    """

    requested_type = wanted['type']

    # Only a Person can be addressed in atstyle.
    if not issubclass(requested_type, Person):
        logger.warning("%s: atstyle request made for %s, not Person",
                address, requested_type)
        return None

    username = wanted['username']

    try:
        person = LocalPerson.objects.get(local_user__username=username)
    except LocalPerson.DoesNotExist:
        logger.info("%s: no such user: %s",
                address, username)
        return None

    logger.info("%s: found local user: %s",
            address, person)

    return person
|
|
|
|
def _fetch_local_by_url(address, wanted):
    """
    Find a local object given its URL.

    The path part of the URL (wanted['path']) is passed through
    Django's URL resolver, and the view it resolves to is called
    directly with a fake request whose method is ACTIVITY_GET.

    If the view returns an ActivityResponse, we use its
    activity_value; otherwise we use whatever the view returned.

    Returns the object found. Returns None, after logging, if
       - the path doesn't resolve to a view, or
       - the view raises Http404, or
       - the result isn't None and isn't an instance of
         wanted['type'].
    """

    # Import the names we catch explicitly, rather than relying on
    # "django.urls" happening to be reachable as an attribute of
    # the "django" package.
    from django.http import Http404
    from django.urls import Resolver404, resolve

    class ActivityRequest(HttpRequest):
        """
        These are fake HttpRequests which we send to the views
        as an ACTIVITY_GET method.
        """

        headers = {
                'Accept': 'application/activity+json',
                }

        def __init__(self, path):
            super().__init__()

            self.path = path
            self.method = 'ACTIVITY_GET'

    try:
        resolved = resolve(wanted['path'])
    except Resolver404:
        logger.info('%s: not found', address)
        return None

    logger.debug('%s: handled by %s, %s, %s',
            address,
            str(resolved.func),
            str(resolved.args),
            str(resolved.kwargs),
            )

    request = ActivityRequest(
            path=wanted['path'],
            )

    # We're calling the view ourselves, outside Django's usual
    # request handling, so nothing else will turn an Http404
    # into a response for us. Treat it as "not found".
    try:
        result = resolved.func(request,
                *resolved.args,
                **resolved.kwargs)
    except Http404:
        logger.info('%s: handler raised Http404; not found', address)
        return None

    logger.info("%s: result from handler was %s",
            address, result)

    if isinstance(result, ActivityResponse):
        result = result.activity_value

    if result is not None and not isinstance(result, wanted['type']):
        logger.info("%s: type mismatch (%s vs %s); discarding",
                address, type(result), wanted['type'],
                )
        return None

    return result
|
|
|
|
def _fetch_local(address, wanted):
|
|
if wanted['is_atstyle']:
|
|
return _fetch_local_by_atstyle(address, wanted)
|
|
else:
|
|
return _fetch_local_by_url(address, wanted)
|
|
|
|
def _fetch_remote(address, wanted, timeout=30):
    """
    Find a remote object, fetching it over the network if needed.

    "address" is the URL or atstyle address being looked up.
    "wanted" is the dict built by _parse_address(), with "type" added.
    "timeout" is how long, in seconds, to wait for the remote
    server; it's passed straight to requests.get().

    The steps are:
       - If a Failure is already recorded for this address, give up.
       - If wanted['type'].remote_form() has an object which
         matches (by "acct" for atstyle, otherwise by "remote_url"),
         return that.
       - For atstyle addresses, use webfinger to find the real URL.
       - Fetch the URL, asking for application/activity+json.
       - Check that the response is a JSON object with a "type",
         and that its "id", if any, is the URL we asked for.
       - Pass it to bowler_create.deserialise().

    Connection failures, timeouts, and non-200 responses are
    recorded as a Failure, so that we don't ask again.

    Returns the deserialised object if it's an instance of
    wanted['type']. On any error, which is logged, returns None.
    """

    def _record_failure(url, status):
        # Remember that this URL didn't work; the check at the
        # top of _fetch_remote will then refuse to try it again.
        sombrero_models.Failure(
                url = url,
                status = status,
                ).save()

    # Do we already know about them?

    if wanted['is_atstyle']:
        # XXX Not certain about this (or indeed the benefit
        # of storing "acct" in the Person object). Shouldn't we ask
        # the webfinger module whether it knows them?
        kwargs = {"acct": address}
    else:
        kwargs = {"remote_url": address}

    try:
        failure = sombrero_models.Failure.objects.get(
                url = address,
                )
        logger.debug("%s: %s",
                address, failure)

        return None
    except sombrero_models.Failure.DoesNotExist:
        # all good then
        pass

    try:
        result = wanted['type'].remote_form().objects.get(
                **kwargs,
                )

        logger.debug("%s: already known: %s",
                address, result)

        return result

    except AttributeError:
        # Types don't have to support object lookup
        pass

    except wanted['type'].DoesNotExist:
        pass

    # No, so create them (but don't save yet).

    logger.debug("%s: wanted %s; kwargs=%s",
            address, wanted, kwargs)

    if wanted['is_atstyle']:

        webfinger = get_webfinger(
                username = wanted['username'],
                hostname = wanted['hostname'],
                )

        if webfinger.url is None:
            logger.info("%s: webfinger lookup failed; bailing",
                    address)
            return None

        logger.info("%s: webfinger gave us %s",
                address, webfinger.url)

        # From here on, "address" is the real URL.
        address = webfinger.url

    # okay, time to go looking online

    try:
        response = requests.get(
                address,
                headers = {
                    'Accept': 'application/activity+json',
                    },
                # Without a timeout, requests will wait forever
                # for a server which never answers.
                timeout = timeout,
                )
    except requests.ConnectionError:

        logger.info("%s: can't reach host",
                address)

        _record_failure(address, 0)

        return None

    except requests.exceptions.Timeout:

        logger.info("%s: timeout reaching host",
                address)

        _record_failure(address, 0)

        return None

    except requests.exceptions.RequestException as exc:

        # Anything else requests can complain about, such as
        # an address which isn't a usable URL or too many redirects.
        # Not a failure of the host, so don't create a Failure here.
        logger.info("%s: request failed (%s); dropping",
                address, exc)

        return None

    # so, we have *something*...

    if response.status_code!=200:
        # HTTP error; bail immediately

        logger.info("%s: unexpected status code from status lookup: %d",
                address, response.status_code,
                )

        _record_failure(address, response.status_code)

        return None

    try:
        found = response.json()
    except ValueError as ve:
        logger.info("%s: response was not JSON (%s); dropping",
                address, ve)

        # Not actually an HTTP failure, so don't create a Failure here

        return None

    if not isinstance(found, dict):
        # Valid JSON, but a list or a string or a number;
        # the lookups below only make sense on an object.
        logger.info("%s: retrieved JSON was not an object; dropping",
                address)

        return None

    log_one_message(
            direction = "retrieved",
            body = found,
            )

    if 'type' not in found:
        logger.info("%s: retrieved JSON did not include a type; dropping",
                address)

        return None

    if 'id' in found and found['id'] != address:
        logger.info(
                "%s: user's id was not the source url: got %s; dropping",
                address, found['id'],
                )
        return None

    result = bowler_create.deserialise(
            found,
            address,
            )

    if result is None:
        logger.info("%s:  -- can't deserialise; returning None",
                address)
        return None

    logger.info("%s:  -- deserialised as %s",
            address, result)

    if not isinstance(result, wanted['type']):
        logger.info("%s:  -- which wasn't %s; returning None",
                address, wanted['type'])

        return None

    return result
|