bridgy-fed/web.py

554 wiersze
20 KiB
Python
Czysty Zwykły widok Historia

"""Handles inbound webmentions."""
import datetime
import difflib
import logging
import re
import urllib.parse
from urllib.parse import urlencode, urljoin, urlparse
from flask import g, redirect, render_template, request
from google.cloud import ndb
noop, lint fixes from flake8 remaining: $ flake8 --extend-ignore=E501 *.py tests/*.py "pyflakes" failed during execution due to "'FlakesChecker' object has no attribute 'NAMEDEXPR'" Run flake8 with greater verbosity to see more details activitypub.py:15:1: F401 'oauth_dropins.webutil.util.json_loads' imported but unused activitypub.py:36:1: F401 'web' imported but unused activitypub.py:48:1: E302 expected 2 blank lines, found 1 activitypub.py:51:9: F811 redefinition of unused 'web' from line 36 app.py:6:1: F401 'flask_app.app' imported but unused app.py:9:1: F401 'activitypub' imported but unused app.py:9:1: F401 'convert' imported but unused app.py:9:1: F401 'follow' imported but unused app.py:9:1: F401 'pages' imported but unused app.py:9:1: F401 'redirect' imported but unused app.py:9:1: F401 'superfeedr' imported but unused app.py:9:1: F401 'ui' imported but unused app.py:9:1: F401 'webfinger' imported but unused app.py:9:1: F401 'web' imported but unused app.py:9:1: F401 'xrpc_actor' imported but unused app.py:9:1: F401 'xrpc_feed' imported but unused app.py:9:1: F401 'xrpc_graph' imported but unused app.py:9:19: E401 multiple imports on one line models.py:19:1: F401 'oauth_dropins.webutil.util.json_loads' imported but unused models.py:364:31: E114 indentation is not a multiple of four (comment) models.py:364:31: E116 unexpected indentation (comment) protocol.py:17:1: F401 'oauth_dropins.webutil.util.json_loads' imported but unused redirect.py:26:1: F401 'oauth_dropins.webutil.util.json_loads' imported but unused web.py:18:1: F401 'oauth_dropins.webutil.util.json_loads' imported but unused webfinger.py:13:1: F401 'oauth_dropins.webutil.util.json_loads' imported but unused webfinger.py:110:13: E122 continuation line missing indentation or outdented webfinger.py:111:13: E122 continuation line missing indentation or outdented webfinger.py:131:13: E122 continuation line missing indentation or outdented webfinger.py:132:13: E122 continuation line missing indentation or outdented webfinger.py:133:13: E122 continuation line missing indentation or outdented webfinger.py:134:13: E122 continuation line missing indentation or outdented tests/__init__.py:2:1: F401 'oauth_dropins.webutil.tests' imported but unused tests/test_follow.py:11:1: F401 'oauth_dropins.webutil.util.json_dumps' imported but unused tests/test_follow.py:14:1: F401 '.testutil.Fake' imported but unused tests/test_models.py:156:15: E122 continuation line missing indentation or outdented tests/test_models.py:157:15: E122 continuation line missing indentation or outdented tests/test_models.py:158:11: E122 continuation line missing indentation or outdented tests/test_web.py:12:1: F401 'oauth_dropins.webutil.util.json_dumps' imported but unused tests/test_web.py:17:1: F401 '.testutil' imported but unused tests/test_web.py:1513:13: E128 continuation line under-indented for visual indent tests/test_web.py:1514:9: E124 closing bracket does not match visual indentation tests/testutil.py:106:1: E402 module level import not at top of file tests/testutil.py:107:1: E402 module level import not at top of file tests/testutil.py:108:1: E402 module level import not at top of file tests/testutil.py:109:1: E402 module level import not at top of file tests/testutil.py:110:1: E402 module level import not at top of file tests/testutil.py:301:24: E203 whitespace before ':' tests/testutil.py:301:25: E701 multiple statements on one line (colon) tests/testutil.py:301:25: E231 missing whitespace after ':'
2023-06-20 18:22:54 +00:00
from google.cloud.ndb import ComputedProperty
from granary import as1, as2, microformats2
import mf2util
from oauth_dropins.webutil import flask_util, util
from oauth_dropins.webutil.appengine_config import tasks_client
from oauth_dropins.webutil.appengine_info import APP_ID
from oauth_dropins.webutil.flask_util import error, flash
from oauth_dropins.webutil.util import json_dumps, json_loads
from oauth_dropins.webutil import webmention
2023-04-17 22:36:29 +00:00
from requests import HTTPError, RequestException, URLRequired
from werkzeug.exceptions import BadGateway, BadRequest, HTTPException, NotFound
import common
from common import add
from flask_app import app, cache
from models import Follower, Object, PROTOCOLS, Target, User
from protocol import Protocol
logger = logging.getLogger(__name__)
# https://cloud.google.com/appengine/docs/locations
TASKS_LOCATION = 'us-central1'
2023-04-17 22:36:29 +00:00
CHAR_AFTER_SPACE = chr(ord(' ') + 1)
# https://github.com/snarfed/bridgy-fed/issues/314
WWW_DOMAINS = frozenset((
'www.jvt.me',
))
NON_TLDS = frozenset(('html', 'json', 'php', 'xml'))
class Web(User, Protocol):
"""Web user and webmention protocol implementation.
The key name is the domain.
"""
ABBREV = 'web'
OTHER_LABELS = ('webmention',)
has_redirects = ndb.BooleanProperty()
redirects_error = ndb.TextProperty()
has_hcard = ndb.BooleanProperty()
@classmethod
def _get_kind(cls):
return 'MagicKey'
@ComputedProperty
def readable_id(self):
# prettify if domain, noop if username
username = self.username()
if username != self.key.id():
return util.domain_from_link(username, minimize=False)
def _pre_put_hook(self):
"""Validate domain id, don't allow upper case or invalid characters."""
super()._pre_put_hook()
id = self.key.id()
assert re.match(common.DOMAIN_RE, id)
assert id.lower() == id, f'upper case is not allowed in Web key id: {id}'
assert id not in common.DOMAINS, f'{id} is a Bridgy Fed domain'
@classmethod
def get_or_create(cls, id, **kwargs):
"""Lower cases id (domain), then passes through to :meth:`User.get_or_create`."""
return super().get_or_create(id.lower(), **kwargs)
def web_url(self):
"""Returns this user's web URL aka web_url, eg 'https://foo.com/'."""
return f'https://{self.key.id()}/'
def ap_address(self):
"""Returns this user's ActivityPub address, eg '@foo.com@foo.com'.
Uses the user's domain if they're direct, fed.brid.gy if they're not.
"""
if self.direct:
return f'@{self.username()}@{self.key.id()}'
else:
return f'@{self.key.id()}@{request.host}'
def ap_actor(self, rest=None):
"""Returns this user's ActivityPub/AS2 actor id.
Eg 'https://fed.brid.gy/foo.com'
Web users are special cased to not have an /ap/web/ prefix, for backward
compatibility.
"""
url = common.host_url(self.key.id())
if rest:
url += f'/{rest}'
return url
def user_page_path(self, rest=None):
"""Always use domain."""
path = f'/{self.ABBREV}/{self.key.id()}'
if rest:
if not rest.startswith('?'):
path += '/'
path += rest
return path
2023-06-04 23:10:37 +00:00
def username(self):
"""Returns the user's preferred username.
Uses stored representative h-card if available, falls back to id.
Returns: str
"""
id = self.key.id()
if self.obj and self.obj.as1 and self.direct:
for url in (util.get_list(self.obj.as1, 'url') +
util.get_list(self.obj.as1, 'urls')):
url = url.get('value') if isinstance(url, dict) else url
2023-06-04 23:10:37 +00:00
if url and url.startswith('acct:'):
try:
urluser, urldomain = util.parse_acct_uri(url)
except ValueError as e:
continue
2023-06-04 23:10:37 +00:00
if urldomain == id:
logger.info(f'Found custom username: {urluser}')
return urluser
logger.info(f'Defaulting username to key id {id}')
return id
def verify(self):
"""Fetches site a couple ways to check for redirects and h-card.
Returns: :class:`Web` that was verified. May be different than
self! eg if self's domain started with www and we switch to the root
domain.
"""
domain = self.key.id()
logger.info(f'Verifying {domain}')
if domain.startswith('www.') and domain not in WWW_DOMAINS:
# if root domain redirects to www, use root domain instead
# https://github.com/snarfed/bridgy-fed/issues/314
root = domain.removeprefix("www.")
root_site = f'https://{root}/'
try:
resp = util.requests_get(root_site, gateway=False)
if resp.ok and self.is_web_url(resp.url):
logger.info(f'{root_site} redirects to {resp.url} ; using {root} instead')
root_user = Web.get_or_create(root)
self.use_instead = root_user.key
self.put()
return root_user.verify()
except RequestException:
pass
# check webfinger redirect
path = f'/.well-known/webfinger?resource=acct:{domain}@{domain}'
self.has_redirects = False
self.redirects_error = None
try:
url = urljoin(self.web_url(), path)
resp = util.requests_get(url, gateway=False)
domain_urls = ([f'https://{domain}/' for domain in common.DOMAINS] +
[common.host_url()])
expected = [urljoin(url, path) for url in domain_urls]
if resp.ok and resp.url:
got = urllib.parse.unquote(resp.url)
if got in expected:
self.has_redirects = True
elif got:
diff = '\n'.join(difflib.Differ().compare([got], [expected[0]]))
self.redirects_error = f'Current vs expected:<pre>{diff}</pre>'
else:
lines = [url, f' returned HTTP {resp.status_code}']
if resp.url and resp.url != url:
lines[1:1] = [' redirected to:', resp.url]
self.redirects_error = '<pre>' + '\n'.join(lines) + '</pre>'
except RequestException:
pass
# check home page
try:
self.obj = Web.load(self.web_url(), remote=True, gateway=True)
self.has_hcard = True
except (BadRequest, NotFound, common.NoMicroformats):
self.obj = None
self.has_hcard = False
self.put()
return self
@classmethod
def key_for(cls, id):
"""Returns the :class:`ndb.Key` for a given id.
If id is a domain, uses it as is. If it's a home page URL or fed.brid.gy
or web.brid.gy AP actor URL, extracts the domain and uses that.
Otherwise, raises AssertionError.
Args:
id: str
Raises:
ValueError
"""
if not id:
raise ValueError()
if util.is_web(id):
parsed = urlparse(id)
if parsed.path in ('', '/'):
id = parsed.netloc
if re.match(common.DOMAIN_RE, id):
tld = id.split('.')[-1]
if tld in NON_TLDS:
raise ValueError(f"{id} looks like a domain but {tld} isn't a TLD")
return cls(id=id).key
raise ValueError(f'{id} is not a domain or usable home page URL')
@classmethod
def owns_id(cls, id):
"""Returns None if id is a domain or http(s) URL, False otherwise.
All web pages are http(s) URLs, but not all http(s) URLs are web pages.
"""
if not id:
return False
try:
key = cls.key_for(id)
if key:
user = key.get()
return True if user and user.has_redirects else None
except ValueError as e:
logger.info(e)
return None if util.is_web(id) else False
2023-06-16 20:16:17 +00:00
@classmethod
def target_for(cls, obj, shared=False):
"""Returns `obj`'s id, as a URL webmention target."""
# TODO: we have entities in prod that fail this, eg
# https://indieweb.social/users/bismark has source_protocol webmention
# assert obj.source_protocol in (cls.LABEL, cls.ABBREV, 'ui', None), str(obj)
2023-06-16 20:16:17 +00:00
if not util.is_web(obj.key.id()):
logger.warning(f"{obj.key} is source_protocol web but id isn't a URL!")
return None
return obj.key.id()
@classmethod
def send(cls, obj, url, **kwargs):
"""Sends a webmention to a given target URL.
See :meth:`Protocol.send` for details.
Returns true if the target URL doesn't advertise a webmention endpoint,
since webmention support itself is optional for web recipients.
https://fed.brid.gy/docs#error-handling
"""
source_url = obj.proxy_url()
logger.info(f'Sending webmention from {source_url} to {url}')
endpoint = common.webmention_discover(url).endpoint
if not endpoint:
return False
webmention.send(endpoint, source_url, url)
return True
@classmethod
def fetch(cls, obj, gateway=False, check_backlink=False, **kwargs):
"""Fetches a URL over HTTP and extracts its microformats2.
Follows redirects, but doesn't change the original URL in obj's id! The
:class:`Model` class doesn't allow that anyway, but more importantly, we
want to preserve that original URL becase other objects may refer to it
instead of the final redirect destination URL.
See :meth:`Protocol.fetch` for other background.
2023-04-17 22:36:29 +00:00
Args:
gateway: passed through to :func:`webutil.util.fetch_mf2`
check_backlink: bool, optional, whether to require a link to Bridgy
Fed. Ignored if the URL is a homepage, ie has no path.
kwargs: ignored
"""
url = obj.key.id()
is_homepage = urlparse(url).path.strip('/') == ''
require_backlink = (common.host_url().rstrip('/')
if check_backlink and not is_homepage
else None)
try:
2023-04-17 22:36:29 +00:00
parsed = util.fetch_mf2(url, gateway=gateway,
require_backlink=require_backlink)
except (ValueError, URLRequired) as e:
error(str(e))
if parsed is None:
error(f'id {urlparse(url).fragment} not found in {url}')
# find mf2 item
if is_homepage:
logger.info(f"{url} is user's web url")
entry = mf2util.representative_hcard(parsed, parsed['url'])
logger.info(f'Representative h-card: {json_dumps(entry, indent=2)}')
if not entry:
msg = f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}"
logging.info(msg)
raise common.NoMicroformats(msg)
else:
entry = mf2util.find_first_entry(parsed, ['h-entry'])
if not entry:
error(f'No microformats2 found in {url}')
# store final URL in mf2 object, and also default url property to it,
# since that's the fallback for AS1/AS2 id
if is_homepage:
entry.setdefault('rel-urls', {}).update(parsed.get('rel-urls', {}))
entry.setdefault('type', ['h-card'])
props = entry.setdefault('properties', {})
if parsed['url']:
entry['url'] = parsed['url']
props.setdefault('url', [parsed['url']])
logger.info(f'Extracted microformats2 entry: {json_dumps(entry, indent=2)}')
# run full authorship algorithm if necessary: https://indieweb.org/authorship
# duplicated in microformats2.json_to_object
author = util.get_first(props, 'author')
if not isinstance(author, dict) and not is_homepage:
logger.info(f'Fetching full authorship for author {author}')
author = mf2util.find_author({'items': [entry]}, hentry=entry,
fetch_mf2_func=util.fetch_mf2)
logger.info(f'Got: {author}')
if author:
props['author'] = util.trim_nulls([{
"type": ["h-card"],
'properties': {
field: [author[field]] if author.get(field) else []
for field in ('name', 'photo', 'url')
},
}])
obj.mf2 = entry
return obj
@classmethod
def serve(cls, obj):
"""Serves an :class:`Object` as HTML."""
obj_as1 = obj.as1
from_proto = PROTOCOLS.get(obj.source_protocol)
if from_proto:
# fill in author/actor if available
for field in 'author', 'actor':
val = as1.get_object(obj.as1, field)
if val.keys() == set(['id']) and val['id']:
loaded = from_proto.load(val['id'])
if loaded and loaded.as1:
obj_as1 = {**obj_as1, field: loaded.as1}
else:
logger.debug(f'Not hydrating actor or author due to source_protocol {obj.source_protocol}')
html = microformats2.activities_to_html([obj_as1])
# add HTML meta redirect to source page. should trigger for end users in
# browsers but not for webmention receivers (hopefully).
url = util.get_url(obj_as1)
if url:
utf8 = '<meta charset="utf-8">'
refresh = f'<meta http-equiv="refresh" content="0;url={url}">'
html = html.replace(utf8, utf8 + '\n' + refresh)
return html, {'Content-Type': common.CONTENT_TYPE_HTML}
@app.get('/web-site')
@flask_util.cached(cache, datetime.timedelta(days=1))
def enter_web_site():
return render_template('enter_web_site.html')
@app.post('/web-site')
def check_web_site():
url = request.values['url']
# this normalizes and lower cases domain
domain = util.domain_from_link(url, minimize=False)
if not domain:
flash(f'No domain found in {url}')
return render_template('enter_web_site.html')
elif domain in common.DOMAINS:
flash(f'{domain} is a Bridgy Fed domain')
return render_template('enter_web_site.html')
g.user = Web.get_or_create(domain, direct=True)
try:
g.user = g.user.verify()
except BaseException as e:
code, body = util.interpret_http_exception(e)
if code:
flash(f"Couldn't connect to {url}: {e}")
return render_template('enter_web_site.html')
raise
g.user.put()
return redirect(g.user.user_page_path())
@app.post('/webmention')
def webmention_external():
"""Handles inbound webmention, enqueue task to process.
Use a task queue to deliver to followers because we send to each inbox in
serial, which can take a long time with many followers/instances.
"""
logger.info(f'Params: {list(request.form.items())}')
source = flask_util.get_required_param('source').strip()
if not util.is_web(source):
error(f'Bad URL {source}')
domain = util.domain_from_link(source, minimize=False)
if not domain:
error(f'Bad source URL {source}')
g.user = Web.get_by_id(domain)
if not g.user:
error(f'No user found for domain {domain}')
queue_path = tasks_client.queue_path(APP_ID, TASKS_LOCATION, 'webmention')
task = tasks_client.create_task(
parent=queue_path,
task={
'app_engine_http_request': {
'http_method': 'POST',
'relative_uri': '/_ah/queue/webmention',
'body': urlencode(request.form).encode(),
# https://googleapis.dev/python/cloudtasks/latest/gapic/v2/types.html#google.cloud.tasks_v2.types.AppEngineHttpRequest.headers
'headers': {'Content-Type': 'application/x-www-form-urlencoded'},
},
},
)
msg = f'Enqueued task {task.name}.'
logger.info(msg)
return msg, 202
@app.post('/webmention-interactive')
def webmention_interactive():
"""Handler that runs interactive webmention-based requests from the web UI.
...eg the update profile button on user pages.
"""
try:
webmention_external()
flash(f'Updating fediverse profile from <a href="{g.user.web_url()}">{g.user.key.id()}</a>...')
except HTTPException as e:
flash(util.linkify(str(e.description), pretty=True))
path = g.user.user_page_path() if g.user else '/'
return redirect(path, code=302)
@app.post('/_ah/queue/webmention')
def webmention_task():
"""Handles inbound webmention task."""
logger.info(f'Params: {list(request.form.items())}')
# load user
source = flask_util.get_required_param('source').strip()
domain = util.domain_from_link(source, minimize=False)
logger.info(f'webmention from {domain}')
g.user = Web.get_by_id(domain)
if not g.user:
error(f'No user found for domain {domain}', status=304)
# fetch source page
try:
obj = Web.load(source, local=False, remote=True, check_backlink=True)
except BadRequest as e:
error(str(e.description), status=304)
2023-04-17 22:36:29 +00:00
except HTTPError as e:
if e.response.status_code not in (410, 404):
error(f'{e} ; {e.response.text if e.response else ""}', status=502)
create_id = f'{source}#bridgy-fed-create'
logger.info(f'Interpreting as Delete. Looking for {create_id}')
create = Object.get_by_id(create_id)
2023-04-17 22:36:29 +00:00
if not create or create.status != 'complete':
error(f"Bridgy Fed hasn't successfully published {source}", status=304)
id = f'{source}#bridgy-fed-delete'
obj = Object(id=id, our_as1={
2023-04-17 22:36:29 +00:00
'id': id,
'objectType': 'activity',
'verb': 'delete',
'actor': g.user.ap_actor(),
2023-04-17 22:36:29 +00:00
'object': source,
})
if not obj.mf2 and obj.type != 'delete':
error(f'No microformats2 found in {source}', status=304)
elif obj.mf2:
# default actor to user
2023-04-17 22:36:29 +00:00
props = obj.mf2['properties']
author_urls = microformats2.get_string_urls(props.get('author', []))
if author_urls and not g.user.is_web_url(author_urls[0]):
logger.info(f'Overriding author {author_urls[0]} with {g.user.ap_actor()}')
props['author'] = [g.user.ap_actor()]
logger.info(f'Converted to AS1: {obj.type}: {json_dumps(obj.as1, indent=2)}')
# if source is home page, update Web user and send an actor Update to
# followers' instances
if g.user and (g.user.key.id() == obj.key.id()
or g.user.is_web_url(obj.key.id())):
obj.put()
g.user.obj = obj
g.user.put()
actor_as1 = {
**obj.as1,
'id': g.user.ap_actor(),
'updated': util.now().isoformat(),
}
id = common.host_url(f'{obj.key.id()}#update-{util.now().isoformat()}')
obj = Object(id=id, our_as1={
'objectType': 'activity',
'verb': 'update',
'id': id,
'actor': g.user.ap_actor(),
'object': actor_as1,
})
return Web.receive(obj)