kopia lustrzana https://gitlab.com/jaywink/federation
Merge branch 'todos-and-issues' into 'master'
Fixes addressing various manually tracked content issues. See merge request jaywink/federation!183 (master)
commit
1f15583aad
36
CHANGELOG.md
36
CHANGELOG.md
|
@ -1,5 +1,41 @@
|
||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
* This is actually both a change and a fix. AP Image objects do not define properties matching the
|
||||||
|
HTML img tag alt and title properties. Image.name is used to render both alt and title, which IMHO is
|
||||||
|
wrong. With this change, markdown images defining the title property will be recognized instead of
|
||||||
|
being thrown away (the fix) and the title property, if defined, will have precedence over the
|
||||||
|
alt property as the Image.name value (the change). Before this change, the client app would properly
|
||||||
|
render the img tag from the markdown source (with distinct alt and title properties), but the Image
|
||||||
|
object would not federate and hence not be displayed on other platforms (namely Mastodon).
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
* Note._find_and_mark_mentions: When an AP Mention object href can't be found in the rendered content,
|
||||||
|
try the name property.
|
||||||
|
|
||||||
|
* Ignore media objects that don't define a media type.
|
||||||
|
|
||||||
|
* Prevent rendered content image duplication when an image is both in the AP payload rendered content
|
||||||
|
and defined as an attachment that doesn't set the inlineImage property.
|
||||||
|
|
||||||
|
* Instead of discarding the whole AP payload when encountering an undefined or unlisted AP object,
|
||||||
|
log a warning and keep going. Ensure None is returned when a nested field only contains an undefined
|
||||||
|
object.
|
||||||
|
|
||||||
|
* Accept the application/ld+json type for webfinger AP links.
|
||||||
|
|
||||||
|
* Mark an AP mention only if profile.finger is defined.
|
||||||
|
|
||||||
|
* Handle escape sequences for inbound markdown mentions.
|
||||||
|
|
||||||
|
* Extend the Unicode character range allowed in markdown mentions.
|
||||||
|
|
||||||
|
* Discard illegal characters from tag text. Previously, this was done only on tag links.
|
||||||
|
|
||||||
## [0.25.1] - 2024-02-18
|
## [0.25.1] - 2024-02-18
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
|
@ -209,11 +209,16 @@ class MixedField(fields.Nested):
|
||||||
ret = []
|
ret = []
|
||||||
for item in value:
|
for item in value:
|
||||||
if item.get('@type'):
|
if item.get('@type'):
|
||||||
res = super()._deserialize(item, attr, data, **kwargs)
|
try:
|
||||||
|
res = super()._deserialize(item, attr, data, **kwargs)
|
||||||
|
except KeyError as ex:
|
||||||
|
logger.warning("nested field: undefined JSON-LD type %s", ex)
|
||||||
|
continue
|
||||||
ret.append(res if not isinstance(res, list) else res[0])
|
ret.append(res if not isinstance(res, list) else res[0])
|
||||||
else:
|
else:
|
||||||
ret.append(self.iri._deserialize(item, attr, data, **kwargs))
|
ret.append(self.iri._deserialize(item, attr, data, **kwargs))
|
||||||
|
|
||||||
|
if not ret: ret.append(None)
|
||||||
return ret if len(ret) > 1 or self.many else ret[0]
|
return ret if len(ret) > 1 or self.many else ret[0]
|
||||||
|
|
||||||
|
|
||||||
|
@ -247,7 +252,7 @@ class Object(BaseEntity, metaclass=JsonLDAnnotation):
|
||||||
icon = MixedField(as2.icon, nested='ImageSchema')
|
icon = MixedField(as2.icon, nested='ImageSchema')
|
||||||
image = MixedField(as2.image, nested='ImageSchema')
|
image = MixedField(as2.image, nested='ImageSchema')
|
||||||
tag_objects = MixedField(as2.tag, nested=['NoteSchema', 'HashtagSchema','MentionSchema','PropertyValueSchema','EmojiSchema'], many=True)
|
tag_objects = MixedField(as2.tag, nested=['NoteSchema', 'HashtagSchema','MentionSchema','PropertyValueSchema','EmojiSchema'], many=True)
|
||||||
attachment = fields.Nested(as2.attachment, nested=['LinkSchema', 'NoteSchema', 'ImageSchema', 'AudioSchema', 'DocumentSchema','PropertyValueSchema','IdentityProofSchema'],
|
attachment = MixedField(as2.attachment, nested=['LinkSchema', 'NoteSchema', 'ImageSchema', 'AudioSchema', 'DocumentSchema','PropertyValueSchema','IdentityProofSchema'],
|
||||||
many=True, default=[])
|
many=True, default=[])
|
||||||
content_map = LanguageMap(as2.content) # language maps are not implemented in calamus
|
content_map = LanguageMap(as2.content) # language maps are not implemented in calamus
|
||||||
context = fields.RawJsonLD(as2.context)
|
context = fields.RawJsonLD(as2.context)
|
||||||
|
@ -421,6 +426,8 @@ class Document(Object):
|
||||||
url = MixedField(as2.url, nested='LinkSchema')
|
url = MixedField(as2.url, nested='LinkSchema')
|
||||||
|
|
||||||
def to_base(self):
|
def to_base(self):
|
||||||
|
if self.media_type is missing:
|
||||||
|
return self
|
||||||
self.__dict__.update({'schema': True})
|
self.__dict__.update({'schema': True})
|
||||||
if self.media_type.startswith('image'):
|
if self.media_type.startswith('image'):
|
||||||
return Image(**get_base_attributes(self))
|
return Image(**get_base_attributes(self))
|
||||||
|
@ -866,7 +873,7 @@ class Note(Object, RawContentMixin):
|
||||||
normalized_url = f'{parsed.scheme}://{parsed.netloc}{normalized_path.decode()}'
|
normalized_url = f'{parsed.scheme}://{parsed.netloc}{normalized_path.decode()}'
|
||||||
links = {link['href'].lower(), unquote(link['href']).lower(), url, normalized_url}
|
links = {link['href'].lower(), unquote(link['href']).lower(), url, normalized_url}
|
||||||
if links.intersection(hrefs):
|
if links.intersection(hrefs):
|
||||||
tag = re.match(r'^#?([\w\-]+$)', link.text)
|
tag = re.match(r'^#?([\w\-]+)', link.text)
|
||||||
if tag:
|
if tag:
|
||||||
link['data-hashtag'] = tag.group(1).lower()
|
link['data-hashtag'] = tag.group(1).lower()
|
||||||
|
|
||||||
|
@ -879,17 +886,28 @@ class Note(Object, RawContentMixin):
|
||||||
for mention in mentions:
|
for mention in mentions:
|
||||||
hrefs = []
|
hrefs = []
|
||||||
profile = get_profile_or_entity(fid=mention.href, remote_url=mention.href)
|
profile = get_profile_or_entity(fid=mention.href, remote_url=mention.href)
|
||||||
if profile and not profile.url:
|
if profile and not (profile.url and profile.finger):
|
||||||
# This should be removed when we are confident that the remote_url property
|
# This should be removed when we are confident that the remote_url and
|
||||||
# has been populated for most profiles on the client app side.
|
# finger properties have been populated for most profiles on the client app side.
|
||||||
profile = retrieve_and_parse_profile(profile.id)
|
profile = retrieve_and_parse_profile(profile.id)
|
||||||
if profile:
|
if profile and profile.finger:
|
||||||
hrefs.extend([profile.id, profile.url])
|
hrefs.extend([profile.id, profile.url])
|
||||||
|
else:
|
||||||
|
continue
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
links = self._soup.find_all(href=href)
|
links = self._soup.find_all(href=href)
|
||||||
for link in links:
|
for link in links:
|
||||||
link['data-mention'] = profile.finger
|
link['data-mention'] = profile.finger
|
||||||
self._mentions.add(profile.finger)
|
self._mentions.add(profile.finger)
|
||||||
|
if profile.finger not in self._mentions:
|
||||||
|
# can't find some mentions using their href property value
|
||||||
|
# try with the name property
|
||||||
|
matches = self._soup.find_all(string=mention.name)
|
||||||
|
for match in matches:
|
||||||
|
link = match.find_parent('a')
|
||||||
|
if link:
|
||||||
|
link['data-mention'] = profile.finger
|
||||||
|
self._mentions.add(profile.finger)
|
||||||
|
|
||||||
def extract_mentions(self):
|
def extract_mentions(self):
|
||||||
"""
|
"""
|
||||||
|
@ -953,7 +971,7 @@ class Note(Object, RawContentMixin):
|
||||||
if hasattr(child, 'to_base'):
|
if hasattr(child, 'to_base'):
|
||||||
child = child.to_base()
|
child = child.to_base()
|
||||||
if isinstance(child, Image):
|
if isinstance(child, Image):
|
||||||
if child.inline or (child.image and child.image in self.raw_content):
|
if child.inline or self._soup.find('img', src=child.url):
|
||||||
continue
|
continue
|
||||||
children.append(child)
|
children.append(child)
|
||||||
self._cached_children = children
|
self._cached_children = children
|
||||||
|
|
|
@ -6,6 +6,7 @@ from typing import List, Set, Union, Dict, Tuple
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from commonmark import commonmark
|
from commonmark import commonmark
|
||||||
|
from markdownify import markdownify
|
||||||
from marshmallow import missing
|
from marshmallow import missing
|
||||||
|
|
||||||
from federation.entities.activitypub.enums import ActivityType
|
from federation.entities.activitypub.enums import ActivityType
|
||||||
|
@ -224,13 +225,17 @@ class RawContentMixin(BaseEntity):
|
||||||
Returns a Tuple of (url, filename).
|
Returns a Tuple of (url, filename).
|
||||||
"""
|
"""
|
||||||
images = []
|
images = []
|
||||||
if self._media_type != "text/markdown" or self.raw_content is None:
|
if hasattr(self, '_soup'):
|
||||||
return images
|
for img in self._soup.find_all('img', src=re.compile(r'^http')):
|
||||||
regex = r"!\[([\w\s\-\']*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)"
|
images.append((img['src'], img.get('title', '') or img.get('alt', '')))
|
||||||
matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE)
|
else:
|
||||||
for match in matches:
|
if self._media_type != "text/markdown" or self.raw_content is None:
|
||||||
groups = match.groups()
|
return images
|
||||||
images.append((groups[1], groups[0] or ""))
|
regex = r"!\[([\w\s\-\']*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)"
|
||||||
|
matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE)
|
||||||
|
for match in matches:
|
||||||
|
groups = match.groups()
|
||||||
|
images.append((groups[1], groups[0] or ""))
|
||||||
return images
|
return images
|
||||||
|
|
||||||
# Legacy. Keep this until tests are reworked
|
# Legacy. Keep this until tests are reworked
|
||||||
|
@ -258,6 +263,9 @@ class RawContentMixin(BaseEntity):
|
||||||
if handle:
|
if handle:
|
||||||
self._mentions.add(handle)
|
self._mentions.add(handle)
|
||||||
self.raw_content = self.raw_content.replace(mention, '@' + handle)
|
self.raw_content = self.raw_content.replace(mention, '@' + handle)
|
||||||
|
# markdownify the extracted mention in case some characters are escaped in
|
||||||
|
# raw_content
|
||||||
|
self.raw_content = self.raw_content.replace(markdownify(mention), '@' + handle)
|
||||||
|
|
||||||
|
|
||||||
class OptionalRawContentMixin(RawContentMixin):
|
class OptionalRawContentMixin(RawContentMixin):
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
# noinspection PyPackageRequirements
|
# noinspection PyPackageRequirements
|
||||||
|
from commonmark import commonmark
|
||||||
from freezegun import freeze_time
|
from freezegun import freeze_time
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
@ -152,8 +153,7 @@ def activitypubpost_tags():
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def activitypubpost_embedded_images():
|
def activitypubpost_embedded_images():
|
||||||
with freeze_time("2019-04-27"):
|
with freeze_time("2019-04-27"):
|
||||||
obj = models.Post(
|
raw_content="""
|
||||||
raw_content="""
|
|
||||||
#Cycling #lauttasaari #sea #sun
|
#Cycling #lauttasaari #sea #sun
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,7 +166,10 @@ def activitypubpost_embedded_images():
|
||||||
[foo](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414710.jpg)
|
[foo](https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414710.jpg)
|
||||||
#only a link, not embedded
|
#only a link, not embedded
|
||||||
https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414711.jpg
|
https://jasonrobinson.me/media/uploads/2019/07/16/daa24d89-cedf-4fc7-bad8-74a9025414711.jpg
|
||||||
""",
|
"""
|
||||||
|
obj = models.Post(
|
||||||
|
raw_content=raw_content,
|
||||||
|
rendered_content=commonmark(raw_content, ignore_html_blocks=True),
|
||||||
public=True,
|
public=True,
|
||||||
provider_display_name="Socialhome",
|
provider_display_name="Socialhome",
|
||||||
id=f"http://127.0.0.1:8000/post/123456/",
|
id=f"http://127.0.0.1:8000/post/123456/",
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from typing import Optional, Any
|
from typing import Optional, Any
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
@ -16,6 +17,7 @@ except Exception as exc:
|
||||||
federation_user = None
|
federation_user = None
|
||||||
logger.warning("django is required for get requests signing: %s", exc)
|
logger.warning("django is required for get requests signing: %s", exc)
|
||||||
|
|
||||||
|
type_path = re.compile(r'^application/(activity|ld)\+json')
|
||||||
|
|
||||||
def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
|
def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
|
@ -30,7 +32,7 @@ def get_profile_id_from_webfinger(handle: str) -> Optional[str]:
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return
|
return
|
||||||
for link in doc.get("links", []):
|
for link in doc.get("links", []):
|
||||||
if link.get("rel") == "self" and link.get("type") == "application/activity+json":
|
if link.get("rel") == "self" and type_path.match(link.get("type")):
|
||||||
return link["href"]
|
return link["href"]
|
||||||
logger.debug("get_profile_id_from_webfinger: found webfinger but it has no as2 self href")
|
logger.debug("get_profile_id_from_webfinger: found webfinger but it has no as2 self href")
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,8 @@ from commonmark import commonmark
|
||||||
|
|
||||||
ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0"
|
ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0"
|
||||||
TAG_PATTERN = re.compile(r'(#[\w\-]+)([)\]_!?*%/.,;\s]+\s*|\Z)', re.UNICODE)
|
TAG_PATTERN = re.compile(r'(#[\w\-]+)([)\]_!?*%/.,;\s]+\s*|\Z)', re.UNICODE)
|
||||||
# This will match non matching braces. I don't think it's an issue.
|
# This will match non-matching braces. I don't think it's an issue.
|
||||||
MENTION_PATTERN = re.compile(r'(@\{?(?:[\w\-. \u263a-\U0001f645]*; *)?[\w]+@[\w\-.]+\.[\w]+}?)', re.UNICODE)
|
MENTION_PATTERN = re.compile(r'(@\{?(?:[\w\-. \u0250-\U0001f64f]*; *)?[\w]+@[\w\-.]+\.[\w]+}?)', re.UNICODE)
|
||||||
# based on https://stackoverflow.com/a/6041965
|
# based on https://stackoverflow.com/a/6041965
|
||||||
URL_PATTERN = re.compile(r'((?:(?:https?|ftp)://|^|(?<=[("<\s]))+(?:[\w\-]+(?:(?:\.[\w\-]+)+))'
|
URL_PATTERN = re.compile(r'((?:(?:https?|ftp)://|^|(?<=[("<\s]))+(?:[\w\-]+(?:(?:\.[\w\-]+)+))'
|
||||||
r'[\w.,;:@?!$()*^=%&/~+\-#]*(?<![:;,).>"]))',
|
r'[\w.,;:@?!$()*^=%&/~+\-#]*(?<![:;,).>"]))',
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -37,6 +37,7 @@ setup(
|
||||||
"lxml>=3.4.0",
|
"lxml>=3.4.0",
|
||||||
"iteration_utilities",
|
"iteration_utilities",
|
||||||
"jsonschema>=2.0.0",
|
"jsonschema>=2.0.0",
|
||||||
|
"markdownify",
|
||||||
"pycryptodome>=3.4.10",
|
"pycryptodome>=3.4.10",
|
||||||
"python-dateutil>=2.4.0",
|
"python-dateutil>=2.4.0",
|
||||||
"python-httpsig-socialhome",
|
"python-httpsig-socialhome",
|
||||||
|
|
Ładowanie…
Reference in New Issue