Do not assume that the last part of a mention.href is the user's name. Adjust the tag and mention patterns so that they match only at the beginning of the text or after leading whitespace.

2023-07-13 11:09:00 -04:00 · 2023-07-13 11:09:00 -04:00 · d577e39777
commit d577e39777
--- a/federation/entities/activitypub/models.py
+++ b/federation/entities/activitypub/models.py
@@ -857,9 +857,13 @@ class Note(Object, RawContentMixin):

    def _find_and_mark_mentions(self):
        mentions = [mention for mention in self.tag_objects if isinstance(mention, Mention)]
-        hrefs = [mention.href for mention in mentions]
-        # add Mastodon's form
-        hrefs.extend([re.sub(r'/(users/)([\w]+)$', r'/@\2', href) for href in hrefs])
+        hrefs = []
+        for mention in mentions:
+            hrefs.append(mention.href)
+            # add Mastodon's form
+            parsed = urlparse(mention.href)
+            username = mention.name.lstrip('@').split('@')[0]
+            hrefs.append(f'{parsed.scheme}://{parsed.netloc}/@{username}')
        for href in hrefs:
            links = self._soup.find_all(href=href)
            for link in links:
--- a/federation/utils/text.py
+++ b/federation/utils/text.py
@@ -9,8 +9,8 @@ from bs4.element import NavigableString
 from commonmark import commonmark

 ILLEGAL_TAG_CHARS = "!#$%^&*+.,@£/()=?`'\\{[]}~;:\"’”—\xa0"
-TAG_PATTERN = re.compile(r'(#[\w]+)', re.UNICODE)
-MENTION_PATTERN = re.compile(r'(@{?[\S ]?[^{}@]+[@;]?\s*[\w\-./@]+[\w/]+}?)', re.UNICODE)
+TAG_PATTERN = re.compile(r'(^|\s)(#[\w]+)', re.UNICODE)
+MENTION_PATTERN = re.compile(r'(^|\s)(@{?[\S ]?[^{}@]+[@;]?\s*[\w\-./@]+[\w/]+}?)', re.UNICODE)


 def decode_if_bytes(text):