From aa351e27e20f1724fcb9d8fc25f1fc3776127be5 Mon Sep 17 00:00:00 2001 From: Alain St-Denis Date: Sun, 29 Oct 2023 10:03:09 -0400 Subject: [PATCH] fix url regex --- federation/utils/text.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/federation/utils/text.py b/federation/utils/text.py index ab4b8ec..bf1f235 100644 --- a/federation/utils/text.py +++ b/federation/utils/text.py @@ -11,7 +11,8 @@ TAG_PATTERN = re.compile(r'(#[\w\-]+)([)\]_!?*%/.,;\s]+\s*|\Z)', re.UNICODE) # This will match non matching braces. I don't think it's an issue. MENTION_PATTERN = re.compile(r'(@\{?(?:[\w\-. \u263a-\U0001f645]*; *)?[\w]+@[\w\-.]+\.[\w]+}?)', re.UNICODE) # based on https://stackoverflow.com/a/6041965 -URL_PATTERN = re.compile(r'((?:(?:https?|ftp)://|^|(?<=[("<\s]))+(?:[\w\-]+(?:(?:\.[\w\-]+)+))(?:[\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))', +URL_PATTERN = re.compile(r'((?:(?:https?|ftp)://|^|(?<=[("<\s]))+(?:[\w\-]+(?:(?:\.[\w\-]+)+))' + r'[\w.,;:@?!$()*^=%&/~+\-#]*(?<![:;,).>"]))', re.UNICODE) def decode_if_bytes(text):