From d73c79b2ba1cdfa206d65a2b605e4d028e42af26 Mon Sep 17 00:00:00 2001 From: Chaiwat Suttipongsakul Date: Sun, 24 Feb 2019 21:36:06 +0700 Subject: [PATCH 1/2] fix unicode hashtag --- little_boxes/content_helper.py | 8 ++++++-- requirements.txt | 1 + setup.py | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/little_boxes/content_helper.py b/little_boxes/content_helper.py index f670849..cd7ee30 100644 --- a/little_boxes/content_helper.py +++ b/little_boxes/content_helper.py @@ -1,10 +1,11 @@ -import re from typing import Dict from typing import List from typing import Tuple from markdown import markdown +import regex as re + from .activitypub import get_backend from .webfinger import get_actor_url @@ -24,7 +25,10 @@ MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+") def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]: base_url = get_backend().base_url() tags = [] - for hashtag in re.findall(HASHTAG_REGEX, content): + hashtags = re.findall(HASHTAG_REGEX, content) + hashtags.sort() + hashtags.reverse() # replace longest tags first + for hashtag in hashtags: tag = hashtag[1:] link = f'' tags.append(dict(href=f"{base_url}/tags/{tag}", name=hashtag, type="Hashtag")) diff --git a/requirements.txt b/requirements.txt index c5e4c7c..785d00b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ markdown pyld pycryptodome html2text +regex diff --git a/setup.py b/setup.py index 7872ad7..f19afe3 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,12 @@ #!/usr/bin/env python +from distutils.core import setup import io import os -from distutils.core import setup from setuptools import find_packages + here = os.path.abspath(os.path.dirname(__file__)) @@ -29,6 +30,7 @@ REQUIRED = [ "pycryptodome", "html2text", "mdx_linkify", + "regex", ] DEPENDENCY_LINKS = [] From 9ec30df3f67bceec5bc859dd0e0bfdf5642779f3 Mon Sep 17 00:00:00 2001 From: Chaiwat Suttipongsakul Date: Sun, 24 Feb 2019 22:02:28 +0700 Subject: [PATCH 2/2] same hashtag should be replaced only one time --- little_boxes/content_helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/little_boxes/content_helper.py b/little_boxes/content_helper.py index cd7ee30..c3b342c 100644 --- a/little_boxes/content_helper.py +++ b/little_boxes/content_helper.py @@ -26,8 +26,9 @@ def hashtagify(content: str) -> Tuple[str, List[Dict[str, str]]]: base_url = get_backend().base_url() tags = [] hashtags = re.findall(HASHTAG_REGEX, content) + hashtags = list(set(hashtags)) # unique tags hashtags.sort() - hashtags.reverse() # replace longest tags first + hashtags.reverse() # replace longest tag first for hashtag in hashtags: tag = hashtag[1:] link = f''