diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b13d2a5..7747370 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e . + pip install -e .\[richtext\] pip install -r requirements-test.txt - name: Run tests run: | diff --git a/requirements.txt b/requirements.txt index 3616ac3..8579f41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ requests>=2.13,<3.0 beautifulsoup4>=4.5.0,<5.0 wcwidth>=0.1.7 urwid>=2.0.0,<3.0 - +urwidgets>=0.1,<0.2 diff --git a/setup.py b/setup.py index 624e55d..a52d725 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ setup( 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Programming Language :: Python :: 3', ], - packages=['toot', 'toot.tui', 'toot.utils'], + packages=['toot', 'toot.tui', 'toot.tui.richtext', 'toot.utils'], python_requires=">=3.7", install_requires=[ "requests>=2.13,<3.0", @@ -40,6 +40,9 @@ setup( "urwid>=2.0.0,<3.0", "tomlkit>=0.10.0,<1.0" ], + extras_require={ + "richtext": ['urwidgets>=0.1,<0.2'], + }, entry_points={ 'console_scripts': [ 'toot=toot.console:main', diff --git a/tests/test_utils.py b/tests/test_utils.py index a1bba9a..9dbb579 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,7 @@ import pytest from toot.console import duration from toot.wcstring import wc_wrap, trunc, pad, fit_text +from toot.utils import urlencode_url def test_pad(): @@ -201,3 +202,8 @@ def test_duration(): with pytest.raises(ArgumentTypeError): duration("banana") + + +def test_urlencode_url(): + assert urlencode_url("https://www.example.com") == "https://www.example.com" + assert urlencode_url("https://www.example.com/url%20with%20spaces") == "https://www.example.com/url%20with%20spaces" diff --git a/tests/tui/test_rich_text.py b/tests/tui/test_rich_text.py new file mode 100644 index 0000000..68453fa --- /dev/null +++ 
b/tests/tui/test_rich_text.py @@ -0,0 +1,45 @@ +from urwid import Divider, Filler, Pile +from toot.tui.richtext import url_to_widget +from urwidgets import Hyperlink, TextEmbed + +from toot.tui.richtext.richtext import html_to_widgets + + +def test_url_to_widget(): + url = "http://foo.bar" + embed_widget = url_to_widget(url) + assert isinstance(embed_widget, TextEmbed) + + [(filler, length)] = embed_widget.embedded + assert length == len(url) + assert isinstance(filler, Filler) + + link_widget: Hyperlink = filler.base_widget + assert isinstance(link_widget, Hyperlink) + + assert link_widget.attrib == "link" + assert link_widget.text == url + assert link_widget.uri == url + + +def test_html_to_widgets(): + html = """ +

foo

+

foo bar baz

+ """.strip() + + [foo, divider, bar] = html_to_widgets(html) + + assert isinstance(foo, Pile) + assert isinstance(divider, Divider) + assert isinstance(bar, Pile) + + [foo_embed] = foo.widget_list + assert foo_embed.embedded == [] + assert foo_embed.attrib == [] + assert foo_embed.text == "foo" + + [bar_embed] = bar.widget_list + assert bar_embed.embedded == [] + assert bar_embed.attrib == [(None, 4), ("b", 3), (None, 1), ("i", 3)] + assert bar_embed.text == "foo bar baz" diff --git a/toot/output.py b/toot/output.py index 6fd59a2..83898f8 100644 --- a/toot/output.py +++ b/toot/output.py @@ -6,7 +6,7 @@ import textwrap from functools import lru_cache from toot import settings from toot.entities import Instance, Notification, Poll, Status -from toot.utils import get_text, parse_html +from toot.utils import get_text, html_to_paragraphs from toot.wcstring import wc_wrap from typing import List from wcwidth import wcswidth @@ -321,7 +321,7 @@ def print_status(status: Status, width: int = 80): def print_html(text, width=80): first = True - for paragraph in parse_html(text): + for paragraph in html_to_paragraphs(text): if not first: print_out("") for line in paragraph: diff --git a/toot/tui/app.py b/toot/tui/app.py index 9d78b12..6909d79 100644 --- a/toot/tui/app.py +++ b/toot/tui/app.py @@ -143,7 +143,6 @@ class TUI(urwid.Frame): def run(self): self.loop.set_alarm_in(0, lambda *args: self.async_load_instance()) self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_accounts()) - self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_tags()) self.loop.set_alarm_in(0, lambda *args: self.async_load_timeline( is_initial=True, timeline_name="home")) self.loop.run() @@ -339,22 +338,6 @@ class TUI(urwid.Frame): self.run_in_thread(_load_accounts, done_callback=_done_accounts) - def async_load_followed_tags(self): - def _load_tag_list(): - try: - return api.followed_tags(self.app, self.user) - except ApiError: - # not supported by all Mastodon servers so fail 
silently if necessary - return [] - - def _done_tag_list(tags): - if len(tags) > 0: - self.followed_tags = [t["name"] for t in tags] - else: - self.followed_tags = [] - - self.run_in_thread(_load_tag_list, done_callback=_done_tag_list) - def refresh_footer(self, timeline): """Show status details in footer.""" status, index, count = timeline.get_focused_status_with_counts() diff --git a/toot/tui/constants.py b/toot/tui/constants.py index 91bb3b7..f51ae61 100644 --- a/toot/tui/constants.py +++ b/toot/tui/constants.py @@ -57,6 +57,29 @@ PALETTE = [ ('dim', 'dark gray', ''), ('highlight', 'yellow', ''), ('success', 'dark green', ''), + + # HTML tag styling + ('a', ',italics', '', 'italics'), + # em tag is mapped to i + ('i', ',italics', '', 'italics'), + # strong tag is mapped to b + ('b', ',bold', '', 'bold'), + # special case for bold + italic nested tags + ('bi', ',bold,italics', '', ',bold,italics'), + ('u', ',underline', '', ',underline'), + ('del', ',strikethrough', '', ',strikethrough'), + ('code', 'light gray, standout', '', ',standout'), + ('pre', 'light gray, standout', '', ',standout'), + ('blockquote', 'light gray', '', ''), + ('h1', ',bold', '', ',bold'), + ('h2', ',bold', '', ',bold'), + ('h3', ',bold', '', ',bold'), + ('h4', ',bold', '', ',bold'), + ('h5', ',bold', '', ',bold'), + ('h6', ',bold', '', ',bold'), + ('class_mention_hashtag', 'light cyan', '', ''), + ('class_hashtag', 'light cyan', '', ''), + ] VISIBILITY_OPTIONS = [ diff --git a/toot/tui/overlays.py b/toot/tui/overlays.py index 75be80c..58eb457 100644 --- a/toot/tui/overlays.py +++ b/toot/tui/overlays.py @@ -4,10 +4,10 @@ import urwid import webbrowser from toot import __version__ -from toot.utils import format_content -from .utils import highlight_hashtags, highlight_keys -from .widgets import Button, EditBox, SelectableText from toot import api +from toot.tui.utils import highlight_keys +from toot.tui.widgets import Button, EditBox, SelectableText +from toot.tui.richtext import 
html_to_widgets class StatusSource(urwid.Padding): @@ -279,8 +279,10 @@ class Account(urwid.ListBox): if account["note"]: yield urwid.Divider() - for line in format_content(account["note"]): - yield urwid.Text(highlight_hashtags(line, followed_tags=set())) + + widgetlist = html_to_widgets(account["note"]) + for line in widgetlist: + yield (line) yield urwid.Divider() yield urwid.Text(["ID: ", ("highlight", f"{account['id']}")]) @@ -312,8 +314,11 @@ class Account(urwid.ListBox): name = field["name"].title() yield urwid.Divider() yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"]) - for line in format_content(field["value"]): - yield urwid.Text(highlight_hashtags(line, followed_tags=set())) + + widgetlist = html_to_widgets(field["value"]) + for line in widgetlist: + yield (line) + if field["verified_at"]: yield urwid.Text(("success", "✓ Verified")) diff --git a/toot/tui/poll.py b/toot/tui/poll.py index 0c3ff46..e738fc7 100644 --- a/toot/tui/poll.py +++ b/toot/tui/poll.py @@ -2,11 +2,9 @@ import urwid from toot import api from toot.exceptions import ApiError -from toot.utils import format_content from toot.utils.datetime import parse_datetime - -from .utils import highlight_hashtags from .widgets import Button, CheckBox, RadioButton +from .richtext import html_to_widgets class Poll(urwid.ListBox): @@ -87,8 +85,11 @@ class Poll(urwid.ListBox): def generate_contents(self, status): yield urwid.Divider() - for line in format_content(status.data["content"]): - yield urwid.Text(highlight_hashtags(line, set())) + + widgetlist = html_to_widgets(status.data["content"]) + + for line in widgetlist: + yield (line) yield urwid.Divider() yield self.build_linebox(self.generate_poll_detail()) diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py new file mode 100644 index 0000000..07e31c8 --- /dev/null +++ b/toot/tui/richtext/__init__.py @@ -0,0 +1,18 @@ +import urwid + +from toot.tui.utils import highlight_hashtags +from toot.utils import format_content 
+from typing import List + +try: + from .richtext import html_to_widgets, url_to_widget +except ImportError: + # Fallback if urwidgets are not available + def html_to_widgets(html: str) -> List[urwid.Widget]: + return [ + urwid.Text(highlight_hashtags(line)) + for line in format_content(html) + ] + + def url_to_widget(url: str): + return urwid.Text(("link", url)) diff --git a/toot/tui/richtext/richtext.py b/toot/tui/richtext/richtext.py new file mode 100644 index 0000000..71897c4 --- /dev/null +++ b/toot/tui/richtext/richtext.py @@ -0,0 +1,452 @@ +import re +import urwid +import unicodedata + +from bs4.element import NavigableString, Tag +from toot.tui.constants import PALETTE +from toot.utils import parse_html, urlencode_url +from typing import List, Tuple +from urwid.util import decompose_tagmarkup +from urwidgets import Hyperlink, TextEmbed + + +STYLE_NAMES = [p[0] for p in PALETTE] + +# NOTE: update this list if Mastodon starts supporting more block tags +BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"] + + +def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]: + """Convert html to urwid widgets""" + widgets: List[urwid.Widget] = [] + html = unicodedata.normalize("NFKC", html) + soup = parse_html(html) + + first_tag = True + for e in soup.body or soup: + if isinstance(e, NavigableString): + if first_tag and not recovery_attempt: + # if our first "tag" is a navigable string + # the HTML is out of spec, doesn't start with a tag, + # we see this in content from Pixelfed servers. + # attempt a fix by wrapping the HTML with

+ return html_to_widgets(f"

{html}

", recovery_attempt=True) + else: + continue + else: + name = e.name + # if our HTML starts with a tag, but not a block tag + # the HTML is out of spec. Attempt a fix by wrapping the + # HTML with

+ if (first_tag and not recovery_attempt and name not in BLOCK_TAGS): + return html_to_widgets(f"

{html}

", recovery_attempt=True) + + markup = render(name, e) + first_tag = False + + if not isinstance(markup, urwid.Widget): + # plaintext, so create a padded text widget + txt = text_to_widget("", markup) + markup = urwid.Padding( + txt, + align="left", + width=("relative", 100), + min_width=None, + ) + widgets.append(markup) + # separate top level widgets with a blank line + widgets.append(urwid.Divider(" ")) + return widgets[:-1] # but suppress the last blank line + + +def url_to_widget(url: str): + widget = len(url), urwid.Filler(Hyperlink(url, "link", url)) + return TextEmbed(widget) + + +def inline_tag_to_text(tag) -> Tuple: + """Convert html tag to plain text with tag as attributes recursively""" + markups = process_inline_tag_children(tag) + if not markups: + return (tag.name, "") + return (tag.name, markups) + + +def process_inline_tag_children(tag) -> List: + """Recursively retrieve all children + and convert to a list of markup text""" + markups = [] + for child in tag.children: + if isinstance(child, Tag): + markup = render(child.name, child) + markups.append(markup) + else: + markups.append(child) + return markups + + +URL_PATTERN = re.compile(r"(^.+)\x03(.+$)") + + +def text_to_widget(attr, markup) -> urwid.Widget: + markup_list = [] + for run in markup: + if isinstance(run, tuple): + txt, attr_list = decompose_tagmarkup(run) + # find anchor titles with an ETX separator followed by href + match = URL_PATTERN.match(txt) + if match: + label, url = match.groups() + anchor_attr = get_best_anchor_attr(attr_list) + markup_list.append(( + len(label), + urwid.Filler(Hyperlink(url, anchor_attr, label)), + )) + else: + markup_list.append(run) + else: + markup_list.append(run) + + return TextEmbed(markup_list) + + +def process_block_tag_children(tag) -> List[urwid.Widget]: + """Recursively retrieve all children + and convert to a list of widgets + any inline tags containing text will be + converted to Text widgets""" + + pre_widget_markups = [] + post_widget_markups 
= [] + child_widgets = [] + found_nested_widget = False + + for child in tag.children: + if isinstance(child, Tag): + # child is a nested tag; process using custom method + # or default to inline_tag_to_text + result = render(child.name, child) + if isinstance(result, urwid.Widget): + found_nested_widget = True + child_widgets.append(result) + else: + if not found_nested_widget: + pre_widget_markups.append(result) + else: + post_widget_markups.append(result) + else: + # child is text; append to the appropriate markup list + if not found_nested_widget: + pre_widget_markups.append(child) + else: + post_widget_markups.append(child) + + widget_list = [] + if len(pre_widget_markups): + widget_list.append(text_to_widget(tag.name, pre_widget_markups)) + + if len(child_widgets): + widget_list += child_widgets + + if len(post_widget_markups): + widget_list.append(text_to_widget(tag.name, post_widget_markups)) + + return widget_list + + +def get_urwid_attr_name(tag) -> str: + """Get the class name and translate to a + name suitable for use as an urwid + text attribute name""" + + if "class" in tag.attrs: + clss = tag.attrs["class"] + if len(clss) > 0: + style_name = "class_" + "_".join(clss) + # return the class name, only if we + # find it as a defined palette name + if style_name in STYLE_NAMES: + return style_name + + # fallback to returning the tag name + return tag.name + + +def basic_block_tag_handler(tag) -> urwid.Widget: + """default for block tags that need no special treatment""" + return urwid.Pile(process_block_tag_children(tag)) + + +def get_best_anchor_attr(attrib_list) -> str: + if not attrib_list: + return "" + flat_al = list(flatten(attrib_list)) + + for a in flat_al[0]: + # ref: https://docs.joinmastodon.org/spec/activitypub/ + # these are the class names (translated to attrib names) + # that we can support for display + + try: + if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]: + return a[0] + except KeyError: + continue + + return 
"a" + + +def render(attr: str, content: str): + if attr in ["a"]: + return render_anchor(content) + + if attr in ["blockquote"]: + return render_blockquote(content) + + if attr in ["br"]: + return render_br(content) + + if attr in ["em"]: + return render_em(content) + + if attr in ["ol"]: + return render_ol(content) + + if attr in ["pre"]: + return render_pre(content) + + if attr in ["span"]: + return render_span(content) + + if attr in ["b", "strong"]: + return render_strong(content) + + if attr in ["ul"]: + return render_ul(content) + + # Glitch-soc and Pleroma allow

...

in content + # Mastodon (PR #23913) does not; header tags are converted to

+ if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]: + return basic_block_tag_handler(content) + + # Fall back to inline_tag_to_text handler + return inline_tag_to_text(content) + + +def render_anchor(tag) -> Tuple: + """anchor tag handler""" + + markups = process_inline_tag_children(tag) + if not markups: + return (tag.name, "") + + href = tag.attrs["href"] + title, attrib_list = decompose_tagmarkup(markups) + if not attrib_list: + attrib_list = [tag] + if href: + # urlencode the path and query portions of the URL + href = urlencode_url(href) + # use ASCII ETX (end of record) as a + # delimiter between the title and the HREF + title += f"\x03{href}" + + attr = get_best_anchor_attr(attrib_list) + + if attr == "a": + # didn't find an attribute to use + # in the child markup, so let's + # try the anchor tag's own attributes + + attr = get_urwid_attr_name(tag) + + # hashtag anchors have a class of "mention hashtag" + # or "hashtag" + # we'll return style "class_mention_hashtag" + # or "class_hashtag" + # in that case; see corresponding palette entry + # in constants.py controlling hashtag highlighting + + return (attr, title) + + +def render_blockquote(tag) -> urwid.Widget: + widget_list = process_block_tag_children(tag) + blockquote_widget = urwid.LineBox( + urwid.Padding( + urwid.Pile(widget_list), + align="left", + width=("relative", 100), + min_width=None, + left=1, + right=1, + ), + tlcorner="", + tline="", + lline="│", + trcorner="", + blcorner="", + rline="", + bline="", + brcorner="", + ) + return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")]) + + +def render_br(tag) -> Tuple: + return ("br", "\n") + + +def render_em(tag) -> Tuple: + # to simplify the number of palette entries + # translate EM to I (italic) + markups = process_inline_tag_children(tag) + if not markups: + return ("i", "") + + # special case processing for bold and italic + for parent in tag.parents: + if parent.name == "b" or parent.name == "strong": + return 
("bi", markups) + + return ("i", markups) + + +def render_ol(tag) -> urwid.Widget: + """ordered list tag handler""" + + widgets = [] + list_item_num = 1 + increment = -1 if tag.has_attr("reversed") else 1 + + # get ol start= attribute if present + if tag.has_attr("start") and len(tag.attrs["start"]) > 0: + try: + list_item_num = int(tag.attrs["start"]) + except ValueError: + pass + + for li in tag.find_all("li", recursive=False): + markup = render("li", li) + + # li value= attribute will change the item number + # it also overrides any ol start= attribute + + if li.has_attr("value") and len(li.attrs["value"]) > 0: + try: + list_item_num = int(li.attrs["value"]) + except ValueError: + pass + + if not isinstance(markup, urwid.Widget): + txt = text_to_widget("li", [str(list_item_num), ". ", markup]) + # 1. foo, 2. bar, etc. + widgets.append(txt) + else: + txt = text_to_widget("li", [str(list_item_num), ". "]) + columns = urwid.Columns( + [txt, ("weight", 9999, markup)], dividechars=1, min_width=3 + ) + widgets.append(columns) + + list_item_num += increment + + return urwid.Pile(widgets) + + +def render_pre(tag) -> urwid.Widget: + #
 tag spec says that text should not wrap,
+    # but horizontal screen space is at a premium
+    # and we have no horizontal scroll bar, so allow
+    # wrapping.
+
+    widget_list = [urwid.Divider(" ")]
+    widget_list += process_block_tag_children(tag)
+
+    pre_widget = urwid.Padding(
+        urwid.Pile(widget_list),
+        align="left",
+        width=("relative", 100),
+        min_width=None,
+        left=1,
+        right=1,
+    )
+    return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
+
+
+def render_span(tag) -> Tuple:
+    """Render a <span> as (attr, markup), styled by its own class or else by its parent."""
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return (tag.name, "")
+
+    # span inherits its parent's class definition
+    # unless it has a specific class definition
+    # of its own
+
+    if "class" in tag.attrs:
+        # uncomment the following code to hide all HTML marked
+        # invisible (generally, the http:// prefix of URLs)
+        # could be a user preference, it's only advisable if
+        # the terminal supports OSC 8 hyperlinks (and that's not
+        # automatically detectable)
+
+        # if "invisible" in tag.attrs["class"]:
+        #     return (tag.name, "")
+
+        style_name = get_urwid_attr_name(tag)
+
+        if style_name != "span":
+            # get_urwid_attr_name found a palette entry for this span's class
+            return (style_name, markups)
+
+    if tag.parent:
+        return (get_urwid_attr_name(tag.parent), markups)
+    else:
+        # no parent to inherit from: use the plain "span" attribute name
+        return ("span", markups)
+
+
+def render_strong(tag) -> Tuple:
+    """Render <b>/<strong> as a ("b", ...) markup tuple, or ("bi", ...) inside <i>/<em>."""
+    # STRONG is folded into B to keep the number of palette entries small
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return ("b", "")
+
+    # any <i>/<em> ancestor means bold + italic, which is styled with the "bi" attribute
+    for parent in tag.parents:
+        if parent.name == "i" or parent.name == "em":
+            return ("bi", markups)
+
+    return ("b", markups)
+
+
+def render_ul(tag) -> urwid.Widget:
+    """Render a <ul> as a Pile holding one bulleted row per direct <li> child."""
+
+    widgets = []
+    # recursive=False: only direct <li> children; a nested list is rendered via render("li", ...)
+    for li in tag.find_all("li", recursive=False):
+        markup = render("li", li)
+
+        if not isinstance(markup, urwid.Widget):
+            txt = text_to_widget("li", ["\N{bullet} ", markup])
+            # plain markup: bullet and text share one text widget ("* foo", "* bar", etc.)
+            widgets.append(txt)
+        else:  # the item rendered to a widget: put the bullet in its own column beside it
+            txt = text_to_widget("li", ["\N{bullet} "])
+            columns = urwid.Columns(
+                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+            )
+            widgets.append(columns)
+
+    return urwid.Pile(widgets)
+
+
+def flatten(data):  # generator: yields the non-tuple leaves of nested tuples, in order
+    if isinstance(data, tuple):
+        for x in data:
+            yield from flatten(x)  # recurse so tuples nested at any depth are expanded
+    else:
+        yield data  # anything that is not a tuple (lists included) is yielded unchanged
diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py
index ddc4a35..93421ce 100644
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -5,14 +5,14 @@ import webbrowser
 from typing import List, Optional
 
 from toot.tui import app
-from toot.utils import format_content
+from toot.tui.richtext import html_to_widgets, url_to_widget
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name
 
-from .entities import Status
-from .scroll import Scrollable, ScrollBar
-from .utils import highlight_hashtags, highlight_keys
-from .widgets import SelectableText, SelectableColumns
+from toot.entities import Status
+from toot.tui.scroll import Scrollable, ScrollBar
+from toot.tui.utils import highlight_keys
+from toot.tui.widgets import SelectableText, SelectableColumns
 
 logger = logging.getLogger("toot")
 
@@ -310,7 +310,6 @@ class Timeline(urwid.Columns):
 class StatusDetails(urwid.Pile):
     def __init__(self, timeline: Timeline, status: Optional[Status]):
         self.status = status
-        self.followed_tags = timeline.tui.followed_tags
         self.followed_accounts = timeline.tui.followed_accounts
 
         reblogged_by = status.author if status and status.reblog else None
@@ -340,8 +339,10 @@ class StatusDetails(urwid.Pile):
             yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
         else:
             content = status.original.translation if status.original.show_translation else status.data["content"]
-            for line in format_content(content):
-                yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
+            widgetlist = html_to_widgets(content)
+
+            for line in widgetlist:
+                yield (line)
 
             media = status.data["media_attachments"]
             if media:
@@ -350,7 +351,7 @@ class StatusDetails(urwid.Pile):
                     yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"]))
                     if m["description"]:
                         yield ("pack", urwid.Text(m["description"]))
-                    yield ("pack", urwid.Text(("link", m["url"])))
+                    yield ("pack", url_to_widget(m["url"]))
 
             poll = status.original.data.get("poll")
             if poll:
@@ -410,7 +411,7 @@ class StatusDetails(urwid.Pile):
         if card["description"]:
             yield urwid.Text(card["description"].strip())
             yield urwid.Text("")
-        yield urwid.Text(("link", card["url"]))
+        yield url_to_widget(card["url"])
 
     def poll_generator(self, poll):
         for idx, option in enumerate(poll["options"]):
diff --git a/toot/tui/utils.py b/toot/tui/utils.py
index 377522b..734ae32 100644
--- a/toot/tui/utils.py
+++ b/toot/tui/utils.py
@@ -35,15 +35,12 @@ def highlight_keys(text, high_attr, low_attr=""):
     return list(_gen())
 
 
-def highlight_hashtags(line, followed_tags, attr="hashtag", followed_attr="hashtag_followed"):
+def highlight_hashtags(line):
     hline = []
 
     for p in re.split(HASHTAG_PATTERN, line):
         if p.startswith("#"):
-            if p[1:].lower() in (t.lower() for t in followed_tags):
-                hline.append((followed_attr, p))
-            else:
-                hline.append((attr, p))
+            hline.append(("hashtag", p))
         else:
             hline.append(p)
 
diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py
index e8103ac..c4afa7f 100644
--- a/toot/utils/__init__.py
+++ b/toot/utils/__init__.py
@@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from typing import Dict
 
 from toot.exceptions import ConsoleError
+from urllib.parse import urlparse, urlencode, quote, unquote
 
 
 def str_bool(b):
@@ -22,20 +23,22 @@ def str_bool_nullable(b):
     return None if b is None else str_bool(b)
 
 
-def get_text(html):
-    """Converts html to text, strips all tags."""
-
+def parse_html(html: str) -> BeautifulSoup:
     # Ignore warnings made by BeautifulSoup, if passed something that looks like
     # a file (e.g. a dot which matches current dict), it will warn that the file
     # should be opened instead of passing a filename.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
-        text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()
-
-    return unicodedata.normalize('NFKC', text)
+        return BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
 
 
-def parse_html(html):
+def get_text(html):
+    """Return the text of `html` with all tags stripped, NFKC-normalized."""
+    text = parse_html(html).get_text()
+    return unicodedata.normalize("NFKC", text)
+
+
+def html_to_paragraphs(html):
     """Attempt to convert html to plain text while keeping line breaks.
     Returns a list of paragraphs, each being a list of lines.
     """
@@ -54,7 +57,7 @@ def format_content(content):
     Returns a generator yielding lines of content.
     """
 
-    paragraphs = parse_html(content)
+    paragraphs = html_to_paragraphs(content)
 
     first = True
 
@@ -186,3 +189,14 @@ def _warn_scheme_deprecated():
         "instead write:",
         "  toot instance http://unsafehost.com\n"
     ]))
+
+
+def urlencode_url(url):
+    """Percent-encode the path and query of `url`; already-encoded input is not encoded twice."""
+    from urllib.parse import parse_qsl  # function-scope: not part of this module's urllib.parse import
+    parsed_url = urlparse(url)
+    # unquote (path) / parse_qsl (query) decode first, so existing %XX escapes are not re-encoded
+    encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/")
+    # the query string is `.query`; `.params` is only the ";..." suffix of the last path segment
+    query = urlencode(parse_qsl(parsed_url.query, keep_blank_values=True), safe="-._~()'!*:@,;?/", quote_via=quote)
+    return parsed_url._replace(path=encoded_path, query=query).geturl()