From 0f39b1087f052148f8f83aac6c24e99b24413767 Mon Sep 17 00:00:00 2001
From: Daniel Schwarz <schwarz.dan@gmail.com>
Date: Fri, 22 Sep 2023 21:32:19 -0400
Subject: [PATCH 01/20] Support to display a limited set of HTML tags

HTML tag support is aligned with Mastodon 4.2 supported tags.
This code introduces a soft dependency on the urwidgets library.
If urwidgets is not available, HTML tags are still supported,
but hyperlinks are not underlined using the OSC 8 terminal
feature (on supported terminals).
---
 .flake8                           |   1 +
 toot/tui/app.py                   |  17 --
 toot/tui/constants.py             |  23 ++
 toot/tui/overlays.py              |  21 +-
 toot/tui/poll.py                  |  12 +-
 toot/tui/richtext.py              | 457 ++++++++++++++++++++++++++++++
 toot/tui/stubs/stub_hyperlink.py  |  30 ++
 toot/tui/stubs/stub_text_embed.py |  29 ++
 toot/tui/stubs/urwidgets.py       |   8 +
 toot/tui/timeline.py              |  40 ++-
 toot/tui/urwidgets.py             |   8 +
 toot/tui/utils.py                 |  15 -
 toot/utils/__init__.py            |  12 +
 13 files changed, 619 insertions(+), 54 deletions(-)
 create mode 100644 toot/tui/richtext.py
 create mode 100644 toot/tui/stubs/stub_hyperlink.py
 create mode 100644 toot/tui/stubs/stub_text_embed.py
 create mode 100644 toot/tui/stubs/urwidgets.py
 create mode 100644 toot/tui/urwidgets.py

diff --git a/.flake8 b/.flake8
index 6efbecd..cc916ad 100644
--- a/.flake8
+++ b/.flake8
@@ -1,4 +1,5 @@
 [flake8]
 exclude=build,tests,tmp,venv,toot/tui/scroll.py
 ignore=E128,W503
+per-file-ignores=toot/tui/stubs/urwidgets.py:F401
 max-line-length=120
diff --git a/toot/tui/app.py b/toot/tui/app.py
index 9d78b12..6909d79 100644
--- a/toot/tui/app.py
+++ b/toot/tui/app.py
@@ -143,7 +143,6 @@ class TUI(urwid.Frame):
     def run(self):
         self.loop.set_alarm_in(0, lambda *args: self.async_load_instance())
         self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_accounts())
-        self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_tags())
         self.loop.set_alarm_in(0, lambda *args: self.async_load_timeline(
             is_initial=True, timeline_name="home"))
         self.loop.run()
@@ -339,22 +338,6 @@ class TUI(urwid.Frame):
 
         self.run_in_thread(_load_accounts, done_callback=_done_accounts)
 
-    def async_load_followed_tags(self):
-        def _load_tag_list():
-            try:
-                return api.followed_tags(self.app, self.user)
-            except ApiError:
-                # not supported by all Mastodon servers so fail silently if necessary
-                return []
-
-        def _done_tag_list(tags):
-            if len(tags) > 0:
-                self.followed_tags = [t["name"] for t in tags]
-            else:
-                self.followed_tags = []
-
-        self.run_in_thread(_load_tag_list, done_callback=_done_tag_list)
-
     def refresh_footer(self, timeline):
         """Show status details in footer."""
         status, index, count = timeline.get_focused_status_with_counts()
diff --git a/toot/tui/constants.py b/toot/tui/constants.py
index 91bb3b7..f51ae61 100644
--- a/toot/tui/constants.py
+++ b/toot/tui/constants.py
@@ -57,6 +57,29 @@ PALETTE = [
     ('dim', 'dark gray', ''),
     ('highlight', 'yellow', ''),
     ('success', 'dark green', ''),
+
+    # HTML tag styling
+    ('a', ',italics', '', 'italics'),
+    # em tag is mapped to i
+    ('i', ',italics', '', 'italics'),
+    # strong tag is mapped to b
+    ('b', ',bold', '', 'bold'),
+    # special case for bold + italic nested tags
+    ('bi', ',bold,italics', '', ',bold,italics'),
+    ('u', ',underline', '', ',underline'),
+    ('del', ',strikethrough', '', ',strikethrough'),
+    ('code', 'light gray, standout', '', ',standout'),
+    ('pre', 'light gray, standout', '', ',standout'),
+    ('blockquote', 'light gray', '', ''),
+    ('h1', ',bold', '', ',bold'),
+    ('h2', ',bold', '', ',bold'),
+    ('h3', ',bold', '', ',bold'),
+    ('h4', ',bold', '', ',bold'),
+    ('h5', ',bold', '', ',bold'),
+    ('h6', ',bold', '', ',bold'),
+    ('class_mention_hashtag', 'light cyan', '', ''),
+    ('class_hashtag', 'light cyan', '', ''),
+
 ]
 
 VISIBILITY_OPTIONS = [
diff --git a/toot/tui/overlays.py b/toot/tui/overlays.py
index 75be80c..530921a 100644
--- a/toot/tui/overlays.py
+++ b/toot/tui/overlays.py
@@ -4,10 +4,10 @@ import urwid
 import webbrowser
 
 from toot import __version__
-from toot.utils import format_content
-from .utils import highlight_hashtags, highlight_keys
-from .widgets import Button, EditBox, SelectableText
 from toot import api
+from toot.tui.utils import highlight_keys
+from toot.tui.widgets import Button, EditBox, SelectableText
+from toot.tui.richtext import ContentParser
 
 
 class StatusSource(urwid.Padding):
@@ -255,6 +255,8 @@ class Account(urwid.ListBox):
         super().__init__(walker)
 
     def generate_contents(self, account, relationship=None, last_action=None):
+        parser = ContentParser()
+
         if self.last_action and not self.last_action.startswith("Confirm"):
             yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self)
             yield Button("Cancel", on_press=cancel_action, user_data=self)
@@ -279,8 +281,10 @@ class Account(urwid.ListBox):
 
         if account["note"]:
             yield urwid.Divider()
-            for line in format_content(account["note"]):
-                yield urwid.Text(highlight_hashtags(line, followed_tags=set()))
+
+            widgetlist = parser.html_to_widgets(account["note"])
+            for line in widgetlist:
+                yield (line)
 
         yield urwid.Divider()
         yield urwid.Text(["ID: ", ("highlight", f"{account['id']}")])
@@ -312,8 +316,11 @@ class Account(urwid.ListBox):
                 name = field["name"].title()
                 yield urwid.Divider()
                 yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"])
-                for line in format_content(field["value"]):
-                    yield urwid.Text(highlight_hashtags(line, followed_tags=set()))
+
+                widgetlist = parser.html_to_widgets(field["value"])
+                for line in widgetlist:
+                    yield (line)
+
                 if field["verified_at"]:
                     yield urwid.Text(("success", "✓ Verified"))
 
diff --git a/toot/tui/poll.py b/toot/tui/poll.py
index 0c3ff46..c92cc07 100644
--- a/toot/tui/poll.py
+++ b/toot/tui/poll.py
@@ -2,11 +2,9 @@ import urwid
 
 from toot import api
 from toot.exceptions import ApiError
-from toot.utils import format_content
 from toot.utils.datetime import parse_datetime
-
-from .utils import highlight_hashtags
 from .widgets import Button, CheckBox, RadioButton
+from .richtext import ContentParser
 
 
 class Poll(urwid.ListBox):
@@ -87,8 +85,12 @@ class Poll(urwid.ListBox):
 
     def generate_contents(self, status):
         yield urwid.Divider()
-        for line in format_content(status.data["content"]):
-            yield urwid.Text(highlight_hashtags(line, set()))
+
+        parser = ContentParser()
+        widgetlist = parser.html_to_widgets(status.data["content"])
+
+        for line in widgetlist:
+            yield (line)
 
         yield urwid.Divider()
         yield self.build_linebox(self.generate_poll_detail())
diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
new file mode 100644
index 0000000..e74dff8
--- /dev/null
+++ b/toot/tui/richtext.py
@@ -0,0 +1,457 @@
+"""
+richtext
+"""
+from typing import List, Tuple
+import re
+import urwid
+import unicodedata
+from .constants import PALETTE
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, Tag
+from .stubs.urwidgets import TextEmbed, Hyperlink, parse_text, has_urwidgets
+from urwid.util import decompose_tagmarkup
+from toot.utils import urlencode_url
+
+
+class ContentParser:
+    def __init__(self):
+        self.palette_names = []
+        for p in PALETTE:
+            self.palette_names.append(p[0])
+
+        """Parse a limited subset of HTML and create urwid widgets."""
+
+    def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]:
+        """Convert html to urwid widgets"""
+        widgets: List[urwid.Widget] = []
+        html = unicodedata.normalize("NFKC", html)
+        soup = BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
+        first_tag = True
+        for e in soup.body or soup:
+            if isinstance(e, NavigableString):
+                if first_tag and not recovery_attempt:
+                    # if our first "tag" is a navigable string
+                    # the HTML is out of spec, doesn't start with a tag,
+                    # we see this in content from Pixelfed servers.
+                    # attempt a fix by wrapping the HTML with <p></p>
+                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
+                else:
+                    continue
+            else:
+                name = e.name
+                # if our HTML starts with a tag, but not a block tag
+                # the HTML is out of spec. Attempt a fix by wrapping the
+                # HTML with <p></p>
+                if (
+                    first_tag
+                    and not recovery_attempt
+                    and name
+                    not in (
+                        "p",
+                        "pre",
+                        "li",
+                        "blockquote",
+                        "h1",
+                        "h2",
+                        "h3",
+                        "h4",
+                        "h5",
+                        "h6",
+                    )  # NOTE: update this list if Mastodon starts supporting more block tags
+                ):
+                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
+
+                # First, look for a custom tag handler method in this class
+                # If that fails, fall back to inline_tag_to_text handler
+                method = getattr(self, "_" + name, self.inline_tag_to_text)
+                markup = method(e)  # either returns a Widget, or plain text
+                first_tag = False
+
+            if not isinstance(markup, urwid.Widget):
+                # plaintext, so create a padded text widget
+                txt = self.text_to_widget("", markup)
+                markup = urwid.Padding(
+                    txt,
+                    align="left",
+                    width=("relative", 100),
+                    min_width=None,
+                )
+            widgets.append(markup)
+            # separate top level widgets with a blank line
+            widgets.append(urwid.Divider(" "))
+        return widgets[:-1]  # but suppress the last blank line
+
+    def inline_tag_to_text(self, tag) -> Tuple:
+        """Convert html tag to plain text with tag as attributes recursively"""
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return (tag.name, "")
+        return (tag.name, markups)
+
+    def process_inline_tag_children(self, tag) -> List:
+        """Recursively retrieve all children
+        and convert to a list of markup text"""
+        markups = []
+        for child in tag.children:
+            if isinstance(child, Tag):
+                method = getattr(self, "_" + child.name, self.inline_tag_to_text)
+                markup = method(child)
+                markups.append(markup)
+            else:
+                markups.append(child)
+        return markups
+
+    def text_to_widget(self, attr, markup) -> urwid.Widget:
+        if not has_urwidgets:
+            return urwid.Text((attr, markup))
+
+        TRANSFORM = {
+            # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
+            re.compile(r"(^.+)\x03(.+$)"): lambda g: (
+                len(g[1]),
+                urwid.Filler(Hyperlink(g[2], anchor_attr, g[1])),
+            ),
+        }
+        markup_list = []
+
+        for run in markup:
+            if isinstance(run, tuple):
+                txt, attr_list = decompose_tagmarkup(run)
+                # find anchor titles with an ETX separator followed by href
+                m = re.match(r"(^.+)\x03(.+$)", txt)
+                if m:
+                    anchor_attr = self.get_best_anchor_attr(attr_list)
+                    markup_list.append(
+                        parse_text(
+                            txt,
+                            TRANSFORM,
+                            lambda pattern, groups, span: TRANSFORM[pattern](groups),
+                        )
+                    )
+                else:
+                    markup_list.append(run)
+            else:
+                markup_list.append(run)
+
+        return TextEmbed(markup_list)
+
+    def process_block_tag_children(self, tag) -> List[urwid.Widget]:
+        """Recursively retrieve all children
+        and convert to a list of widgets
+        any inline tags containing text will be
+        converted to Text widgets"""
+
+        pre_widget_markups = []
+        post_widget_markups = []
+        child_widgets = []
+        found_nested_widget = False
+
+        for child in tag.children:
+            if isinstance(child, Tag):
+                # child is a nested tag; process using custom method
+                # or default to inline_tag_to_text
+                method = getattr(self, "_" + child.name, self.inline_tag_to_text)
+                result = method(child)
+                if isinstance(result, urwid.Widget):
+                    found_nested_widget = True
+                    child_widgets.append(result)
+                else:
+                    if not found_nested_widget:
+                        pre_widget_markups.append(result)
+                    else:
+                        post_widget_markups.append(result)
+            else:
+                # child is text; append to the appropriate markup list
+                if not found_nested_widget:
+                    pre_widget_markups.append(child)
+                else:
+                    post_widget_markups.append(child)
+
+        widget_list = []
+        if len(pre_widget_markups):
+            widget_list.append(self.text_to_widget(tag.name, pre_widget_markups))
+
+        if len(child_widgets):
+            widget_list += child_widgets
+
+        if len(post_widget_markups):
+            widget_list.append(self.text_to_widget(tag.name, post_widget_markups))
+
+        return widget_list
+
+    def get_urwid_attr_name(self, tag) -> str:
+        """Get the class name and translate to a
+        name suitable for use as an urwid
+        text attribute name"""
+
+        if "class" in tag.attrs:
+            clss = tag.attrs["class"]
+            if len(clss) > 0:
+                style_name = "class_" + "_".join(clss)
+                # return the class name, only if we
+                # find it as a defined palette name
+                if style_name in self.palette_names:
+                    return style_name
+
+        # fallback to returning the tag name
+        return tag.name
+
+    # Tag handlers start here.
+    # Tags not explicitly listed are "supported" by
+    # rendering as text.
+    # Inline tags return a list of marked up text for urwid.Text
+    # Block tags return urwid.Widget
+
+    def basic_block_tag_handler(self, tag) -> urwid.Widget:
+        """default for block tags that need no special treatment"""
+        return urwid.Pile(self.process_block_tag_children(tag))
+
+    def get_best_anchor_attr(self, attrib_list) -> str:
+        if not attrib_list:
+            return ""
+        flat_al = list(flatten(attrib_list))
+
+        for a in flat_al[0]:
+            # ref: https://docs.joinmastodon.org/spec/activitypub/
+            # these are the class names (translated to attrib names)
+            # that we can support for display
+
+            try:
+                if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]:
+                    return a[0]
+            except KeyError:
+                continue
+
+        return "a"
+
+    def _a(self, tag) -> Tuple:
+        """anchor tag handler"""
+
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return (tag.name, "")
+
+        href = tag.attrs["href"]
+        title, attrib_list = decompose_tagmarkup(markups)
+        if not attrib_list:
+            attrib_list = [tag]
+        if href and has_urwidgets:
+            # only if we have urwidgets loaded for OCS 8 hyperlinks:
+            # urlencode the path and query portions of the URL
+            href = urlencode_url(href)
+            # use ASCII ETX (end of record) as a
+            # delimiter between the title and the HREF
+            title += f"\x03{href}"
+
+        attr = self.get_best_anchor_attr(attrib_list)
+
+        if attr == "a":
+            # didn't find an attribute to use
+            # in the child markup, so let's
+            # try the anchor tag's own attributes
+
+            attr = self.get_urwid_attr_name(tag)
+
+        # hashtag anchors have a class of "mention hashtag"
+        # or "hashtag"
+        # we'll return style "class_mention_hashtag"
+        # or "class_hashtag"
+        # in that case; see corresponding palette entry
+        # in constants.py controlling hashtag highlighting
+
+        return (attr, title)
+
+    def _blockquote(self, tag) -> urwid.Widget:
+        widget_list = self.process_block_tag_children(tag)
+        blockquote_widget = urwid.LineBox(
+            urwid.Padding(
+                urwid.Pile(widget_list),
+                align="left",
+                width=("relative", 100),
+                min_width=None,
+                left=1,
+                right=1,
+            ),
+            tlcorner="",
+            tline="",
+            lline="│",
+            trcorner="",
+            blcorner="",
+            rline="",
+            bline="",
+            brcorner="",
+        )
+        return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
+
+    def _br(self, tag) -> Tuple:
+        return ("br", "\n")
+
+    def _em(self, tag) -> Tuple:
+        # to simplify the number of palette entries
+        # translate EM to I (italic)
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return ("i", "")
+
+        # special case processing for bold and italic
+        for parent in tag.parents:
+            if parent.name == "b" or parent.name == "strong":
+                return ("bi", markups)
+
+        return ("i", markups)
+
+    def _ol(self, tag) -> urwid.Widget:
+        """ordered list tag handler"""
+
+        widgets = []
+        list_item_num = 1
+        increment = -1 if tag.has_attr("reversed") else 1
+
+        # get ol start= attribute if present
+        if tag.has_attr("start") and len(tag.attrs["start"]) > 0:
+            try:
+                list_item_num = int(tag.attrs["start"])
+            except ValueError:
+                pass
+
+        for li in tag.find_all("li", recursive=False):
+            method = getattr(self, "_li", self.inline_tag_to_text)
+            markup = method(li)
+
+            # li value= attribute will change the item number
+            # it also overrides any ol start= attribute
+
+            if li.has_attr("value") and len(li.attrs["value"]) > 0:
+                try:
+                    list_item_num = int(li.attrs["value"])
+                except ValueError:
+                    pass
+
+            if not isinstance(markup, urwid.Widget):
+                txt = self.text_to_widget("li", [str(list_item_num), ". ", markup])
+                # 1. foo, 2. bar, etc.
+                widgets.append(txt)
+            else:
+                txt = self.text_to_widget("li", [str(list_item_num), ". "])
+                columns = urwid.Columns(
+                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+                )
+                widgets.append(columns)
+
+            list_item_num += increment
+
+        return urwid.Pile(widgets)
+
+    def _pre(self, tag) -> urwid.Widget:
+        # <PRE> tag spec says that text should not wrap,
+        # but horizontal screen space is at a premium
+        # and we have no horizontal scroll bar, so allow
+        # wrapping.
+
+        widget_list = [urwid.Divider(" ")]
+        widget_list += self.process_block_tag_children(tag)
+
+        pre_widget = urwid.Padding(
+            urwid.Pile(widget_list),
+            align="left",
+            width=("relative", 100),
+            min_width=None,
+            left=1,
+            right=1,
+        )
+        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
+
+    def _span(self, tag) -> Tuple:
+        markups = self.process_inline_tag_children(tag)
+
+        if not markups:
+            return (tag.name, "")
+
+        # span inherits its parent's class definition
+        # unless it has a specific class definition
+        # of its own
+
+        if "class" in tag.attrs:
+            # uncomment the following code to hide all HTML marked
+            # invisible (generally, the http:// prefix of URLs)
+            # could be a user preference, it's only advisable if
+            # the terminal supports OCS 8 hyperlinks (and that's not
+            # automatically detectable)
+
+            # if "invisible" in tag.attrs["class"]:
+            #     return (tag.name, "")
+
+            style_name = self.get_urwid_attr_name(tag)
+
+            if style_name != "span":
+                # unique class name matches an entry in our palette
+                return (style_name, markups)
+
+        if tag.parent:
+            return (self.get_urwid_attr_name(tag.parent), markups)
+        else:
+            # fallback
+            return ("span", markups)
+
+    def _strong(self, tag) -> Tuple:
+        # to simplify the number of palette entries
+        # translate STRONG to B (bold)
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return ("b", "")
+
+        # special case processing for bold and italic
+        for parent in tag.parents:
+            if parent.name == "i" or parent.name == "em":
+                return ("bi", markups)
+
+        return ("b", markups)
+
+    def _ul(self, tag) -> urwid.Widget:
+        """unordered list tag handler"""
+
+        widgets = []
+
+        for li in tag.find_all("li", recursive=False):
+            method = getattr(self, "_li", self.inline_tag_to_text)
+            markup = method(li)
+
+            if not isinstance(markup, urwid.Widget):
+                txt = self.text_to_widget("li", ["\N{bullet} ", markup])
+                # * foo, * bar, etc.
+                widgets.append(txt)
+            else:
+                txt = self.text_to_widget("li", ["\N{bullet} "])
+                columns = urwid.Columns(
+                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+                )
+                widgets.append(columns)
+
+        return urwid.Pile(widgets)
+
+    # These tags are handled identically to others
+    # the only difference being the tag name used for
+    # urwid attribute mapping
+
+    _b = _strong
+
+    _div = basic_block_tag_handler
+
+    _i = _em
+
+    _li = basic_block_tag_handler
+
+    # Glitch-soc and Pleroma allow <H1>...<H6> in content
+    # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
+
+    _h1 = _h2 = _h3 = _h4 = _h5 = _h6 = basic_block_tag_handler
+
+    _p = basic_block_tag_handler
+
+
+def flatten(data):
+    if isinstance(data, tuple):
+        for x in data:
+            yield from flatten(x)
+    else:
+        yield data
diff --git a/toot/tui/stubs/stub_hyperlink.py b/toot/tui/stubs/stub_hyperlink.py
new file mode 100644
index 0000000..a2831ba
--- /dev/null
+++ b/toot/tui/stubs/stub_hyperlink.py
@@ -0,0 +1,30 @@
+__all__ = ("Hyperlink",)
+
+import urwid
+
+
+class Hyperlink(urwid.WidgetWrap):
+    def __init__(
+        self,
+        uri,
+        attr,
+        text,
+    ):
+        pass
+
+    def render(self, size, focus):
+        return None
+
+
+class HyperlinkCanvas(urwid.Canvas):
+    def __init__(self, uri: str, text_canv: urwid.TextCanvas):
+        pass
+
+    def cols(self):
+        return 0
+
+    def content(self, *args, **kwargs):
+        yield [None]
+
+    def rows(self):
+        return 0
diff --git a/toot/tui/stubs/stub_text_embed.py b/toot/tui/stubs/stub_text_embed.py
new file mode 100644
index 0000000..bf587d2
--- /dev/null
+++ b/toot/tui/stubs/stub_text_embed.py
@@ -0,0 +1,29 @@
+__all__ = ("parse_text", "TextEmbed")
+
+import urwid
+
+
+class TextEmbed(urwid.Text):
+    def get_text(
+        self,
+    ):
+        return None
+
+    def render(self, size, focus):
+        return None
+
+    def set_text(self, markup):
+        pass
+
+    def set_wrap_mode(self, mode):
+        pass
+
+
+def parse_text(
+    text,
+    patterns,
+    repl,
+    *repl_args,
+    **repl_kwargs,
+):
+    return None
diff --git a/toot/tui/stubs/urwidgets.py b/toot/tui/stubs/urwidgets.py
new file mode 100644
index 0000000..92737d3
--- /dev/null
+++ b/toot/tui/stubs/urwidgets.py
@@ -0,0 +1,8 @@
+# If urwidgets is loaded use it; otherwise use our stubs
+try:
+    from urwidgets import Hyperlink, TextEmbed, parse_text
+    has_urwidgets = True
+except ImportError:
+    from .stub_hyperlink import Hyperlink
+    from .stub_text_embed import TextEmbed, parse_text
+    has_urwidgets = False
diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py
index ddc4a35..1fef40c 100644
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -1,18 +1,21 @@
 import logging
+import re
 import urwid
 import webbrowser
 
 from typing import List, Optional
 
 from toot.tui import app
-from toot.utils import format_content
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name
 
-from .entities import Status
-from .scroll import Scrollable, ScrollBar
-from .utils import highlight_hashtags, highlight_keys
-from .widgets import SelectableText, SelectableColumns
+from toot.entities import Status
+from toot.tui.scroll import Scrollable, ScrollBar
+from toot.tui.utils import highlight_keys
+from toot.tui.widgets import SelectableText, SelectableColumns
+from toot.tui.richtext import ContentParser
+from toot.utils import urlencode_url
+from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets
 
 logger = logging.getLogger("toot")
 
@@ -310,7 +313,6 @@ class Timeline(urwid.Columns):
 class StatusDetails(urwid.Pile):
     def __init__(self, timeline: Timeline, status: Optional[Status]):
         self.status = status
-        self.followed_tags = timeline.tui.followed_tags
         self.followed_accounts = timeline.tui.followed_accounts
 
         reblogged_by = status.author if status and status.reblog else None
@@ -318,6 +320,20 @@ class StatusDetails(urwid.Pile):
             if status else ())
         return super().__init__(widget_list)
 
+    def linkify_content(self, text) -> urwid.Widget:
+        if not has_urwidgets:
+            return urwid.Text(("link", text))
+        TRANSFORM = {
+            # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
+            re.compile(r'(https?://[^\s]+)'):
+                lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))),
+        }
+        markup_list = []
+
+        markup_list.append(parse_text(text, TRANSFORM,
+            lambda pattern, groups, span: TRANSFORM[pattern](groups)))
+        return TextEmbed(markup_list, align='left')
+
     def content_generator(self, status, reblogged_by):
         if reblogged_by:
             text = "♺ {} boosted".format(reblogged_by.display_name or reblogged_by.username)
@@ -340,8 +356,12 @@ class StatusDetails(urwid.Pile):
             yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
         else:
             content = status.original.translation if status.original.show_translation else status.data["content"]
-            for line in format_content(content):
-                yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
+
+            parser = ContentParser()
+            widgetlist = parser.html_to_widgets(content)
+
+            for line in widgetlist:
+                yield (line)
 
             media = status.data["media_attachments"]
             if media:
@@ -350,7 +370,7 @@ class StatusDetails(urwid.Pile):
                     yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"]))
                     if m["description"]:
                         yield ("pack", urwid.Text(m["description"]))
-                    yield ("pack", urwid.Text(("link", m["url"])))
+                    yield ("pack", self.linkify_content(m["url"]))
 
             poll = status.original.data.get("poll")
             if poll:
@@ -410,7 +430,7 @@ class StatusDetails(urwid.Pile):
         if card["description"]:
             yield urwid.Text(card["description"].strip())
             yield urwid.Text("")
-        yield urwid.Text(("link", card["url"]))
+        yield self.linkify_content(card["url"])
 
     def poll_generator(self, poll):
         for idx, option in enumerate(poll["options"]):
diff --git a/toot/tui/urwidgets.py b/toot/tui/urwidgets.py
new file mode 100644
index 0000000..ee731a8
--- /dev/null
+++ b/toot/tui/urwidgets.py
@@ -0,0 +1,8 @@
+# If urwidgets is loaded use it; otherwise use our stubs
+try:
+    from urwidgets import Hyperlink, TextEmbed, parse_text  # noqa: F401
+    has_urwidgets = True
+except ImportError:
+    from .stub_hyperlink import Hyperlink  # noqa: F401
+    from .stub_text_embed import TextEmbed, parse_text  # noqa: F401
+    has_urwidgets = False
diff --git a/toot/tui/utils.py b/toot/tui/utils.py
index 377522b..0ccff9d 100644
--- a/toot/tui/utils.py
+++ b/toot/tui/utils.py
@@ -35,21 +35,6 @@ def highlight_keys(text, high_attr, low_attr=""):
     return list(_gen())
 
 
-def highlight_hashtags(line, followed_tags, attr="hashtag", followed_attr="hashtag_followed"):
-    hline = []
-
-    for p in re.split(HASHTAG_PATTERN, line):
-        if p.startswith("#"):
-            if p[1:].lower() in (t.lower() for t in followed_tags):
-                hline.append((followed_attr, p))
-            else:
-                hline.append((attr, p))
-        else:
-            hline.append(p)
-
-    return hline
-
-
 def show_media(paths):
     """
     Attempt to open an image viewer to show given media files.
diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py
index e8103ac..43af373 100644
--- a/toot/utils/__init__.py
+++ b/toot/utils/__init__.py
@@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from typing import Dict
 
 from toot.exceptions import ConsoleError
+from urllib.parse import urlparse, urlencode, quote, unquote
 
 
 def str_bool(b):
@@ -186,3 +187,14 @@ def _warn_scheme_deprecated():
         "instead write:",
         "  toot instance http://unsafehost.com\n"
     ]))
+
+
+def urlencode_url(url):
+    parsed_url = urlparse(url)
+
+    # unencode before encoding, to prevent double-urlencoding
+    encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/")
+    encoded_query = urlencode({k: quote(unquote(v), safe="-._~()'!*:@,;?/") for k, v in parsed_url.params})
+    encoded_url = parsed_url._replace(path=encoded_path, params=encoded_query).geturl()
+
+    return encoded_url

From 89e905cd8b271b394b63483d70672e5a7cda0098 Mon Sep 17 00:00:00 2001
From: Daniel Schwarz <schwarz.dan@gmail.com>
Date: Thu, 28 Sep 2023 14:22:59 -0400
Subject: [PATCH 02/20] added urwidgets as an optional dependency for
 'hyperlinks' extra

---
 requirements.txt | 2 +-
 setup.py         | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 3616ac3..54b69dd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,4 @@ requests>=2.13,<3.0
 beautifulsoup4>=4.5.0,<5.0
 wcwidth>=0.1.7
 urwid>=2.0.0,<3.0
-
+urwidgets=>=0.1,<0.2
diff --git a/setup.py b/setup.py
index d893764..0739408 100644
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,9 @@ setup(
         "urwid>=2.0.0,<3.0",
         "tomlkit>=0.10.0,<1.0"
     ],
+    extras_require={
+        "hyperlinks": ['urwidgets>=0.1,<0.2'],
+    },
     entry_points={
         'console_scripts': [
             'toot=toot.console:main',

From 06167a5bc9185c67ac6dcafced799d8f460b64d0 Mon Sep 17 00:00:00 2001
From: Daniel Schwarz <schwarz.dan@gmail.com>
Date: Fri, 29 Sep 2023 07:18:59 -0400
Subject: [PATCH 03/20] typo fix in requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 54b69dd..8579f41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,4 @@ requests>=2.13,<3.0
 beautifulsoup4>=4.5.0,<5.0
 wcwidth>=0.1.7
 urwid>=2.0.0,<3.0
-urwidgets=>=0.1,<0.2
+urwidgets>=0.1,<0.2

From a9ef96c31b50654b63655f19a05f88d25b2fb14e Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Sat, 4 Nov 2023 07:26:45 +0100
Subject: [PATCH 04/20] Cleanup formatting

---
 toot/tui/richtext.py              | 12 +++++-------
 toot/tui/stubs/stub_hyperlink.py  |  7 +------
 toot/tui/stubs/stub_text_embed.py | 12 ++----------
 3 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index e74dff8..b1a4c68 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -1,16 +1,14 @@
-"""
-richtext
-"""
-from typing import List, Tuple
 import re
 import urwid
 import unicodedata
-from .constants import PALETTE
+
 from bs4 import BeautifulSoup
 from bs4.element import NavigableString, Tag
-from .stubs.urwidgets import TextEmbed, Hyperlink, parse_text, has_urwidgets
-from urwid.util import decompose_tagmarkup
+from toot.tui.constants import PALETTE
+from toot.tui.stubs.urwidgets import TextEmbed, Hyperlink, parse_text, has_urwidgets
 from toot.utils import urlencode_url
+from typing import List, Tuple
+from urwid.util import decompose_tagmarkup
 
 
 class ContentParser:
diff --git a/toot/tui/stubs/stub_hyperlink.py b/toot/tui/stubs/stub_hyperlink.py
index a2831ba..aa0488d 100644
--- a/toot/tui/stubs/stub_hyperlink.py
+++ b/toot/tui/stubs/stub_hyperlink.py
@@ -4,12 +4,7 @@ import urwid
 
 
 class Hyperlink(urwid.WidgetWrap):
-    def __init__(
-        self,
-        uri,
-        attr,
-        text,
-    ):
+    def __init__(self, uri, attr, text):
         pass
 
     def render(self, size, focus):
diff --git a/toot/tui/stubs/stub_text_embed.py b/toot/tui/stubs/stub_text_embed.py
index bf587d2..622b5f7 100644
--- a/toot/tui/stubs/stub_text_embed.py
+++ b/toot/tui/stubs/stub_text_embed.py
@@ -4,9 +4,7 @@ import urwid
 
 
 class TextEmbed(urwid.Text):
-    def get_text(
-        self,
-    ):
+    def get_text(self):
         return None
 
     def render(self, size, focus):
@@ -19,11 +17,5 @@ class TextEmbed(urwid.Text):
         pass
 
 
-def parse_text(
-    text,
-    patterns,
-    repl,
-    *repl_args,
-    **repl_kwargs,
-):
+def parse_text(text, patterns, repl, *repl_args, **repl_kwargs):
     return None

From d91c73520e4d662c00ad8ab191aa4a0bcd9d5a1e Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Sat, 4 Nov 2023 07:38:47 +0100
Subject: [PATCH 05/20] Better function name

---
 toot/output.py         | 4 ++--
 toot/utils/__init__.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/toot/output.py b/toot/output.py
index 6fd59a2..83898f8 100644
--- a/toot/output.py
+++ b/toot/output.py
@@ -6,7 +6,7 @@ import textwrap
 from functools import lru_cache
 from toot import settings
 from toot.entities import Instance, Notification, Poll, Status
-from toot.utils import get_text, parse_html
+from toot.utils import get_text, html_to_paragraphs
 from toot.wcstring import wc_wrap
 from typing import List
 from wcwidth import wcswidth
@@ -321,7 +321,7 @@ def print_status(status: Status, width: int = 80):
 
 def print_html(text, width=80):
     first = True
-    for paragraph in parse_html(text):
+    for paragraph in html_to_paragraphs(text):
         if not first:
             print_out("")
         for line in paragraph:
diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py
index 43af373..f0fda9e 100644
--- a/toot/utils/__init__.py
+++ b/toot/utils/__init__.py
@@ -36,7 +36,7 @@ def get_text(html):
     return unicodedata.normalize('NFKC', text)
 
 
-def parse_html(html):
+def html_to_paragraphs(html):
     """Attempt to convert html to plain text while keeping line breaks.
     Returns a list of paragraphs, each being a list of lines.
     """
@@ -55,7 +55,7 @@ def format_content(content):
     Returns a generator yielding lines of content.
     """
 
-    paragraphs = parse_html(content)
+    paragraphs = html_to_paragraphs(content)
 
     first = True
 

From 199a96625b3ca60f2f6f8e16495c79b49a0c23f9 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Sat, 4 Nov 2023 07:40:56 +0100
Subject: [PATCH 06/20] Extract parsing html

---
 toot/tui/richtext.py   |  5 ++---
 toot/utils/__init__.py | 12 +++++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index b1a4c68..66c1f27 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -2,11 +2,10 @@ import re
 import urwid
 import unicodedata
 
-from bs4 import BeautifulSoup
 from bs4.element import NavigableString, Tag
 from toot.tui.constants import PALETTE
 from toot.tui.stubs.urwidgets import TextEmbed, Hyperlink, parse_text, has_urwidgets
-from toot.utils import urlencode_url
+from toot.utils import parse_html, urlencode_url
 from typing import List, Tuple
 from urwid.util import decompose_tagmarkup
 
@@ -23,7 +22,7 @@ class ContentParser:
         """Convert html to urwid widgets"""
         widgets: List[urwid.Widget] = []
         html = unicodedata.normalize("NFKC", html)
-        soup = BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
+        soup = parse_html(html)
         first_tag = True
         for e in soup.body or soup:
             if isinstance(e, NavigableString):
diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py
index f0fda9e..c4afa7f 100644
--- a/toot/utils/__init__.py
+++ b/toot/utils/__init__.py
@@ -23,17 +23,19 @@ def str_bool_nullable(b):
     return None if b is None else str_bool(b)
 
 
-def get_text(html):
-    """Converts html to text, strips all tags."""
-
+def parse_html(html: str) -> BeautifulSoup:
     # Ignore warnings made by BeautifulSoup, if passed something that looks like
     # a file (e.g. a dot which matches current dict), it will warn that the file
     # should be opened instead of passing a filename.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
-        text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()
+        return BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
 
-    return unicodedata.normalize('NFKC', text)
+
+def get_text(html):
+    """Converts html to text, strips all tags."""
+    text = parse_html(html).get_text()
+    return unicodedata.normalize("NFKC", text)
 
 
 def html_to_paragraphs(html):

From a8b4c7971682d81164e2de8cb4c9c83fd208ba3e Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Mon, 6 Nov 2023 09:36:30 +0100
Subject: [PATCH 07/20] Eliminate constructor

---
 toot/tui/richtext.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index 66c1f27..e46de6b 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -10,13 +10,11 @@ from typing import List, Tuple
 from urwid.util import decompose_tagmarkup
 
 
-class ContentParser:
-    def __init__(self):
-        self.palette_names = []
-        for p in PALETTE:
-            self.palette_names.append(p[0])
+STYLE_NAMES = [p[0] for p in PALETTE]
 
-        """Parse a limited subset of HTML and create urwid widgets."""
+
+class ContentParser:
+    """Parse a limited subset of HTML and create urwid widgets."""
 
     def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]:
         """Convert html to urwid widgets"""
@@ -187,7 +185,7 @@ class ContentParser:
                 style_name = "class_" + "_".join(clss)
                 # return the class name, only if we
                 # find it as a defined palette name
-                if style_name in self.palette_names:
+                if style_name in STYLE_NAMES:
                     return style_name
 
         # fallback to returning the tag name

From 2aba3f93f990194bc82bb6ded8b710293e14b107 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Mon, 6 Nov 2023 09:56:12 +0100
Subject: [PATCH 08/20] Extract block tags

---
 toot/tui/richtext.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index e46de6b..f47cbde 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -12,6 +12,9 @@ from urwid.util import decompose_tagmarkup
 
 STYLE_NAMES = [p[0] for p in PALETTE]
 
+# NOTE: update this list if Mastodon starts supporting more block tags
+BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"]
+
 
 class ContentParser:
     """Parse a limited subset of HTML and create urwid widgets."""
@@ -21,6 +24,7 @@ class ContentParser:
         widgets: List[urwid.Widget] = []
         html = unicodedata.normalize("NFKC", html)
         soup = parse_html(html)
+
         first_tag = True
         for e in soup.body or soup:
             if isinstance(e, NavigableString):
@@ -37,23 +41,7 @@ class ContentParser:
                 # if our HTML starts with a tag, but not a block tag
                 # the HTML is out of spec. Attempt a fix by wrapping the
                 # HTML with <p></p>
-                if (
-                    first_tag
-                    and not recovery_attempt
-                    and name
-                    not in (
-                        "p",
-                        "pre",
-                        "li",
-                        "blockquote",
-                        "h1",
-                        "h2",
-                        "h3",
-                        "h4",
-                        "h5",
-                        "h6",
-                    )  # NOTE: update this list if Mastodon starts supporting more block tags
-                ):
+                if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
                     return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
 
                 # First, look for a custom tag handler method in this class

From ce6faccb991f163bff628c1eb9bd6ec65d7d9f06 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Mon, 6 Nov 2023 17:43:02 +0100
Subject: [PATCH 09/20] Extract render method

---
 toot/tui/richtext.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index f47cbde..22e4b2b 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -44,10 +44,7 @@ class ContentParser:
                 if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
                     return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
 
-                # First, look for a custom tag handler method in this class
-                # If that fails, fall back to inline_tag_to_text handler
-                method = getattr(self, "_" + name, self.inline_tag_to_text)
-                markup = method(e)  # either returns a Widget, or plain text
+                markup = self.render(name, e)
                 first_tag = False
 
             if not isinstance(markup, urwid.Widget):
@@ -77,8 +74,7 @@ class ContentParser:
         markups = []
         for child in tag.children:
             if isinstance(child, Tag):
-                method = getattr(self, "_" + child.name, self.inline_tag_to_text)
-                markup = method(child)
+                markup = self.render(child.name, child)
                 markups.append(markup)
             else:
                 markups.append(child)
@@ -133,8 +129,7 @@ class ContentParser:
             if isinstance(child, Tag):
                 # child is a nested tag; process using custom method
                 # or default to inline_tag_to_text
-                method = getattr(self, "_" + child.name, self.inline_tag_to_text)
-                result = method(child)
+                result = self.render(child.name, child)
                 if isinstance(result, urwid.Widget):
                     found_nested_widget = True
                     child_widgets.append(result)
@@ -207,6 +202,12 @@ class ContentParser:
 
         return "a"
 
+    def render(self, attr, content):
+        # First, look for a custom tag handler method in this class
+        # If that fails, fall back to inline_tag_to_text handler
+        method = getattr(self, f"_{attr}", self.inline_tag_to_text)
+        return method(content)
+
     def _a(self, tag) -> Tuple:
         """anchor tag handler"""
 
@@ -298,8 +299,7 @@ class ContentParser:
                 pass
 
         for li in tag.find_all("li", recursive=False):
-            method = getattr(self, "_li", self.inline_tag_to_text)
-            markup = method(li)
+            markup = self.render("li", li)
 
             # li value= attribute will change the item number
             # it also overrides any ol start= attribute
@@ -396,8 +396,7 @@ class ContentParser:
         widgets = []
 
         for li in tag.find_all("li", recursive=False):
-            method = getattr(self, "_li", self.inline_tag_to_text)
-            markup = method(li)
+            markup = self.render("li", li)
 
             if not isinstance(markup, urwid.Widget):
                 txt = self.text_to_widget("li", ["\N{bullet} ", markup])

From a5444533381cd7dace785f2fbb4cd849d2447113 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Mon, 6 Nov 2023 17:51:03 +0100
Subject: [PATCH 10/20] Remove magic lookup

Having the choice explicit makes the code easier to read.
---
 toot/tui/richtext.py | 77 +++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index 22e4b2b..f1829de 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -202,13 +202,43 @@ class ContentParser:
 
         return "a"
 
-    def render(self, attr, content):
-        # First, look for a custom tag handler method in this class
-        # If that fails, fall back to inline_tag_to_text handler
-        method = getattr(self, f"_{attr}", self.inline_tag_to_text)
-        return method(content)
+    def render(self, attr: str, content: str):
+        if attr in ["a"]:
+            return self.render_anchor(content)
 
-    def _a(self, tag) -> Tuple:
+        if attr in ["blockquote"]:
+            return self.render_blockquote(content)
+
+        if attr in ["br"]:
+            return self.render_br(content)
+
+        if attr in ["em"]:
+            return self.render_em(content)
+
+        if attr in ["ol"]:
+            return self.render_ol(content)
+
+        if attr in ["pre"]:
+            return self.render_pre(content)
+
+        if attr in ["span"]:
+            return self.render_span(content)
+
+        if attr in ["b", "strong"]:
+            return self.render_strong(content)
+
+        if attr in ["ul"]:
+            return self.render_ul(content)
+
+        # Glitch-soc and Pleroma allow <H1>...<H6> in content
+        # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
+        if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]:
+            return self.basic_block_tag_handler(content)
+
+        # Fall back to inline_tag_to_text handler
+        return self.inline_tag_to_text(content)
+
+    def render_anchor(self, tag) -> Tuple:
         """anchor tag handler"""
 
         markups = self.process_inline_tag_children(tag)
@@ -245,7 +275,7 @@ class ContentParser:
 
         return (attr, title)
 
-    def _blockquote(self, tag) -> urwid.Widget:
+    def render_blockquote(self, tag) -> urwid.Widget:
         widget_list = self.process_block_tag_children(tag)
         blockquote_widget = urwid.LineBox(
             urwid.Padding(
@@ -267,10 +297,10 @@ class ContentParser:
         )
         return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
 
-    def _br(self, tag) -> Tuple:
+    def render_br(self, tag) -> Tuple:
         return ("br", "\n")
 
-    def _em(self, tag) -> Tuple:
+    def render_em(self, tag) -> Tuple:
         # to simplify the number of palette entries
         # translate EM to I (italic)
         markups = self.process_inline_tag_children(tag)
@@ -284,7 +314,7 @@ class ContentParser:
 
         return ("i", markups)
 
-    def _ol(self, tag) -> urwid.Widget:
+    def render_ol(self, tag) -> urwid.Widget:
         """ordered list tag handler"""
 
         widgets = []
@@ -325,7 +355,7 @@ class ContentParser:
 
         return urwid.Pile(widgets)
 
-    def _pre(self, tag) -> urwid.Widget:
+    def render_pre(self, tag) -> urwid.Widget:
         # <PRE> tag spec says that text should not wrap,
         # but horizontal screen space is at a premium
         # and we have no horizontal scroll bar, so allow
@@ -344,7 +374,7 @@ class ContentParser:
         )
         return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
 
-    def _span(self, tag) -> Tuple:
+    def render_span(self, tag) -> Tuple:
         markups = self.process_inline_tag_children(tag)
 
         if not markups:
@@ -376,7 +406,7 @@ class ContentParser:
             # fallback
             return ("span", markups)
 
-    def _strong(self, tag) -> Tuple:
+    def render_strong(self, tag) -> Tuple:
         # to simplify the number of palette entries
         # translate STRONG to B (bold)
         markups = self.process_inline_tag_children(tag)
@@ -390,7 +420,7 @@ class ContentParser:
 
         return ("b", markups)
 
-    def _ul(self, tag) -> urwid.Widget:
+    def render_ul(self, tag) -> urwid.Widget:
         """unordered list tag handler"""
 
         widgets = []
@@ -411,25 +441,6 @@ class ContentParser:
 
         return urwid.Pile(widgets)
 
-    # These tags are handled identically to others
-    # the only difference being the tag name used for
-    # urwid attribute mapping
-
-    _b = _strong
-
-    _div = basic_block_tag_handler
-
-    _i = _em
-
-    _li = basic_block_tag_handler
-
-    # Glitch-soc and Pleroma allow <H1>...<H6> in content
-    # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
-
-    _h1 = _h2 = _h3 = _h4 = _h5 = _h6 = basic_block_tag_handler
-
-    _p = basic_block_tag_handler
-
 
 def flatten(data):
     if isinstance(data, tuple):

From 073dd3025cf2381dfb85b561914a2cd2d539b8ff Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Mon, 6 Nov 2023 18:14:21 +0100
Subject: [PATCH 11/20] Remove the ContentParser class, use functions instead

It did not help, just added to the indent.
---
 toot/tui/overlays.py |   8 +-
 toot/tui/poll.py     |   5 +-
 toot/tui/richtext.py | 766 ++++++++++++++++++++++---------------------
 toot/tui/timeline.py |   6 +-
 4 files changed, 394 insertions(+), 391 deletions(-)

diff --git a/toot/tui/overlays.py b/toot/tui/overlays.py
index 530921a..58eb457 100644
--- a/toot/tui/overlays.py
+++ b/toot/tui/overlays.py
@@ -7,7 +7,7 @@ from toot import __version__
 from toot import api
 from toot.tui.utils import highlight_keys
 from toot.tui.widgets import Button, EditBox, SelectableText
-from toot.tui.richtext import ContentParser
+from toot.tui.richtext import html_to_widgets
 
 
 class StatusSource(urwid.Padding):
@@ -255,8 +255,6 @@ class Account(urwid.ListBox):
         super().__init__(walker)
 
     def generate_contents(self, account, relationship=None, last_action=None):
-        parser = ContentParser()
-
         if self.last_action and not self.last_action.startswith("Confirm"):
             yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self)
             yield Button("Cancel", on_press=cancel_action, user_data=self)
@@ -282,7 +280,7 @@ class Account(urwid.ListBox):
         if account["note"]:
             yield urwid.Divider()
 
-            widgetlist = parser.html_to_widgets(account["note"])
+            widgetlist = html_to_widgets(account["note"])
             for line in widgetlist:
                 yield (line)
 
@@ -317,7 +315,7 @@ class Account(urwid.ListBox):
                 yield urwid.Divider()
                 yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"])
 
-                widgetlist = parser.html_to_widgets(field["value"])
+                widgetlist = html_to_widgets(field["value"])
                 for line in widgetlist:
                     yield (line)
 
diff --git a/toot/tui/poll.py b/toot/tui/poll.py
index c92cc07..e738fc7 100644
--- a/toot/tui/poll.py
+++ b/toot/tui/poll.py
@@ -4,7 +4,7 @@ from toot import api
 from toot.exceptions import ApiError
 from toot.utils.datetime import parse_datetime
 from .widgets import Button, CheckBox, RadioButton
-from .richtext import ContentParser
+from .richtext import html_to_widgets
 
 
 class Poll(urwid.ListBox):
@@ -86,8 +86,7 @@ class Poll(urwid.ListBox):
     def generate_contents(self, status):
         yield urwid.Divider()
 
-        parser = ContentParser()
-        widgetlist = parser.html_to_widgets(status.data["content"])
+        widgetlist = html_to_widgets(status.data["content"])
 
         for line in widgetlist:
             yield (line)
diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index f1829de..b4e5b03 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -16,430 +16,438 @@ STYLE_NAMES = [p[0] for p in PALETTE]
 BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"]
 
 
-class ContentParser:
-    """Parse a limited subset of HTML and create urwid widgets."""
+def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]:
+    """Convert html to urwid widgets"""
+    widgets: List[urwid.Widget] = []
+    html = unicodedata.normalize("NFKC", html)
+    soup = parse_html(html)
 
-    def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]:
-        """Convert html to urwid widgets"""
-        widgets: List[urwid.Widget] = []
-        html = unicodedata.normalize("NFKC", html)
-        soup = parse_html(html)
-
-        first_tag = True
-        for e in soup.body or soup:
-            if isinstance(e, NavigableString):
-                if first_tag and not recovery_attempt:
-                    # if our first "tag" is a navigable string
-                    # the HTML is out of spec, doesn't start with a tag,
-                    # we see this in content from Pixelfed servers.
-                    # attempt a fix by wrapping the HTML with <p></p>
-                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
-                else:
-                    continue
+    first_tag = True
+    for e in soup.body or soup:
+        if isinstance(e, NavigableString):
+            if first_tag and not recovery_attempt:
+                # if our first "tag" is a navigable string
+                # the HTML is out of spec, doesn't start with a tag,
+                # we see this in content from Pixelfed servers.
+                # attempt a fix by wrapping the HTML with <p></p>
+                return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
             else:
-                name = e.name
-                # if our HTML starts with a tag, but not a block tag
-                # the HTML is out of spec. Attempt a fix by wrapping the
-                # HTML with <p></p>
-                if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
-                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
-
-                markup = self.render(name, e)
-                first_tag = False
-
-            if not isinstance(markup, urwid.Widget):
-                # plaintext, so create a padded text widget
-                txt = self.text_to_widget("", markup)
-                markup = urwid.Padding(
-                    txt,
-                    align="left",
-                    width=("relative", 100),
-                    min_width=None,
-                )
-            widgets.append(markup)
-            # separate top level widgets with a blank line
-            widgets.append(urwid.Divider(" "))
-        return widgets[:-1]  # but suppress the last blank line
-
-    def inline_tag_to_text(self, tag) -> Tuple:
-        """Convert html tag to plain text with tag as attributes recursively"""
-        markups = self.process_inline_tag_children(tag)
-        if not markups:
-            return (tag.name, "")
-        return (tag.name, markups)
-
-    def process_inline_tag_children(self, tag) -> List:
-        """Recursively retrieve all children
-        and convert to a list of markup text"""
-        markups = []
-        for child in tag.children:
-            if isinstance(child, Tag):
-                markup = self.render(child.name, child)
-                markups.append(markup)
-            else:
-                markups.append(child)
-        return markups
-
-    def text_to_widget(self, attr, markup) -> urwid.Widget:
-        if not has_urwidgets:
-            return urwid.Text((attr, markup))
-
-        TRANSFORM = {
-            # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
-            re.compile(r"(^.+)\x03(.+$)"): lambda g: (
-                len(g[1]),
-                urwid.Filler(Hyperlink(g[2], anchor_attr, g[1])),
-            ),
-        }
-        markup_list = []
-
-        for run in markup:
-            if isinstance(run, tuple):
-                txt, attr_list = decompose_tagmarkup(run)
-                # find anchor titles with an ETX separator followed by href
-                m = re.match(r"(^.+)\x03(.+$)", txt)
-                if m:
-                    anchor_attr = self.get_best_anchor_attr(attr_list)
-                    markup_list.append(
-                        parse_text(
-                            txt,
-                            TRANSFORM,
-                            lambda pattern, groups, span: TRANSFORM[pattern](groups),
-                        )
-                    )
-                else:
-                    markup_list.append(run)
-            else:
-                markup_list.append(run)
-
-        return TextEmbed(markup_list)
-
-    def process_block_tag_children(self, tag) -> List[urwid.Widget]:
-        """Recursively retrieve all children
-        and convert to a list of widgets
-        any inline tags containing text will be
-        converted to Text widgets"""
-
-        pre_widget_markups = []
-        post_widget_markups = []
-        child_widgets = []
-        found_nested_widget = False
-
-        for child in tag.children:
-            if isinstance(child, Tag):
-                # child is a nested tag; process using custom method
-                # or default to inline_tag_to_text
-                result = self.render(child.name, child)
-                if isinstance(result, urwid.Widget):
-                    found_nested_widget = True
-                    child_widgets.append(result)
-                else:
-                    if not found_nested_widget:
-                        pre_widget_markups.append(result)
-                    else:
-                        post_widget_markups.append(result)
-            else:
-                # child is text; append to the appropriate markup list
-                if not found_nested_widget:
-                    pre_widget_markups.append(child)
-                else:
-                    post_widget_markups.append(child)
-
-        widget_list = []
-        if len(pre_widget_markups):
-            widget_list.append(self.text_to_widget(tag.name, pre_widget_markups))
-
-        if len(child_widgets):
-            widget_list += child_widgets
-
-        if len(post_widget_markups):
-            widget_list.append(self.text_to_widget(tag.name, post_widget_markups))
-
-        return widget_list
-
-    def get_urwid_attr_name(self, tag) -> str:
-        """Get the class name and translate to a
-        name suitable for use as an urwid
-        text attribute name"""
-
-        if "class" in tag.attrs:
-            clss = tag.attrs["class"]
-            if len(clss) > 0:
-                style_name = "class_" + "_".join(clss)
-                # return the class name, only if we
-                # find it as a defined palette name
-                if style_name in STYLE_NAMES:
-                    return style_name
-
-        # fallback to returning the tag name
-        return tag.name
-
-    # Tag handlers start here.
-    # Tags not explicitly listed are "supported" by
-    # rendering as text.
-    # Inline tags return a list of marked up text for urwid.Text
-    # Block tags return urwid.Widget
-
-    def basic_block_tag_handler(self, tag) -> urwid.Widget:
-        """default for block tags that need no special treatment"""
-        return urwid.Pile(self.process_block_tag_children(tag))
-
-    def get_best_anchor_attr(self, attrib_list) -> str:
-        if not attrib_list:
-            return ""
-        flat_al = list(flatten(attrib_list))
-
-        for a in flat_al[0]:
-            # ref: https://docs.joinmastodon.org/spec/activitypub/
-            # these are the class names (translated to attrib names)
-            # that we can support for display
-
-            try:
-                if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]:
-                    return a[0]
-            except KeyError:
                 continue
+        else:
+            name = e.name
+            # if our HTML starts with a tag, but not a block tag
+            # the HTML is out of spec. Attempt a fix by wrapping the
+            # HTML with <p></p>
+            if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
+                return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
 
-        return "a"
+            markup = render(name, e)
+            first_tag = False
 
-    def render(self, attr: str, content: str):
-        if attr in ["a"]:
-            return self.render_anchor(content)
-
-        if attr in ["blockquote"]:
-            return self.render_blockquote(content)
-
-        if attr in ["br"]:
-            return self.render_br(content)
-
-        if attr in ["em"]:
-            return self.render_em(content)
-
-        if attr in ["ol"]:
-            return self.render_ol(content)
-
-        if attr in ["pre"]:
-            return self.render_pre(content)
-
-        if attr in ["span"]:
-            return self.render_span(content)
-
-        if attr in ["b", "strong"]:
-            return self.render_strong(content)
-
-        if attr in ["ul"]:
-            return self.render_ul(content)
-
-        # Glitch-soc and Pleroma allow <H1>...<H6> in content
-        # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
-        if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]:
-            return self.basic_block_tag_handler(content)
-
-        # Fall back to inline_tag_to_text handler
-        return self.inline_tag_to_text(content)
-
-    def render_anchor(self, tag) -> Tuple:
-        """anchor tag handler"""
-
-        markups = self.process_inline_tag_children(tag)
-        if not markups:
-            return (tag.name, "")
-
-        href = tag.attrs["href"]
-        title, attrib_list = decompose_tagmarkup(markups)
-        if not attrib_list:
-            attrib_list = [tag]
-        if href and has_urwidgets:
-            # only if we have urwidgets loaded for OCS 8 hyperlinks:
-            # urlencode the path and query portions of the URL
-            href = urlencode_url(href)
-            # use ASCII ETX (end of record) as a
-            # delimiter between the title and the HREF
-            title += f"\x03{href}"
-
-        attr = self.get_best_anchor_attr(attrib_list)
-
-        if attr == "a":
-            # didn't find an attribute to use
-            # in the child markup, so let's
-            # try the anchor tag's own attributes
-
-            attr = self.get_urwid_attr_name(tag)
-
-        # hashtag anchors have a class of "mention hashtag"
-        # or "hashtag"
-        # we'll return style "class_mention_hashtag"
-        # or "class_hashtag"
-        # in that case; see corresponding palette entry
-        # in constants.py controlling hashtag highlighting
-
-        return (attr, title)
-
-    def render_blockquote(self, tag) -> urwid.Widget:
-        widget_list = self.process_block_tag_children(tag)
-        blockquote_widget = urwid.LineBox(
-            urwid.Padding(
-                urwid.Pile(widget_list),
+        if not isinstance(markup, urwid.Widget):
+            # plaintext, so create a padded text widget
+            txt = text_to_widget("", markup)
+            markup = urwid.Padding(
+                txt,
                 align="left",
                 width=("relative", 100),
                 min_width=None,
-                left=1,
-                right=1,
-            ),
-            tlcorner="",
-            tline="",
-            lline="│",
-            trcorner="",
-            blcorner="",
-            rline="",
-            bline="",
-            brcorner="",
-        )
-        return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
+            )
+        widgets.append(markup)
+        # separate top level widgets with a blank line
+        widgets.append(urwid.Divider(" "))
+    return widgets[:-1]  # but suppress the last blank line
 
-    def render_br(self, tag) -> Tuple:
-        return ("br", "\n")
 
-    def render_em(self, tag) -> Tuple:
-        # to simplify the number of palette entries
-        # translate EM to I (italic)
-        markups = self.process_inline_tag_children(tag)
-        if not markups:
-            return ("i", "")
+def inline_tag_to_text(tag) -> Tuple:
+    """Convert html tag to plain text with tag as attributes recursively"""
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return (tag.name, "")
+    return (tag.name, markups)
 
-        # special case processing for bold and italic
-        for parent in tag.parents:
-            if parent.name == "b" or parent.name == "strong":
-                return ("bi", markups)
 
-        return ("i", markups)
+def process_inline_tag_children(tag) -> List:
+    """Recursively retrieve all children
+    and convert to a list of markup text"""
+    markups = []
+    for child in tag.children:
+        if isinstance(child, Tag):
+            markup = render(child.name, child)
+            markups.append(markup)
+        else:
+            markups.append(child)
+    return markups
 
-    def render_ol(self, tag) -> urwid.Widget:
-        """ordered list tag handler"""
 
-        widgets = []
-        list_item_num = 1
-        increment = -1 if tag.has_attr("reversed") else 1
+def text_to_widget(attr, markup) -> urwid.Widget:
+    if not has_urwidgets:
+        return urwid.Text((attr, markup))
 
-        # get ol start= attribute if present
-        if tag.has_attr("start") and len(tag.attrs["start"]) > 0:
-            try:
-                list_item_num = int(tag.attrs["start"])
-            except ValueError:
-                pass
+    TRANSFORM = {
+        # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
+        re.compile(r"(^.+)\x03(.+$)"): lambda g: (
+            len(g[1]),
+            urwid.Filler(Hyperlink(g[2], anchor_attr, g[1])),
+        ),
+    }
+    markup_list = []
 
-        for li in tag.find_all("li", recursive=False):
-            markup = self.render("li", li)
-
-            # li value= attribute will change the item number
-            # it also overrides any ol start= attribute
-
-            if li.has_attr("value") and len(li.attrs["value"]) > 0:
-                try:
-                    list_item_num = int(li.attrs["value"])
-                except ValueError:
-                    pass
-
-            if not isinstance(markup, urwid.Widget):
-                txt = self.text_to_widget("li", [str(list_item_num), ". ", markup])
-                # 1. foo, 2. bar, etc.
-                widgets.append(txt)
-            else:
-                txt = self.text_to_widget("li", [str(list_item_num), ". "])
-                columns = urwid.Columns(
-                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+    for run in markup:
+        if isinstance(run, tuple):
+            txt, attr_list = decompose_tagmarkup(run)
+            # find anchor titles with an ETX separator followed by href
+            m = re.match(r"(^.+)\x03(.+$)", txt)
+            if m:
+                anchor_attr = get_best_anchor_attr(attr_list)
+                markup_list.append(
+                    parse_text(
+                        txt,
+                        TRANSFORM,
+                        lambda pattern, groups, span: TRANSFORM[pattern](groups),
+                    )
                 )
-                widgets.append(columns)
+            else:
+                markup_list.append(run)
+        else:
+            markup_list.append(run)
 
-            list_item_num += increment
+    return TextEmbed(markup_list)
 
-        return urwid.Pile(widgets)
 
-    def render_pre(self, tag) -> urwid.Widget:
-        # <PRE> tag spec says that text should not wrap,
-        # but horizontal screen space is at a premium
-        # and we have no horizontal scroll bar, so allow
-        # wrapping.
+def process_block_tag_children(tag) -> List[urwid.Widget]:
+    """Recursively retrieve all children
+    and convert to a list of widgets
+    any inline tags containing text will be
+    converted to Text widgets"""
 
-        widget_list = [urwid.Divider(" ")]
-        widget_list += self.process_block_tag_children(tag)
+    pre_widget_markups = []
+    post_widget_markups = []
+    child_widgets = []
+    found_nested_widget = False
 
-        pre_widget = urwid.Padding(
+    for child in tag.children:
+        if isinstance(child, Tag):
+            # child is a nested tag; process using custom method
+            # or default to inline_tag_to_text
+            result = render(child.name, child)
+            if isinstance(result, urwid.Widget):
+                found_nested_widget = True
+                child_widgets.append(result)
+            else:
+                if not found_nested_widget:
+                    pre_widget_markups.append(result)
+                else:
+                    post_widget_markups.append(result)
+        else:
+            # child is text; append to the appropriate markup list
+            if not found_nested_widget:
+                pre_widget_markups.append(child)
+            else:
+                post_widget_markups.append(child)
+
+    widget_list = []
+    if len(pre_widget_markups):
+        widget_list.append(text_to_widget(tag.name, pre_widget_markups))
+
+    if len(child_widgets):
+        widget_list += child_widgets
+
+    if len(post_widget_markups):
+        widget_list.append(text_to_widget(tag.name, post_widget_markups))
+
+    return widget_list
+
+
+def get_urwid_attr_name(tag) -> str:
+    """Get the class name and translate to a
+    name suitable for use as an urwid
+    text attribute name"""
+
+    if "class" in tag.attrs:
+        clss = tag.attrs["class"]
+        if len(clss) > 0:
+            style_name = "class_" + "_".join(clss)
+            # return the class name, only if we
+            # find it as a defined palette name
+            if style_name in STYLE_NAMES:
+                return style_name
+
+    # fallback to returning the tag name
+    return tag.name
+
+
+def basic_block_tag_handler(tag) -> urwid.Widget:
+    """default for block tags that need no special treatment"""
+    return urwid.Pile(process_block_tag_children(tag))
+
+
+def get_best_anchor_attr(attrib_list) -> str:
+    if not attrib_list:
+        return ""
+    flat_al = list(flatten(attrib_list))
+
+    for a in flat_al[0]:
+        # ref: https://docs.joinmastodon.org/spec/activitypub/
+        # these are the class names (translated to attrib names)
+        # that we can support for display
+
+        try:
+            if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]:
+                return a[0]
+        except KeyError:
+            continue
+
+    return "a"
+
+
+def render(attr: str, content: str):
+    if attr in ["a"]:
+        return render_anchor(content)
+
+    if attr in ["blockquote"]:
+        return render_blockquote(content)
+
+    if attr in ["br"]:
+        return render_br(content)
+
+    if attr in ["em"]:
+        return render_em(content)
+
+    if attr in ["ol"]:
+        return render_ol(content)
+
+    if attr in ["pre"]:
+        return render_pre(content)
+
+    if attr in ["span"]:
+        return render_span(content)
+
+    if attr in ["b", "strong"]:
+        return render_strong(content)
+
+    if attr in ["ul"]:
+        return render_ul(content)
+
+    # Glitch-soc and Pleroma allow <H1>...<H6> in content
+    # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
+    if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]:
+        return basic_block_tag_handler(content)
+
+    # Fall back to inline_tag_to_text handler
+    return inline_tag_to_text(content)
+
+
+def render_anchor(tag) -> Tuple:
+    """anchor tag handler"""
+
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return (tag.name, "")
+
+    href = tag.attrs["href"]
+    title, attrib_list = decompose_tagmarkup(markups)
+    if not attrib_list:
+        attrib_list = [tag]
+    if href and has_urwidgets:
+        # only if we have urwidgets loaded for OCS 8 hyperlinks:
+        # urlencode the path and query portions of the URL
+        href = urlencode_url(href)
+        # use ASCII ETX (end of record) as a
+        # delimiter between the title and the HREF
+        title += f"\x03{href}"
+
+    attr = get_best_anchor_attr(attrib_list)
+
+    if attr == "a":
+        # didn't find an attribute to use
+        # in the child markup, so let's
+        # try the anchor tag's own attributes
+
+        attr = get_urwid_attr_name(tag)
+
+    # hashtag anchors have a class of "mention hashtag"
+    # or "hashtag"
+    # we'll return style "class_mention_hashtag"
+    # or "class_hashtag"
+    # in that case; see corresponding palette entry
+    # in constants.py controlling hashtag highlighting
+
+    return (attr, title)
+
+
+def render_blockquote(tag) -> urwid.Widget:
+    widget_list = process_block_tag_children(tag)
+    blockquote_widget = urwid.LineBox(
+        urwid.Padding(
             urwid.Pile(widget_list),
             align="left",
             width=("relative", 100),
             min_width=None,
             left=1,
             right=1,
-        )
-        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
+        ),
+        tlcorner="",
+        tline="",
+        lline="│",
+        trcorner="",
+        blcorner="",
+        rline="",
+        bline="",
+        brcorner="",
+    )
+    return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
 
-    def render_span(self, tag) -> Tuple:
-        markups = self.process_inline_tag_children(tag)
 
-        if not markups:
-            return (tag.name, "")
+def render_br(tag) -> Tuple:
+    return ("br", "\n")
 
-        # span inherits its parent's class definition
-        # unless it has a specific class definition
-        # of its own
 
-        if "class" in tag.attrs:
-            # uncomment the following code to hide all HTML marked
-            # invisible (generally, the http:// prefix of URLs)
-            # could be a user preference, it's only advisable if
-            # the terminal supports OCS 8 hyperlinks (and that's not
-            # automatically detectable)
+def render_em(tag) -> Tuple:
+    # to simplify the number of palette entries
+    # translate EM to I (italic)
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return ("i", "")
 
-            # if "invisible" in tag.attrs["class"]:
-            #     return (tag.name, "")
+    # special case processing for bold and italic
+    for parent in tag.parents:
+        if parent.name == "b" or parent.name == "strong":
+            return ("bi", markups)
 
-            style_name = self.get_urwid_attr_name(tag)
+    return ("i", markups)
 
-            if style_name != "span":
-                # unique class name matches an entry in our palette
-                return (style_name, markups)
 
-        if tag.parent:
-            return (self.get_urwid_attr_name(tag.parent), markups)
+def render_ol(tag) -> urwid.Widget:
+    """ordered list tag handler"""
+
+    widgets = []
+    list_item_num = 1
+    increment = -1 if tag.has_attr("reversed") else 1
+
+    # get ol start= attribute if present
+    if tag.has_attr("start") and len(tag.attrs["start"]) > 0:
+        try:
+            list_item_num = int(tag.attrs["start"])
+        except ValueError:
+            pass
+
+    for li in tag.find_all("li", recursive=False):
+        markup = render("li", li)
+
+        # li value= attribute will change the item number
+        # it also overrides any ol start= attribute
+
+        if li.has_attr("value") and len(li.attrs["value"]) > 0:
+            try:
+                list_item_num = int(li.attrs["value"])
+            except ValueError:
+                pass
+
+        if not isinstance(markup, urwid.Widget):
+            txt = text_to_widget("li", [str(list_item_num), ". ", markup])
+            # 1. foo, 2. bar, etc.
+            widgets.append(txt)
         else:
-            # fallback
-            return ("span", markups)
+            txt = text_to_widget("li", [str(list_item_num), ". "])
+            columns = urwid.Columns(
+                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+            )
+            widgets.append(columns)
 
-    def render_strong(self, tag) -> Tuple:
-        # to simplify the number of palette entries
-        # translate STRONG to B (bold)
-        markups = self.process_inline_tag_children(tag)
-        if not markups:
-            return ("b", "")
+        list_item_num += increment
 
-        # special case processing for bold and italic
-        for parent in tag.parents:
-            if parent.name == "i" or parent.name == "em":
-                return ("bi", markups)
+    return urwid.Pile(widgets)
 
-        return ("b", markups)
 
-    def render_ul(self, tag) -> urwid.Widget:
-        """unordered list tag handler"""
+def render_pre(tag) -> urwid.Widget:
+    # <PRE> tag spec says that text should not wrap,
+    # but horizontal screen space is at a premium
+    # and we have no horizontal scroll bar, so allow
+    # wrapping.
 
-        widgets = []
+    widget_list = [urwid.Divider(" ")]
+    widget_list += process_block_tag_children(tag)
 
-        for li in tag.find_all("li", recursive=False):
-            markup = self.render("li", li)
+    pre_widget = urwid.Padding(
+        urwid.Pile(widget_list),
+        align="left",
+        width=("relative", 100),
+        min_width=None,
+        left=1,
+        right=1,
+    )
+    return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
 
-            if not isinstance(markup, urwid.Widget):
-                txt = self.text_to_widget("li", ["\N{bullet} ", markup])
-                # * foo, * bar, etc.
-                widgets.append(txt)
-            else:
-                txt = self.text_to_widget("li", ["\N{bullet} "])
-                columns = urwid.Columns(
-                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
-                )
-                widgets.append(columns)
 
-        return urwid.Pile(widgets)
+def render_span(tag) -> Tuple:
+    markups = process_inline_tag_children(tag)
+
+    if not markups:
+        return (tag.name, "")
+
+    # span inherits its parent's class definition
+    # unless it has a specific class definition
+    # of its own
+
+    if "class" in tag.attrs:
+        # uncomment the following code to hide all HTML marked
+        # invisible (generally, the http:// prefix of URLs)
+        # could be a user preference, it's only advisable if
+        # the terminal supports OCS 8 hyperlinks (and that's not
+        # automatically detectable)
+
+        # if "invisible" in tag.attrs["class"]:
+        #     return (tag.name, "")
+
+        style_name = get_urwid_attr_name(tag)
+
+        if style_name != "span":
+            # unique class name matches an entry in our palette
+            return (style_name, markups)
+
+    if tag.parent:
+        return (get_urwid_attr_name(tag.parent), markups)
+    else:
+        # fallback
+        return ("span", markups)
+
+
+def render_strong(tag) -> Tuple:
+    # to simplify the number of palette entries
+    # translate STRONG to B (bold)
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return ("b", "")
+
+    # special case processing for bold and italic
+    for parent in tag.parents:
+        if parent.name == "i" or parent.name == "em":
+            return ("bi", markups)
+
+    return ("b", markups)
+
+
+def render_ul(tag) -> urwid.Widget:
+    """unordered list tag handler"""
+
+    widgets = []
+
+    for li in tag.find_all("li", recursive=False):
+        markup = render("li", li)
+
+        if not isinstance(markup, urwid.Widget):
+            txt = text_to_widget("li", ["\N{bullet} ", markup])
+            # * foo, * bar, etc.
+            widgets.append(txt)
+        else:
+            txt = text_to_widget("li", ["\N{bullet} "])
+            columns = urwid.Columns(
+                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+            )
+            widgets.append(columns)
+
+    return urwid.Pile(widgets)
 
 
 def flatten(data):
diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py
index 1fef40c..b278d08 100644
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -6,6 +6,7 @@ import webbrowser
 from typing import List, Optional
 
 from toot.tui import app
+from toot.tui.richtext import html_to_widgets
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name
 
@@ -13,7 +14,6 @@ from toot.entities import Status
 from toot.tui.scroll import Scrollable, ScrollBar
 from toot.tui.utils import highlight_keys
 from toot.tui.widgets import SelectableText, SelectableColumns
-from toot.tui.richtext import ContentParser
 from toot.utils import urlencode_url
 from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets
 
@@ -356,9 +356,7 @@ class StatusDetails(urwid.Pile):
             yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
         else:
             content = status.original.translation if status.original.show_translation else status.data["content"]
-
-            parser = ContentParser()
-            widgetlist = parser.html_to_widgets(content)
+            widgetlist = html_to_widgets(content)
 
             for line in widgetlist:
                 yield (line)

From f50dea1175d88ff5541418d8da71c4df0061379d Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 11:08:38 +0100
Subject: [PATCH 12/20] Simplify text_to_widget

This was doing double regex matching; calling parse_text was not needed
at all.
---
 toot/tui/richtext.py | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
index b4e5b03..ae463ae 100644
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -4,7 +4,7 @@ import unicodedata
 
 from bs4.element import NavigableString, Tag
 from toot.tui.constants import PALETTE
-from toot.tui.stubs.urwidgets import TextEmbed, Hyperlink, parse_text, has_urwidgets
+from toot.tui.stubs.urwidgets import TextEmbed, Hyperlink, has_urwidgets
 from toot.utils import parse_html, urlencode_url
 from typing import List, Tuple
 from urwid.util import decompose_tagmarkup
@@ -80,33 +80,26 @@ def process_inline_tag_children(tag) -> List:
     return markups
 
 
+URL_PATTERN = re.compile(r"(^.+)\x03(.+$)")
+
+
 def text_to_widget(attr, markup) -> urwid.Widget:
     if not has_urwidgets:
         return urwid.Text((attr, markup))
 
-    TRANSFORM = {
-        # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
-        re.compile(r"(^.+)\x03(.+$)"): lambda g: (
-            len(g[1]),
-            urwid.Filler(Hyperlink(g[2], anchor_attr, g[1])),
-        ),
-    }
     markup_list = []
-
     for run in markup:
         if isinstance(run, tuple):
             txt, attr_list = decompose_tagmarkup(run)
             # find anchor titles with an ETX separator followed by href
-            m = re.match(r"(^.+)\x03(.+$)", txt)
-            if m:
+            match = URL_PATTERN.match(txt)
+            if match:
+                label, url = match.groups()
                 anchor_attr = get_best_anchor_attr(attr_list)
-                markup_list.append(
-                    parse_text(
-                        txt,
-                        TRANSFORM,
-                        lambda pattern, groups, span: TRANSFORM[pattern](groups),
-                    )
-                )
+                markup_list.append((
+                    len(label),
+                    urwid.Filler(Hyperlink(url, anchor_attr, label)),
+                ))
             else:
                 markup_list.append(run)
         else:

From f96b1b722cd429c4c7e69108e66df16673929bad Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 11:11:25 +0100
Subject: [PATCH 13/20] Move richtext to its own module

This is the first step towards easier stubbing
---
 toot/tui/richtext/__init__.py       | 1 +
 toot/tui/{ => richtext}/richtext.py | 0
 2 files changed, 1 insertion(+)
 create mode 100644 toot/tui/richtext/__init__.py
 rename toot/tui/{ => richtext}/richtext.py (100%)

diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py
new file mode 100644
index 0000000..ba857ab
--- /dev/null
+++ b/toot/tui/richtext/__init__.py
@@ -0,0 +1 @@
+from .richtext import html_to_widgets
diff --git a/toot/tui/richtext.py b/toot/tui/richtext/richtext.py
similarity index 100%
rename from toot/tui/richtext.py
rename to toot/tui/richtext/richtext.py

From e5ac82bb010bee0a634538ce2f2a5ba0edf4dcea Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 11:35:44 +0100
Subject: [PATCH 14/20] Add fallback for html_to_widgets

Remove has_urwidgets since we no longer need to worry if we have
urwidgets in the richtext module.
---
 toot/tui/richtext/__init__.py | 16 +++++++++++++++-
 toot/tui/richtext/richtext.py |  8 ++------
 toot/tui/utils.py             | 12 ++++++++++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py
index ba857ab..6359c24 100644
--- a/toot/tui/richtext/__init__.py
+++ b/toot/tui/richtext/__init__.py
@@ -1 +1,15 @@
-from .richtext import html_to_widgets
+import urwid
+
+from toot.tui.utils import highlight_hashtags
+from toot.utils import format_content
+from typing import List
+
+try:
+    from .richtext import html_to_widgets
+except ImportError:
+    # Fallback if urwidgets are not available
+    def html_to_widgets(html: str) -> List[urwid.Widget]:
+        return [
+            urwid.Text(highlight_hashtags(line))
+            for line in format_content(html)
+        ]
diff --git a/toot/tui/richtext/richtext.py b/toot/tui/richtext/richtext.py
index ae463ae..9db7e73 100644
--- a/toot/tui/richtext/richtext.py
+++ b/toot/tui/richtext/richtext.py
@@ -4,10 +4,10 @@ import unicodedata
 
 from bs4.element import NavigableString, Tag
 from toot.tui.constants import PALETTE
-from toot.tui.stubs.urwidgets import TextEmbed, Hyperlink, has_urwidgets
 from toot.utils import parse_html, urlencode_url
 from typing import List, Tuple
 from urwid.util import decompose_tagmarkup
+from urwidgets import Hyperlink, TextEmbed
 
 
 STYLE_NAMES = [p[0] for p in PALETTE]
@@ -84,9 +84,6 @@ URL_PATTERN = re.compile(r"(^.+)\x03(.+$)")
 
 
 def text_to_widget(attr, markup) -> urwid.Widget:
-    if not has_urwidgets:
-        return urwid.Text((attr, markup))
-
     markup_list = []
     for run in markup:
         if isinstance(run, tuple):
@@ -242,8 +239,7 @@ def render_anchor(tag) -> Tuple:
     title, attrib_list = decompose_tagmarkup(markups)
     if not attrib_list:
         attrib_list = [tag]
-    if href and has_urwidgets:
-        # only if we have urwidgets loaded for OCS 8 hyperlinks:
+    if href:
         # urlencode the path and query portions of the URL
         href = urlencode_url(href)
         # use ASCII ETX (end of record) as a
diff --git a/toot/tui/utils.py b/toot/tui/utils.py
index 0ccff9d..734ae32 100644
--- a/toot/tui/utils.py
+++ b/toot/tui/utils.py
@@ -35,6 +35,18 @@ def highlight_keys(text, high_attr, low_attr=""):
     return list(_gen())
 
 
+def highlight_hashtags(line):
+    hline = []
+
+    for p in re.split(HASHTAG_PATTERN, line):
+        if p.startswith("#"):
+            hline.append(("hashtag", p))
+        else:
+            hline.append(p)
+
+    return hline
+
+
 def show_media(paths):
     """
     Attempt to open an image viewer to show given media files.

From d6ff3cc3a80f83c834e6cb57602e1e52a4d3df80 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 11:46:54 +0100
Subject: [PATCH 15/20] Extract url_to_widget, add fallback

---
 toot/tui/richtext/__init__.py |  5 ++++-
 toot/tui/richtext/richtext.py |  5 +++++
 toot/tui/timeline.py          | 23 +++--------------------
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py
index 6359c24..07e31c8 100644
--- a/toot/tui/richtext/__init__.py
+++ b/toot/tui/richtext/__init__.py
@@ -5,7 +5,7 @@ from toot.utils import format_content
 from typing import List
 
 try:
-    from .richtext import html_to_widgets
+    from .richtext import html_to_widgets, url_to_widget
 except ImportError:
     # Fallback if urwidgets are not available
     def html_to_widgets(html: str) -> List[urwid.Widget]:
@@ -13,3 +13,6 @@ except ImportError:
             urwid.Text(highlight_hashtags(line))
             for line in format_content(html)
         ]
+
+    def url_to_widget(url: str):
+        return urwid.Text(("link", url))
diff --git a/toot/tui/richtext/richtext.py b/toot/tui/richtext/richtext.py
index 9db7e73..71897c4 100644
--- a/toot/tui/richtext/richtext.py
+++ b/toot/tui/richtext/richtext.py
@@ -59,6 +59,11 @@ def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]:
     return widgets[:-1]  # but suppress the last blank line
 
 
+def url_to_widget(url: str):
+    widget = len(url), urwid.Filler(Hyperlink(url, "link", url))
+    return TextEmbed(widget)
+
+
 def inline_tag_to_text(tag) -> Tuple:
     """Convert html tag to plain text with tag as attributes recursively"""
     markups = process_inline_tag_children(tag)
diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py
index b278d08..93421ce 100644
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -1,12 +1,11 @@
 import logging
-import re
 import urwid
 import webbrowser
 
 from typing import List, Optional
 
 from toot.tui import app
-from toot.tui.richtext import html_to_widgets
+from toot.tui.richtext import html_to_widgets, url_to_widget
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name
 
@@ -14,8 +13,6 @@ from toot.entities import Status
 from toot.tui.scroll import Scrollable, ScrollBar
 from toot.tui.utils import highlight_keys
 from toot.tui.widgets import SelectableText, SelectableColumns
-from toot.utils import urlencode_url
-from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets
 
 logger = logging.getLogger("toot")
 
@@ -320,20 +317,6 @@ class StatusDetails(urwid.Pile):
             if status else ())
         return super().__init__(widget_list)
 
-    def linkify_content(self, text) -> urwid.Widget:
-        if not has_urwidgets:
-            return urwid.Text(("link", text))
-        TRANSFORM = {
-            # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget
-            re.compile(r'(https?://[^\s]+)'):
-                lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))),
-        }
-        markup_list = []
-
-        markup_list.append(parse_text(text, TRANSFORM,
-            lambda pattern, groups, span: TRANSFORM[pattern](groups)))
-        return TextEmbed(markup_list, align='left')
-
     def content_generator(self, status, reblogged_by):
         if reblogged_by:
             text = "♺ {} boosted".format(reblogged_by.display_name or reblogged_by.username)
@@ -368,7 +351,7 @@ class StatusDetails(urwid.Pile):
                     yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"]))
                     if m["description"]:
                         yield ("pack", urwid.Text(m["description"]))
-                    yield ("pack", self.linkify_content(m["url"]))
+                    yield ("pack", url_to_widget(m["url"]))
 
             poll = status.original.data.get("poll")
             if poll:
@@ -428,7 +411,7 @@ class StatusDetails(urwid.Pile):
         if card["description"]:
             yield urwid.Text(card["description"].strip())
             yield urwid.Text("")
-        yield self.linkify_content(card["url"])
+        yield url_to_widget(card["url"])
 
     def poll_generator(self, poll):
         for idx, option in enumerate(poll["options"]):

From 57cfd41613822bfa54a2e83639096795c0ed28ca Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 11:49:48 +0100
Subject: [PATCH 16/20] Remove old stubs

---
 .flake8                           |  1 -
 toot/tui/stubs/stub_hyperlink.py  | 25 -------------------------
 toot/tui/stubs/stub_text_embed.py | 21 ---------------------
 toot/tui/stubs/urwidgets.py       |  8 --------
 toot/tui/urwidgets.py             |  8 --------
 5 files changed, 63 deletions(-)
 delete mode 100644 toot/tui/stubs/stub_hyperlink.py
 delete mode 100644 toot/tui/stubs/stub_text_embed.py
 delete mode 100644 toot/tui/stubs/urwidgets.py
 delete mode 100644 toot/tui/urwidgets.py

diff --git a/.flake8 b/.flake8
index cc916ad..6efbecd 100644
--- a/.flake8
+++ b/.flake8
@@ -1,5 +1,4 @@
 [flake8]
 exclude=build,tests,tmp,venv,toot/tui/scroll.py
 ignore=E128,W503
-per-file-ignores=toot/tui/stubs/urwidgets.py:F401
 max-line-length=120
diff --git a/toot/tui/stubs/stub_hyperlink.py b/toot/tui/stubs/stub_hyperlink.py
deleted file mode 100644
index aa0488d..0000000
--- a/toot/tui/stubs/stub_hyperlink.py
+++ /dev/null
@@ -1,25 +0,0 @@
-__all__ = ("Hyperlink",)
-
-import urwid
-
-
-class Hyperlink(urwid.WidgetWrap):
-    def __init__(self, uri, attr, text):
-        pass
-
-    def render(self, size, focus):
-        return None
-
-
-class HyperlinkCanvas(urwid.Canvas):
-    def __init__(self, uri: str, text_canv: urwid.TextCanvas):
-        pass
-
-    def cols(self):
-        return 0
-
-    def content(self, *args, **kwargs):
-        yield [None]
-
-    def rows(self):
-        return 0
diff --git a/toot/tui/stubs/stub_text_embed.py b/toot/tui/stubs/stub_text_embed.py
deleted file mode 100644
index 622b5f7..0000000
--- a/toot/tui/stubs/stub_text_embed.py
+++ /dev/null
@@ -1,21 +0,0 @@
-__all__ = ("parse_text", "TextEmbed")
-
-import urwid
-
-
-class TextEmbed(urwid.Text):
-    def get_text(self):
-        return None
-
-    def render(self, size, focus):
-        return None
-
-    def set_text(self, markup):
-        pass
-
-    def set_wrap_mode(self, mode):
-        pass
-
-
-def parse_text(text, patterns, repl, *repl_args, **repl_kwargs):
-    return None
diff --git a/toot/tui/stubs/urwidgets.py b/toot/tui/stubs/urwidgets.py
deleted file mode 100644
index 92737d3..0000000
--- a/toot/tui/stubs/urwidgets.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# If urwidgets is loaded use it; otherwise use our stubs
-try:
-    from urwidgets import Hyperlink, TextEmbed, parse_text
-    has_urwidgets = True
-except ImportError:
-    from .stub_hyperlink import Hyperlink
-    from .stub_text_embed import TextEmbed, parse_text
-    has_urwidgets = False
diff --git a/toot/tui/urwidgets.py b/toot/tui/urwidgets.py
deleted file mode 100644
index ee731a8..0000000
--- a/toot/tui/urwidgets.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# If urwidgets is loaded use it; otherwise use our stubs
-try:
-    from urwidgets import Hyperlink, TextEmbed, parse_text  # noqa: F401
-    has_urwidgets = True
-except ImportError:
-    from .stub_hyperlink import Hyperlink  # noqa: F401
-    from .stub_text_embed import TextEmbed, parse_text  # noqa: F401
-    has_urwidgets = False

From bc542b5e37d2af3de0b4c71bdffa4803096f3e41 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 11:51:11 +0100
Subject: [PATCH 17/20] Add richtext package

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0739408..c0574c6 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@ setup(
         'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
         'Programming Language :: Python :: 3',
     ],
-    packages=['toot', 'toot.tui', 'toot.utils'],
+    packages=['toot', 'toot.tui', 'toot.tui.richtext', 'toot.utils'],
     python_requires=">=3.7",
     install_requires=[
         "requests>=2.13,<3.0",

From 414d9e8ff2bbae999af92696c02e89eae95dc800 Mon Sep 17 00:00:00 2001
From: Ivan Habunek <ivan@habunek.com>
Date: Thu, 16 Nov 2023 12:29:37 +0100
Subject: [PATCH 18/20] Start testing richtext

---
 tests/tui/test_rich_text.py | 45 +++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 tests/tui/test_rich_text.py

diff --git a/tests/tui/test_rich_text.py b/tests/tui/test_rich_text.py
new file mode 100644
index 0000000..68453fa
--- /dev/null
+++ b/tests/tui/test_rich_text.py
@@ -0,0 +1,45 @@
+from urwid import Divider, Filler, Pile
+from toot.tui.richtext import url_to_widget
+from urwidgets import Hyperlink, TextEmbed
+
+from toot.tui.richtext.richtext import html_to_widgets
+
+
+def test_url_to_widget():
+    url = "http://foo.bar"
+    embed_widget = url_to_widget(url)
+    assert isinstance(embed_widget, TextEmbed)
+
+    [(filler, length)] = embed_widget.embedded
+    assert length == len(url)
+    assert isinstance(filler, Filler)
+
+    link_widget: Hyperlink = filler.base_widget
+    assert isinstance(link_widget, Hyperlink)
+
+    assert link_widget.attrib == "link"
+    assert link_widget.text == url
+    assert link_widget.uri == url
+
+
+def test_html_to_widgets():
+    html = """
+    <p>foo</p>
+    <p>foo <b>bar</b> <i>baz</i></p>
+    """.strip()
+
+    [foo, divider, bar] = html_to_widgets(html)
+
+    assert isinstance(foo, Pile)
+    assert isinstance(divider, Divider)
+    assert isinstance(bar, Pile)
+
+    [foo_embed] = foo.widget_list
+    assert foo_embed.embedded == []
+    assert foo_embed.attrib == []
+    assert foo_embed.text == "foo"
+
+    [bar_embed] = bar.widget_list
+    assert bar_embed.embedded == []
+    assert bar_embed.attrib == [(None, 4), ("b", 3), (None, 1), ("i", 3)]
+    assert bar_embed.text == "foo bar baz"

From 732b9feed5d958b0caa1be77b42ed0befd879e62 Mon Sep 17 00:00:00 2001
From: Daniel Schwarz <schwarz.dan@gmail.com>
Date: Thu, 16 Nov 2023 20:24:53 -0500
Subject: [PATCH 19/20] Added test for toot.utils.urlencode_url(...)

---
 tests/test_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index a1bba9a..9dbb579 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,6 +3,7 @@ import pytest
 
 from toot.console import duration
 from toot.wcstring import wc_wrap, trunc, pad, fit_text
+from toot.utils import urlencode_url
 
 
 def test_pad():
@@ -201,3 +202,8 @@ def test_duration():
 
     with pytest.raises(ArgumentTypeError):
         duration("banana")
+
+
+def test_urlencode_url():
+    assert urlencode_url("https://www.example.com") == "https://www.example.com"
+    assert urlencode_url("https://www.example.com/url%20with%20spaces") == "https://www.example.com/url%20with%20spaces"

From 9b9c153531fe3c1d437397564930a58658d101c5 Mon Sep 17 00:00:00 2001
From: Daniel Schwarz <schwarz.dan@gmail.com>
Date: Thu, 16 Nov 2023 20:33:24 -0500
Subject: [PATCH 20/20] Fixed github build to include richtext "extra" which
 pulls in urwidgets dependency, required for builds

---
 .github/workflows/test.yml | 2 +-
 setup.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b13d2a5..7747370 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,7 +18,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .
+          pip install -e .\[richtext\]
           pip install -r requirements-test.txt
       - name: Run tests
         run: |
diff --git a/setup.py b/setup.py
index c0574c6..b260ce5 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ setup(
         "tomlkit>=0.10.0,<1.0"
     ],
     extras_require={
-        "hyperlinks": ['urwidgets>=0.1,<0.2'],
+        "richtext": ['urwidgets>=0.1,<0.2'],
     },
     entry_points={
         'console_scripts': [