diff --git a/.gitignore b/.gitignore
index bc647eb..06bdc4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.egg-info/
 *.pyc
 .pypirc
+.vscode
 /.cache/
 /.coverage
 /.env
@@ -14,3 +15,4 @@
 debug.log
 /pyrightconfig.json
 /book
+/venv
\ No newline at end of file
diff --git a/toot/tui/constants.py b/toot/tui/constants.py
index e866e34..13f201d 100644
--- a/toot/tui/constants.py
+++ b/toot/tui/constants.py
@@ -37,7 +37,38 @@ PALETTE = [
     ('yellow_bold', 'yellow,bold', ''),
     ('red', 'dark red', ''),
     ('warning', 'light red', ''),
-    ('white_bold', 'white,bold', '')
+    ('white_bold', 'white,bold', ''),
+
+    # HTML tag styling
+
+    # note, anchor styling is often overridden
+    # by class names in Mastodon statuses
+    # so you won't see the italics.
+    ('a', ',italics', ''),
+    ('em', 'white,italics', ''),
+    ('i', 'white,italics', ''),
+
+    ('strong', 'white,bold', ''),
+    ('b', 'white,bold', ''),
+
+    ('u', 'white,underline', ''),
+
+    ('del', 'white, strikethrough', ''),
+
+    ('code', 'white, standout', ''),
+    ('pre', 'white, standout', ''),
+
+    ('blockquote', 'light gray', ''),
+
+    ('h1', 'yellow, bold', ''),
+    ('h2', 'dark red, bold', ''),
+    ('h3', 'yellow, bold', ''),
+    ('h4', 'yellow, bold', ''),
+    ('h5', 'yellow, bold', ''),
+    ('h6', 'yellow, bold', ''),
+
+    ('class_mention_hashtag', 'light cyan,bold', ''),
+
 ]
 
 VISIBILITY_OPTIONS = [
diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py
new file mode 100644
index 0000000..5031521
--- /dev/null
+++ b/toot/tui/richtext.py
@@ -0,0 +1,267 @@
+"""
+richtext
+"""
+from typing import List
+import urwid
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, Tag
+
+
+class ContentParser:
+    def __init__(self, config={}):
+        """Parse a limited subset of HTML and create urwid widgets."""
+        self.tag_to_method = {
+            "b": self.inline_tag_to_text,
+            "i": self.inline_tag_to_text,
+            "code": self.inline_tag_to_text,
+            "em": self.inline_tag_to_text,
+            "strong": self.inline_tag_to_text,
+            "del": self.inline_tag_to_text,
+        }
+
+    def html_to_widgets(self, html) -> List[urwid.Widget]:
+        """Convert html to urwid widgets"""
+        widgets: List[urwid.Widget] = []
+        soup = BeautifulSoup(html.replace('&apos;', "'"), "html.parser")
+        for e in soup.body or soup:
+            if isinstance(e, NavigableString):
+                continue
+            name = e.name
+            # get the custom method for the tag, defaulting to tag_to_text if none defined for this tag
+            method = self.tag_to_method.get(
+                name, getattr(self, "_" + name, self.inline_tag_to_text)
+            )
+
+            markup = method(e)  # either returns a Widget, or plain text
+            if not isinstance(markup, urwid.Widget):
+                # plaintext, so create a padded text widget
+                txt = urwid.Text(markup)
+                markup = urwid.Padding(
+                    txt,
+                    align="left",
+                    width=("relative", 100),
+                    min_width=None,
+                )
+            widgets.append(markup)
+        return widgets
+
+    def inline_tag_to_text(self, tag) -> list:
+        """Convert html tag to plain text with tag as attributes recursively"""
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return ""
+        return (tag.name, markups)
+
+    def process_inline_tag_children(self, tag) -> list:
+        markups = []
+        for child in tag.children:
+            if isinstance(child, Tag):
+                method = self.tag_to_method.get(
+                    child.name, getattr(self, "_" + child.name, self.inline_tag_to_text)
+                )
+                markup = method(child)
+                markups.append(markup)
+            else:
+                markups.append(child)
+        return markups
+
+    def process_block_tag_children(self, tag) -> List[urwid.Widget]:
+        pre_widget_markups = []
+        post_widget_markups = []
+        child_widgets = []
+        found_nested_widget = False
+
+        for child in tag.children:
+            if isinstance(child, Tag):
+                # child is a nested tag; process using custom method
+                # or default to inline_tag_to_text
+                method = self.tag_to_method.get(
+                    child.name, getattr(self, "_" + child.name, self.inline_tag_to_text)
+                )
+                result = method(child)
+                if isinstance(result, urwid.Widget):
+                    found_nested_widget = True
+                    child_widgets.append(result)
+                else:
+                    if not found_nested_widget:
+                        pre_widget_markups.append(result)
+                    else:
+                        post_widget_markups.append(result)
+            else:
+                # child is text; append to the appropriate markup list
+                if not found_nested_widget:
+                    pre_widget_markups.append(child)
+                else:
+                    post_widget_markups.append(child)
+
+        widget_list = []
+        if len(pre_widget_markups):
+            widget_list.append(urwid.Text((tag.name, pre_widget_markups)))
+
+        if len(child_widgets):
+            widget_list += child_widgets
+
+        if len(post_widget_markups):
+            widget_list.append(urwid.Text((tag.name, post_widget_markups)))
+
+        return widget_list
+
+    def get_style_name(self, tag) -> str:
+        # TODO: think about whitelisting allowed classes,
+        # or blacklisting classes we do not want.
+        # Classes to whitelist: "mention" "hashtag"
+        # used in anchor tags
+        # Classes to blacklist: "invisible" used in Akkoma
+        # anchor titles
+        style_name = tag.name
+        if "class" in tag.attrs:
+            clss = tag.attrs["class"]
+            if len(clss) > 0:
+                style_name = "class_" + "_".join(clss)
+        return style_name
+
+    # Tag handlers start here.
+    # Tags not explicitly listed are "supported" by
+    # rendering as text.
+    # Inline tags return a list of marked up text for urwid.Text
+    # Block tags return urwid.Widget
+
+
+    def basic_block_tag_handler(self, tag) -> urwid.Widget:
+        """default for block tags that need no special treatment"""
+        return urwid.Pile(self.process_block_tag_children(tag))
+
+    def _a(self, tag) -> list:
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return ""
+
+        # hashtag anchors have a class of "mention hashtag"
+        # we'll return style "class_mention_hashtag"
+        # in that case; set this up in constants.py
+        # to control highlighting of hashtags
+
+        return (self.get_style_name(tag), markups)
+
+    def _blockquote(self, tag) -> urwid.Widget:
+        widget_list = self.process_block_tag_children(tag)
+        blockquote_widget = urwid.LineBox(
+            urwid.Padding(
+                urwid.Pile(widget_list),
+                align="left",
+                width=("relative", 100),
+                min_width=None,
+                left=1,
+                right=1,
+            ),
+            tlcorner="",
+            tline="",
+            lline="│",
+            trcorner="",
+            blcorner="",
+            rline="",
+            bline="",
+            brcorner="",
+        )
+        return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
+
+
+    def _br(self, tag) -> list:
+        return (tag.name, ("br", "\n"))
+
+    _div = basic_block_tag_handler
+
+    _li = basic_block_tag_handler
+
+    # Glitch-soc and Pleroma allow <h1>...<h6> in content
+    # Mastodon (PR #23913) does not; header tags are converted to <p><strong></strong></p>
+
+    _h1 = basic_block_tag_handler
+
+    _h2 = basic_block_tag_handler
+
+    _h3 = basic_block_tag_handler
+
+    _h4 = basic_block_tag_handler
+
+    _h5 = basic_block_tag_handler
+
+    _h6 = basic_block_tag_handler
+
+    def _ol(self, tag) -> urwid.Widget:
+        return self.list_widget(tag, ordered=True)
+
+    _p = basic_block_tag_handler
+
+    def _pre(self, tag) -> urwid.Widget:
+
+        # <pre> tag spec says that text should not wrap,
+        # but horizontal screen space is at a premium
+        # and we have no horizontal scroll bar, so allow
+        # wrapping.
+
+        widget_list = [urwid.Divider(" ")]
+        widget_list += self.process_block_tag_children(tag)
+
+        pre_widget = urwid.Padding(
+            urwid.Pile(widget_list),
+            align="left",
+            width=("relative", 100),
+            min_width=None,
+            left=1,
+            right=1,
+        )
+        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
+
+    def _span(self, tag) -> list:
+        """Style a <span> by its own class, else by its parent's style."""
+        children = self.process_inline_tag_children(tag)
+        if not children:
+            return ""  # empty span contributes no markup
+
+        # Pick the element whose style applies: the span itself when it
+        # carries a class attribute, otherwise its parent if it has one.
+        if "class" in tag.attrs:
+            styled_by = tag
+        elif tag.parent:
+            styled_by = tag.parent
+        else:
+            styled_by = None
+
+        if styled_by is None:
+            return (tag.name, children)
+        return (self.get_style_name(styled_by), children)
+
+    def _ul(self, tag) -> urwid.Widget:
+        return self.list_widget(tag, ordered=False)  # unordered: "*" bullets
+
+    def list_widget(self, tag, ordered=False) -> urwid.Widget:
+        """Build a Pile with one row per direct <li> child of `tag`.
+
+        Rows are labelled "1.", "2.", ... when `ordered` is true and "*"
+        otherwise. Nested <li> elements are not visited here.
+        """
+        # The <li> handler does not depend on the item, so resolve it once.
+        handler = self.tag_to_method.get(
+            "li", getattr(self, "_li", self.inline_tag_to_text)
+        )
+        rows = []
+        items = tag.find_all("li", recursive=False)
+        for number, item in enumerate(items, start=1):
+            rendered = handler(item)
+
+            if isinstance(rendered, urwid.Widget):
+                # Widget content: the label sits in a column beside it.
+                label = [str(number) + "."] if ordered else "*"
+                row = urwid.Columns(
+                    [urwid.Text(("li", label)), ("weight", 9999, rendered)],
+                    dividechars=1,
+                    min_width=4,
+                )
+            else:
+                # Text markup: the label is prefixed inline ("1. foo", "* foo").
+                prefix = [str(number), ". "] if ordered else ["* "]
+                row = urwid.Text(("li", prefix + [rendered]))
+            rows.append(row)
+
+        return urwid.Pile(rows)
diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py
index fb255c8..b102e29 100644
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -7,11 +7,11 @@ from typing import List, Optional
 
 from .entities import Status
 from .scroll import Scrollable, ScrollBar
-from .utils import highlight_hashtags, parse_datetime, highlight_keys
+from .utils import parse_datetime, highlight_keys
 from .widgets import SelectableText, SelectableColumns
+from .richtext import ContentParser
 from toot.tui import app
 from toot.tui.utils import time_ago
-from toot.utils import format_content
 from toot.utils.language import language_name
 
 logger = logging.getLogger("toot")
@@ -341,8 +341,12 @@ class StatusDetails(urwid.Pile):
             yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
         else:
             content = status.original.translation if status.original.show_translation else status.data["content"]
-            for line in format_content(content):
-                yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
+
+            parser = ContentParser()
+            widgetlist = parser.html_to_widgets(content)
+
+            for line in widgetlist:
+                yield (line)
 
             media = status.data["media_attachments"]
             if media: