Support for rendering a subset of HTML tags in status content

Code is adapted from GPL3-licensed muv by @seonon
https://github.com/seonon/muv
richtext
Daniel Schwarz 2023-03-23 22:47:56 -04:00 zatwierdzone przez Ivan Habunek
rodzic 6ce728e020
commit 6b2c3f09bf
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: CDBD63C43A30BB95
4 zmienionych plików z 309 dodań i 5 usunięć

2
.gitignore vendored
Wyświetl plik

@ -1,6 +1,7 @@
*.egg-info/
*.pyc
.pypirc
.vscode
/.cache/
/.coverage
/.env
@ -14,3 +15,4 @@
debug.log
/pyrightconfig.json
/book
/venv

Wyświetl plik

@ -37,7 +37,38 @@ PALETTE = [
('yellow_bold', 'yellow,bold', ''),
('red', 'dark red', ''),
('warning', 'light red', ''),
('white_bold', 'white,bold', '')
('white_bold', 'white,bold', ''),
# HTML tag styling
# note, anchor styling is often overridden
# by class names in Mastodon statuses
# so you won't see the italics.
('a', ',italics', ''),
('em', 'white,italics', ''),
('i', 'white,italics', ''),
('strong', 'white,bold', ''),
('b', 'white,bold', ''),
('u', 'white,underline', ''),
('del', 'white, strikethrough', ''),
('code', 'white, standout', ''),
('pre', 'white, standout', ''),
('blockquote', 'light gray', ''),
('h1', 'yellow, bold', ''),
('h2', 'dark red, bold', ''),
('h3', 'yellow, bold', ''),
('h4', 'yellow, bold', ''),
('h5', 'yellow, bold', ''),
('h6', 'yellow, bold', ''),
('class_mention_hashtag', 'light cyan,bold', ''),
]
VISIBILITY_OPTIONS = [

Wyświetl plik

@ -0,0 +1,267 @@
"""
richtext
"""
from typing import List, Union

import urwid
from bs4 import BeautifulSoup
from bs4.element import NavigableString, Tag
class ContentParser:
    """Parse a limited subset of HTML and create urwid widgets.

    Every tag is rendered by a handler, looked up in this order:
    an entry in ``self.tag_to_method``, then a method named
    ``_<tagname>``, then ``inline_tag_to_text`` as the fallback.

    Inline handlers return urwid text markup (either ``""`` or a
    ``(style_name, markup)`` tuple); block handlers return a
    ``urwid.Widget``.
    """

    def __init__(self, config=None):
        """Set up the table of tags that are rendered as inline markup.

        Args:
            config: Not read by this class. Defaults to ``None`` instead
                of a mutable ``{}`` so no dict is shared between calls.
        """
        self.tag_to_method = {
            "b": self.inline_tag_to_text,
            "i": self.inline_tag_to_text,
            "code": self.inline_tag_to_text,
            "em": self.inline_tag_to_text,
            "strong": self.inline_tag_to_text,
            "del": self.inline_tag_to_text,
        }

    def get_handler(self, name):
        """Return the callable that renders tags called ``name``.

        Prefers ``self.tag_to_method[name]``, then the method
        ``_<name>``, and finally ``inline_tag_to_text``.

        Deliberately named without a leading underscore: any attribute
        called ``_<something>`` is treated as the handler for the tag
        ``<something>`` by the ``getattr`` lookup below.
        """
        return self.tag_to_method.get(
            name, getattr(self, "_" + name, self.inline_tag_to_text)
        )

    def html_to_widgets(self, html) -> List[urwid.Widget]:
        """Convert html to a list of urwid widgets.

        Only tags at the top level of the parsed document (or of its
        ``<body>``, when there is one) are rendered; top-level text
        nodes are skipped. A handler result that is not already a
        widget is wrapped in a full-width, left-aligned ``urwid.Text``.
        """
        widgets: List[urwid.Widget] = []
        # Replace the &apos; entity with a literal apostrophe before parsing.
        soup = BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
        for element in soup.body or soup:
            if isinstance(element, NavigableString):
                continue
            # either returns a Widget, or text markup
            markup = self.get_handler(element.name)(element)
            if not isinstance(markup, urwid.Widget):
                # text markup, so create a padded text widget
                markup = urwid.Padding(
                    urwid.Text(markup),
                    align="left",
                    width=("relative", 100),
                    min_width=None,
                )
            widgets.append(markup)
        return widgets

    def inline_tag_to_text(self, tag) -> Union[str, tuple]:
        """Convert a tag to text markup styled with the tag's own name.

        Returns:
            ``""`` when the tag has no children, otherwise
            ``(tag.name, markups)`` where ``markups`` is the list
            produced by ``process_inline_tag_children``.
        """
        markups = self.process_inline_tag_children(tag)
        if not markups:
            return ""
        return (tag.name, markups)

    def process_inline_tag_children(self, tag) -> list:
        """Render each child of ``tag`` and return the results in order.

        Child tags are passed to their handler; anything else (text
        nodes) is kept as is.
        """
        return [
            self.get_handler(child.name)(child) if isinstance(child, Tag) else child
            for child in tag.children
        ]

    def process_block_tag_children(self, tag) -> List[urwid.Widget]:
        """Render the children of a block tag as a list of widgets.

        Text markup found before the first nested widget is gathered
        into one ``urwid.Text``, followed by all nested widgets, then
        one ``urwid.Text`` holding every piece of text markup found
        after the first nested widget. Both text widgets are styled
        with ``tag.name``. Empty groups are left out.
        """
        pre_widget_markups = []
        post_widget_markups = []
        child_widgets = []
        for child in tag.children:
            if isinstance(child, Tag):
                # nested tag: custom handler, or inline_tag_to_text
                result = self.get_handler(child.name)(child)
            else:
                # plain text node
                result = child
            if isinstance(result, urwid.Widget):
                child_widgets.append(result)
            elif child_widgets:
                # a nested widget has already been seen
                post_widget_markups.append(result)
            else:
                pre_widget_markups.append(result)

        widget_list = []
        if pre_widget_markups:
            widget_list.append(urwid.Text((tag.name, pre_widget_markups)))
        widget_list += child_widgets
        if post_widget_markups:
            widget_list.append(urwid.Text((tag.name, post_widget_markups)))
        return widget_list

    def get_style_name(self, tag) -> str:
        """Return the style name to use for ``tag``.

        This is ``"class_"`` followed by the tag's class names joined
        with ``"_"`` when the tag has a non-empty ``class`` attribute,
        and ``tag.name`` otherwise.
        """
        # TODO: think about whitelisting allowed classes,
        # or blacklisting classes we do not want.
        # Classes to whitelist: "mention" "hashtag"
        # used in anchor tags
        # Classes to blacklist: "invisible" used in Akkoma
        # anchor titles
        classes = tag.attrs.get("class")
        if classes is not None and len(classes) > 0:
            return "class_" + "_".join(classes)
        return tag.name

    # Tag handlers start here.
    # Tags not explicitly listed are "supported" by
    # rendering as text.
    # Inline tag handlers return text markup for urwid.Text
    # Block tag handlers return urwid.Widget

    def basic_block_tag_handler(self, tag) -> urwid.Widget:
        """Default for block tags that need no special treatment."""
        return urwid.Pile(self.process_block_tag_children(tag))

    def _a(self, tag) -> Union[str, tuple]:
        """Render an anchor, styled by its class names when it has any."""
        markups = self.process_inline_tag_children(tag)
        if not markups:
            return ""
        # hashtag anchors have a class of "mention hashtag"
        # we'll return style "class_mention_hashtag"
        # in that case; set this up in constants.py
        # to control highlighting of hashtags
        return (self.get_style_name(tag), markups)

    def _blockquote(self, tag) -> urwid.Widget:
        """Render a blockquote as a padded pile with the "blockquote" style."""
        widget_list = self.process_block_tag_children(tag)
        # Every border character is the empty string.
        # NOTE(review): presumably LineBox is used only for layout here,
        # with no visible border -- confirm against urwid's behaviour.
        blockquote_widget = urwid.LineBox(
            urwid.Padding(
                urwid.Pile(widget_list),
                align="left",
                width=("relative", 100),
                min_width=None,
                left=1,
                right=1,
            ),
            tlcorner="",
            tline="",
            lline="",
            trcorner="",
            blcorner="",
            rline="",
            bline="",
            brcorner="",
        )
        return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])

    def _br(self, tag) -> tuple:
        """Render a line break as a newline styled with the tag name."""
        return (tag.name, ("br", "\n"))

    _div = basic_block_tag_handler

    _li = basic_block_tag_handler

    # Glitch-soc and Pleroma allow <H1>...<H6> in content
    # Mastodon (PR #23913) does not; header tags are converted to <STRONG>
    _h1 = basic_block_tag_handler
    _h2 = basic_block_tag_handler
    _h3 = basic_block_tag_handler
    _h4 = basic_block_tag_handler
    _h5 = basic_block_tag_handler
    _h6 = basic_block_tag_handler

    def _ol(self, tag) -> urwid.Widget:
        """Render an ordered list (items prefixed with "1.", "2.", ...)."""
        return self.list_widget(tag, ordered=True)

    _p = basic_block_tag_handler

    def _pre(self, tag) -> urwid.Widget:
        """Render preformatted text as a padded pile with the "pre" style."""
        # <PRE> tag spec says that text should not wrap,
        # but horizontal screen space is at a premium
        # and we have no horizontal scroll bar, so allow
        # wrapping.
        widget_list = [urwid.Divider(" ")]
        widget_list += self.process_block_tag_children(tag)
        pre_widget = urwid.Padding(
            urwid.Pile(widget_list),
            align="left",
            width=("relative", 100),
            min_width=None,
            left=1,
            right=1,
        )
        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])

    def _span(self, tag) -> Union[str, tuple]:
        """Render a span, styled by its own class or else by its parent."""
        markups = self.process_inline_tag_children(tag)
        if not markups:
            return ""
        # span inherits its parent's class definition
        # unless it has a specific class definition
        # of its own
        if "class" in tag.attrs:
            style_name = self.get_style_name(tag)
        elif tag.parent:
            style_name = self.get_style_name(tag.parent)
        else:
            style_name = tag.name
        return (style_name, markups)

    def _ul(self, tag) -> urwid.Widget:
        """Render an unordered list (items prefixed with "*")."""
        return self.list_widget(tag, ordered=False)

    def list_widget(self, tag, ordered=False) -> urwid.Widget:
        """Render the direct ``<li>`` children of ``tag`` as a pile.

        Args:
            tag: The list tag whose direct ``<li>`` children are rendered.
            ordered: When true, items are numbered from 1; otherwise
                each item is prefixed with ``*``.

        An item rendered as text markup becomes a single ``urwid.Text``
        with the prefix inline. An item rendered as a widget is placed
        in ``urwid.Columns`` next to a separate prefix text widget.
        """
        widgets = []
        render_item = self.get_handler("li")
        for number, li in enumerate(tag.find_all("li", recursive=False), start=1):
            markup = render_item(li)
            if not isinstance(markup, urwid.Widget):
                if ordered:
                    # 1. foo, 2. bar, etc.
                    txt = urwid.Text(("li", [str(number), ". ", markup]))
                else:
                    # * foo, * bar, etc.
                    txt = urwid.Text(("li", ["* ", markup]))
                widgets.append(txt)
            else:
                if ordered:
                    txt = urwid.Text(("li", [str(number) + "."]))
                else:
                    txt = urwid.Text(("li", "*"))
                columns = urwid.Columns(
                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=4
                )
                widgets.append(columns)
        return urwid.Pile(widgets)

Wyświetl plik

@ -7,11 +7,11 @@ from typing import List, Optional
from .entities import Status
from .scroll import Scrollable, ScrollBar
from .utils import highlight_hashtags, parse_datetime, highlight_keys
from .utils import parse_datetime, highlight_keys
from .widgets import SelectableText, SelectableColumns
from .richtext import ContentParser
from toot.tui import app
from toot.tui.utils import time_ago
from toot.utils import format_content
from toot.utils.language import language_name
logger = logging.getLogger("toot")
@ -341,8 +341,12 @@ class StatusDetails(urwid.Pile):
yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
else:
content = status.original.translation if status.original.show_translation else status.data["content"]
for line in format_content(content):
yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
parser = ContentParser()
widgetlist = parser.html_to_widgets(content)
for line in widgetlist:
yield (line)
media = status.data["media_attachments"]
if media: