toot/toot/tui/richtext.py

268 wiersze
8.6 KiB
Python

"""
richtext
"""
from typing import List
import urwid
from bs4 import BeautifulSoup
from bs4.element import NavigableString, Tag
class ContentParser:
def __init__(self, config={}):
"""Parse a limited subset of HTML and create urwid widgets."""
self.tag_to_method = {
"b": self.inline_tag_to_text,
"i": self.inline_tag_to_text,
"code": self.inline_tag_to_text,
"em": self.inline_tag_to_text,
"strong": self.inline_tag_to_text,
"del": self.inline_tag_to_text,
}
def html_to_widgets(self, html) -> List[urwid.Widget]:
"""Convert html to urwid widgets"""
widgets: List[urwid.Widget] = []
soup = BeautifulSoup(html.replace(''', "'"), "html.parser")
for e in soup.body or soup:
if isinstance(e, NavigableString):
continue
name = e.name
# get the custom method for the tag, defaulting to tag_to_text if none defined for this tag
method = self.tag_to_method.get(
name, getattr(self, "_" + name, self.inline_tag_to_text)
)
markup = method(e) # either returns a Widget, or plain text
if not isinstance(markup, urwid.Widget):
# plaintext, so create a padded text widget
txt = urwid.Text(markup)
markup = urwid.Padding(
txt,
align="left",
width=("relative", 100),
min_width=None,
)
widgets.append(markup)
return widgets
def inline_tag_to_text(self, tag) -> list:
"""Convert html tag to plain text with tag as attributes recursively"""
markups = self.process_inline_tag_children(tag)
if not markups:
return ""
return (tag.name, markups)
def process_inline_tag_children(self, tag) -> list:
markups = []
for child in tag.children:
if isinstance(child, Tag):
method = self.tag_to_method.get(
child.name, getattr(self, "_" + child.name, self.inline_tag_to_text)
)
markup = method(child)
markups.append(markup)
else:
markups.append(child)
return markups
def process_block_tag_children(self, tag) -> List[urwid.Widget]:
pre_widget_markups = []
post_widget_markups = []
child_widgets = []
found_nested_widget = False
for child in tag.children:
if isinstance(child, Tag):
# child is a nested tag; process using custom method
# or default to inline_tag_to_text
method = self.tag_to_method.get(
child.name, getattr(self, "_" + child.name, self.inline_tag_to_text)
)
result = method(child)
if isinstance(result, urwid.Widget):
found_nested_widget = True
child_widgets.append(result)
else:
if not found_nested_widget:
pre_widget_markups.append(result)
else:
post_widget_markups.append(result)
else:
# child is text; append to the appropriate markup list
if not found_nested_widget:
pre_widget_markups.append(child)
else:
post_widget_markups.append(child)
widget_list = []
if len(pre_widget_markups):
widget_list.append(urwid.Text((tag.name, pre_widget_markups)))
if len(child_widgets):
widget_list += child_widgets
if len(post_widget_markups):
widget_list.append(urwid.Text((tag.name, post_widget_markups)))
return widget_list
def get_style_name(self, tag) -> str:
# TODO: think about whitelisting allowed classes,
# or blacklisting classes we do not want.
# Classes to whitelist: "mention" "hashtag"
# used in anchor tags
# Classes to blacklist: "invisible" used in Akkoma
# anchor titles
style_name = tag.name
if "class" in tag.attrs:
clss = tag.attrs["class"]
if len(clss) > 0:
style_name = "class_" + "_".join(clss)
return style_name
# Tag handlers start here.
# Tags not explicitly listed are "supported" by
# rendering as text.
# Inline tags return a list of marked up text for urwid.Text
# Block tags return urwid.Widget
def basic_block_tag_handler(self, tag) -> urwid.Widget:
"""default for block tags that need no special treatment"""
return urwid.Pile(self.process_block_tag_children(tag))
def _a(self, tag) -> list:
markups = self.process_inline_tag_children(tag)
if not markups:
return ""
# hashtag anchors have a class of "mention hashtag"
# we'll return style "class_mention_hashtag"
# in that case; set this up in constants.py
# to control highlighting of hashtags
return (self.get_style_name(tag), markups)
def _blockquote(self, tag) -> urwid.Widget:
widget_list = self.process_block_tag_children(tag)
blockquote_widget = urwid.LineBox(
urwid.Padding(
urwid.Pile(widget_list),
align="left",
width=("relative", 100),
min_width=None,
left=1,
right=1,
),
tlcorner="",
tline="",
lline="",
trcorner="",
blcorner="",
rline="",
bline="",
brcorner="",
)
return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
def _br(self, tag) -> list:
return (tag.name, ("br", "\n"))
_div = basic_block_tag_handler
_li = basic_block_tag_handler
# Glitch-soc and Pleroma allow <H1>...<H6> in content
# Mastodon (PR #23913) does not; header tags are converted to <STRONG>
_h1 = basic_block_tag_handler
_h2 = basic_block_tag_handler
_h3 = basic_block_tag_handler
_h4 = basic_block_tag_handler
_h5 = basic_block_tag_handler
_h6 = basic_block_tag_handler
def _ol(self, tag) -> urwid.Widget:
return self.list_widget(tag, ordered=True)
_p = basic_block_tag_handler
def _pre(self, tag) -> urwid.Widget:
# <PRE> tag spec says that text should not wrap,
# but horizontal screen space is at a premium
# and we have no horizontal scroll bar, so allow
# wrapping.
widget_list = [urwid.Divider(" ")]
widget_list += self.process_block_tag_children(tag)
pre_widget = urwid.Padding(
urwid.Pile(widget_list),
align="left",
width=("relative", 100),
min_width=None,
left=1,
right=1,
)
return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
def _span(self, tag) -> list:
markups = self.process_inline_tag_children(tag)
if not markups:
return ""
# span inherits its parent's class definition
# unless it has a specific class definition
# of its own
if "class" in tag.attrs:
style_name = self.get_style_name(tag)
elif tag.parent:
style_name = self.get_style_name(tag.parent)
else:
style_name = tag.name
return (style_name, markups)
def _ul(self, tag) -> urwid.Widget:
return self.list_widget(tag, ordered=False)
def list_widget(self, tag, ordered=False) -> urwid.Widget:
widgets = []
i = 1
for li in tag.find_all("li", recursive=False):
method = self.tag_to_method.get(
"li", getattr(self, "_li", self.inline_tag_to_text)
)
markup = method(li)
if not isinstance(markup, urwid.Widget):
if ordered:
txt = urwid.Text(
("li", [str(i), ". ", markup])
) # 1. foo, 2. bar, etc.
else:
txt = urwid.Text(("li", ["* ", markup])) # * foo, * bar, etc.
widgets.append(txt)
else:
if ordered:
txt = urwid.Text(("li", [str(i) + "."]))
else:
txt = urwid.Text(("li", "*"))
columns = urwid.Columns(
[txt, ("weight", 9999, markup)], dividechars=1, min_width=4
)
widgets.append(columns)
i += 1
return urwid.Pile(widgets)