Compare commits

...

1 Commits

2 changed files with 440 additions and 182 deletions

View File

@@ -167,20 +167,6 @@ def create_app():
allowed_themes.sort(key=(lambda x: (x != 'style', x)))
app.config['allowed_themes'] = allowed_themes
with app.app_context():
from .schema import create as create_tables
from .migrations import run_migrations
create_tables()
run_migrations()
create_default_avatar()
create_admin()
create_deleted_user()
reparse_babycode()
bind_default_badges(app.config['BADGES_PATH'])
from app.routes.app import bp as app_bp
from app.routes.topics import bp as topics_bp
from app.routes.threads import bp as threads_bp
@@ -200,6 +186,20 @@ def create_app():
app.register_blueprint(hyperapi_bp)
app.register_blueprint(guides_bp)
with app.app_context():
from .schema import create as create_tables
from .migrations import run_migrations
create_tables()
run_migrations()
create_default_avatar()
create_admin()
create_deleted_user()
reparse_babycode()
bind_default_badges(app.config['BADGES_PATH'])
app.config['SESSION_COOKIE_SECURE'] = True
@app.before_request
@@ -251,12 +251,12 @@ def create_app():
return permission_level_string(term)
@app.template_filter('babycode')
def babycode_filter(markup):
return babycode_to_html(markup).result
def babycode_filter(markup, nofrag=False):
return babycode_to_html(markup, fragment=not nofrag).result
@app.template_filter('babycode_strict')
def babycode_strict_filter(markup):
return babycode_to_html(markup, STRICT_BANNED_TAGS).result
def babycode_strict_filter(markup, nofrag=False):
return babycode_to_html(markup, banned_tags=STRICT_BANNED_TAGS, fragment=not nofrag).result
@app.template_filter('extract_h2')
def extract_h2(content):

View File

@@ -6,7 +6,25 @@ from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound as PygmentsClassNotFound
import re
class BabycodeParseResult:
BABYCODE_VERSION = 7
class BabycodeError(Exception):
    """Root of the babycode exception hierarchy."""


class BabycodeRenderError(BabycodeError):
    """Base class for failures that happen while rendering an AST."""


class UnknownASTElementError(BabycodeRenderError):
    """Signals an AST node whose ``type`` has no rendering branch.

    The offending type and the node are kept on the instance as
    ``element_type`` and ``element``; the node is appended to the message
    only when it is truthy.
    """

    def __init__(self, element_type, element=None):
        self.element_type = element_type
        self.element = element
        detail = f' (element: {element})' if element else ''
        super().__init__(f'Unknown AST element: {element_type}{detail}')
class BabycodeRenderResult:
def __init__(self, result, mentions=[]):
self.result = result
self.mentions = mentions
@@ -15,8 +33,177 @@ class BabycodeParseResult:
def __str__(self):
    """Return the rendered output stored in ``result``."""
    return self.result
def dumps(self):
    """Return ``result`` unchanged (the same value ``__str__`` returns)."""
    return self.result
class BabycodeRenderer:
    """Folds a babycode AST into markup using pluggable lookup tables.

    tag_map maps a bbcode tag name to a callable ``(content, attr)``,
    void_tag_map maps a void tag name to a callable ``(attr)`` and emote_map
    maps an emote name to its replacement string.  When ``fragment`` is
    truthy, the first run of inline content at the root is emitted without a
    surrounding ``<p>``.  Subclasses must provide ``make_mention``.
    """

    def __init__(self, tag_map, void_tag_map, emote_map, fragment=False):
        self.tag_map = tag_map
        self.void_tag_map = void_tag_map
        self.emote_map = emote_map
        self.fragment = fragment

    def make_mention(self, element):
        """Render a ``mention`` node; concrete renderers override this."""
        raise NotImplementedError

    def transform_para_whitespace(self, text):
        """Apply markdown-like whitespace rules to the text of one paragraph."""
        # spaces directly before a newline mark a hard line break
        # (the pattern as written matches one or more trailing spaces)
        with_breaks = re.sub(r' +\n', '<br>', text)
        # every remaining newline collapses into a single space
        return re.sub(r'\n', ' ', with_breaks)

    def wrap_in_paragraphs(self, nodes, context_is_block=True, is_root=False):
        """Render ``nodes`` and group inline content into ``<p>`` elements.

        In a block context, blank lines inside text nodes and block-level
        child nodes both end the paragraph being collected.  In an inline
        context nothing is wrapped; the rendered pieces are concatenated.
        ``is_root`` enables the fragment rule for the first paragraph.
        """
        pieces = []
        pending = []
        # fragment mode only ever applies to the first paragraph of the root
        bare_first = is_root and self.fragment

        def flush():
            nonlocal bare_first
            if not pending:
                return
            body = ''.join(pending)
            pending.clear()
            if not body.strip():
                # whitespace-only paragraphs are dropped
                return
            if bare_first:
                bare_first = False
                pieces.append(body)
            else:
                pieces.append(f"<p>{body}</p>")

        for node in nodes:
            if isinstance(node, str):
                for index, chunk in enumerate(re.split(r'\n\n+', node)):
                    if context_is_block and index > 0:
                        # a blank line closes the paragraph collected so far
                        flush()
                    if chunk:
                        pending.append(self.transform_para_whitespace(chunk))
                continue

            node_is_inline = is_inline(node)
            if context_is_block and not node_is_inline:
                # a block child ends the current paragraph and stands alone
                flush()
                if is_root:
                    # fragment mode covers only leading inline content; a
                    # block element at the root switches it off
                    bare_first = False
                pieces.append(self.fold(node))
            else:
                # inline child of a paragraph, or any child in inline context
                pending.append(self.fold(node))

        if context_is_block:
            flush()
        elif pending:
            pieces.append(''.join(pending))
        return ''.join(pieces)

    def fold(self, element):
        """Render a single AST element (plain string or node dict) to markup.

        Raises UnknownASTElementError for a node type without a branch here.
        """
        if isinstance(element, str):
            return element

        kind = element['type']
        if kind == 'bbcode':
            tag_name = element['name']
            if is_inline(element) or tag_name in {'ul', 'ol', 'code', 'img'}:
                # inline tags, and block tags that manage their own internal
                # structure, get their children folded without paragraphs
                content = ''.join(self.fold(child) for child in element['children'])
            else:
                # remaining block tags may contain paragraphs
                content = self.wrap_in_paragraphs(
                    element['children'], context_is_block=True, is_root=False
                )
            return self.tag_map[tag_name](content, element['attr'])
        if kind == 'bbcode_void':
            return self.void_tag_map[element['name']](element['attr'])
        if kind == 'link':
            return f"<a href=\"{element['url']}\">{element['url']}</a>"
        if kind == 'emote':
            return self.emote_map[element['name']]
        if kind == 'rule':
            return '<hr>'
        if kind == 'mention':
            return self.make_mention(element)
        raise UnknownASTElementError(element_type=kind, element=element)

    def render(self, ast):
        """Render a whole AST (list of root elements) to a markup string."""
        return self.wrap_in_paragraphs(ast, context_is_block=True, is_root=True)
class HTMLRenderer(BabycodeRenderer):
    """Renderer that emits HTML via TAGS / VOID_TAGS / EMOJI.

    Each distinct mention of an existing user is recorded in ``self.mentions``
    and handed back alongside the markup by ``render``.
    """

    def __init__(self, fragment=False):
        super().__init__(TAGS, VOID_TAGS, EMOJI, fragment)
        self.mentions = []

    def make_mention(self, e):
        """Render a mention node as a profile link, or as plain ``@name``
        when no user with that (lower-cased) username is found."""
        from ..models import Users
        from flask import url_for, current_app

        # a request context is pushed for the duration of the lookup and
        # URL building
        with current_app.test_request_context('/'):
            target_user = Users.find({'username': e['name'].lower()})
            if not target_user:
                return f"@{e['name']}"

            record = {
                'mention_text': f"@{e['name']}",
                'mentioned_user_id': int(target_user.id),
                "start": e['start'],
                "end": e['end'],
            }
            if record not in self.mentions:
                self.mentions.append(record)

            has_display = target_user.has_display_name()
            css_class = 'mention display' if has_display else 'mention'
            # users without a display name are shown with a leading '@'
            prefix = '' if has_display else '@'
            href = url_for('users.page', username=target_user.username)
            return (
                f"<a class='{css_class}' href='{href}' "
                f"title='@{target_user.username}' data-init='highlightMentions' "
                f"data-username='{target_user.username}'>"
                f"{prefix}{target_user.get_readable_name()}</a>"
            )

    def render(self, ast):
        """Render ``ast`` and wrap the markup plus collected mentions in a
        BabycodeRenderResult."""
        return BabycodeRenderResult(super().render(ast), self.mentions)
class RSSXMLRenderer(BabycodeRenderer):
    """Renderer that emits feed-friendly markup via RSS_TAGS / VOID_TAGS /
    RSS_EMOJI.  ``render`` is inherited and returns a plain string."""

    def __init__(self, fragment=False):
        super().__init__(RSS_TAGS, VOID_TAGS, RSS_EMOJI, fragment)

    def make_mention(self, element):
        """Render a mention node as an absolute link to the user's page.

        Falls back to plain ``@name`` text when no user with the
        (lower-cased) username is found.
        """
        from ..models import Users
        from flask import url_for, current_app

        # a request context is pushed for the duration of the lookup and
        # URL building
        with current_app.test_request_context('/'):
            # the parameter is named ``element``; the body previously read an
            # undefined ``e`` and raised NameError on every mention
            target_user = Users.find({'username': element['name'].lower()})
            if not target_user:
                return f"@{element['name']}"
            # built outside the f-string so the literal does not reuse its own
            # quote character inside a replacement field (a syntax error
            # before Python 3.12)
            href = url_for('users.page', username=target_user.username, _external=True)
            return f'<a href="{href}" title="@{target_user.username}">{target_user.get_readable_name()}</a>'
BABYCODE_VERSION = 5
NAMED_COLORS = [
'black', 'silver', 'gray', 'white', 'maroon', 'red',
@@ -49,111 +236,6 @@ NAMED_COLORS = [
'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen',
]
def is_tag(e, tag=None):
    """Tell whether ``e`` is a ``bbcode`` node, optionally with a given name.

    ``None`` and plain strings are never tags.  With ``tag`` omitted any
    bbcode node qualifies; otherwise the node's ``name`` must equal ``tag``.
    """
    if e is None or isinstance(e, str):
        return False
    if e['type'] != 'bbcode':
        return False
    return tag is None or e['name'] == tag
def is_text(e):
    """Return True when the AST element ``e`` is a plain string."""
    return isinstance(e, str)
def tag_code(children, attr, surrounding):
    """Render a [code] tag.

    Content without a newline becomes an inline ``<code>``.  Anything else
    becomes a ``<pre>`` block with a copy button; when ``attr`` names a lexer
    Pygments knows, the block is syntax-highlighted and labelled with the
    lexer name.  ``surrounding`` is accepted for signature parity and unused.
    """
    if '\n' not in children:
        return f"<code class=\"inline-code\">{children}</code>"

    input_code = children.strip()
    button = f"<button type=button class=\"copy-code\" value=\"{input_code}\" data-send=\"copyCode\" data-receive=\"copyCode\">Copy</button>"

    def block(label, body):
        # shared shell for the highlighted and the plain variant
        return f"<pre><span class=\"copy-code-container\"><span class=\"code-language-identifier\">{label}</span>{button}</span><code>{body}</code></pre>"

    if attr:
        try:
            lexer = get_lexer_by_name(attr.strip())
            formatter = HtmlFormatter(nowrap=True)
            # NOTE(review): .unescape() is not a plain-str method; this
            # presumably relies on children being a Markup instance - confirm
            return block(lexer.name, highlight(input_code.unescape(), lexer, formatter))
        except PygmentsClassNotFound:
            # unknown language: fall through to the unhighlighted block
            pass
    return block("code block", input_code)
def tag_list(children):
    """Turn the body of a list tag into ``<li>`` items.

    Items are separated by blank lines; spaces before a newline inside an
    item become ``<br>``.  Empty items are skipped.
    """
    separator = "\1"  # control character used as a temporary item delimiter
    body = re.sub(r" +\n", "<br>", children.strip())
    items = re.sub(r"\n\n+", separator, body).split(separator)
    return " ".join(f"<li>{item}</li>" for item in items if item)
def tag_color(children, attr, surrounding):
    """Render a [color] tag as a coloured ``<span>``.

    ``attr`` may be a name from NAMED_COLORS or a 3- or 6-digit hex value
    with an optional leading ``#``.  A missing or unparseable colour gives
    the tag back as literal text.  ``surrounding`` is unused.
    """
    if not attr:
        return f"[color]{children}[/color]"

    candidate = attr.lower().strip()
    if candidate in NAMED_COLORS:
        css_value = candidate
    else:
        hex_match = re.match(r"^#?([0-9a-f]{6}|[0-9a-f]{3})$", candidate)
        if hex_match is None:
            # not a colour we understand: hand the markup back as written
            return f"[color={attr}]{children}[/color]"
        css_value = f"#{hex_match.group(1)}"
    return f"<span style='color: {css_value};'>{children}</span>"
def tag_spoiler(children, attr, surrounding):
    """Render a [spoiler] tag as a collapsed accordion.

    ``attr`` is the header title and falls back to "Spoiler" when falsy.
    ``surrounding`` is unused.
    """
    title = attr or "Spoiler"
    header = f"<div class='accordion-header'><button type='button' class='accordion-toggle' data-send='toggleAccordion'>+</button><span>{title}</span></div>"
    body = f"<div class='accordion-content post-accordion-content hidden'>{children}</div>"
    return f"<div class='accordion hidden' data-receive='toggleAccordion'>{header}{body}</div>"
def tag_image(children, attr, surrounding):
    """Render an [img] tag; ``attr`` is the source and ``children`` the alt text.

    ``surrounding`` is the (previous, next) sibling pair.  The container div
    is opened only when the previous sibling is not an [img] and closed only
    when the next one is not, so adjacent images share one container.
    """
    opening = "" if is_tag(surrounding[0], 'img') else "<div class=post-img-container>"
    closing = "" if is_tag(surrounding[1], 'img') else "</div>"
    return f"{opening}<img class=\"post-image\" src=\"{attr}\" alt=\"{children}\">{closing}"
# Renderers for paired babycode tags, keyed by tag name.  Each callable takes
# (children, attr, surrounding): the already-rendered content, the raw tag
# attribute, and the (previous, next) sibling pair.
TAGS = {
    "b": lambda children, attr, _: f"<strong>{children}</strong>",
    "i": lambda children, attr, _: f"<em>{children}</em>",
    "s": lambda children, attr, _: f"<del>{children}</del>",
    "u": lambda children, attr, _: f"<u>{children}</u>",
    "img": tag_image,
    # href is quoted: an unquoted attribute value ends at the first
    # whitespace, which would let the rest of attr become extra attributes.
    # NOTE(review): assumes quotes in attr are already escaped upstream
    # (see sanitize) - confirm.
    "url": lambda children, attr, _: f"<a href=\"{attr}\">{children}</a>",
    "quote": lambda children, attr, _: f"<blockquote>{children}</blockquote>",
    "code": tag_code,
    "ul": lambda children, attr, _: f"<ul>{tag_list(children)}</ul>",
    "ol": lambda children, attr, _: f"<ol>{tag_list(children)}</ol>",
    "big": lambda children, attr, _: f"<span style='font-size: 2rem;'>{children}</span>",
    "small": lambda children, attr, _: f"<span style='font-size: 0.75rem;'>{children}</span>",
    "color": tag_color,
    "center": lambda children, attr, _: f"<div style='text-align: center;'>{children}</div>",
    "right": lambda children, attr, _: f"<div style='text-align: right;'>{children}</div>",
    "spoiler": tag_spoiler,
}
# Renderers for void tags (no children), keyed by tag name.  Each callable
# receives only the tag attribute and returns a literal character.
VOID_TAGS = {
    'lb': lambda attr: '[',
    'rb': lambda attr: ']',
    '@': lambda attr: '@',
}
# Names of tags that are always treated as inline.
# [img] is considered block for the purposes of collapsing whitespace,
# despite being potentially inline (since the resulting <img> tag is inline, but creates a block container around itself and sibling images).
# [code] has a special case in is_inline().
INLINE_TAGS = {
    'b', 'i', 's', 'u', 'color', 'big', 'small', 'url'
}
def make_emoji(name, code):
    """Build the ``<img>`` markup for one emoji.

    ``name`` selects the PNG under /static/emoji/ and doubles as alt text;
    ``code`` is shown between colons in the title.  The result starts with a
    single space.
    """
    template = ' <img class=emoji src="/static/emoji/{0}.png" alt="{0}" title=":{1}:">'
    return template.format(name, code)
@@ -203,12 +285,173 @@ EMOJI = {
'wink': make_emoji('wink', 'wink'),
}
# Emote table used by RSSXMLRenderer: starts from EMOJI and overrides the
# listed codes with plain Unicode emoji characters instead of the strings
# EMOJI holds for them.
RSS_EMOJI = {
    **EMOJI,
    'angry': '😡',
    '(': '🙁',
    'D': '😃',
    'imp': '😈',
    'angryimp': '👿',
    'impangry': '👿',
    'lobster': '🦞',
    '|': '😐',
    'pensive': '😔',
    'scissors': '✂️',
    ')': '🙂',
    'smiletear': '🥲',
    'crytear': '🥲',
    ',': '😭',
    'T': '😭',
    'cry': '😭',
    'sob': '😭',
    'o': '😮',
    'O': '😮',
    'hmm': '🤔',
    'think': '🤔',
    'thinking': '🤔',
    'P': '😛',
    'p': '😛',
    'weary': '😩',
    ';': '😉',
    'wink': '😉',
}
TEXT_ONLY = ["code"]
def break_lines(text):
    """Convert line-break conventions in ``text`` to ``<br>`` tags.

    Spaces directly before a newline become one ``<br>``; a run of two or
    more newlines becomes ``<br><br>``.  Single newlines are left alone.
    """
    hard_breaks = re.sub(r" +\n", "<br>", text)
    return re.sub(r"\n\n+", "<br><br>", hard_breaks)
def tag_code(children, attr):
    """Render a [code] tag.

    Content without a newline becomes an inline ``<code>``.  Anything else
    becomes a ``<pre>`` block with a copy button; when ``attr`` names a lexer
    Pygments knows, the block is syntax-highlighted and labelled with the
    lexer name.
    """
    if '\n' not in children:
        return f"<code class=\"inline-code\">{children}</code>"

    input_code = children.strip()
    button = f"<button type=button class=\"copy-code\" value=\"{input_code}\" data-send=\"copyCode\" data-receive=\"copyCode\">Copy</button>"

    def block(label, body):
        # shared shell for the highlighted and the plain variant
        return f"<pre><span class=\"copy-code-container\"><span class=\"code-language-identifier\">{label}</span>{button}</span><code>{body}</code></pre>"

    if attr:
        try:
            lexer = get_lexer_by_name(attr.strip())
            formatter = HtmlFormatter(nowrap=True)
            # the lexer is fed the unescaped text so entities are not
            # highlighted as code
            return block(lexer.name, highlight(Markup(input_code).unescape(), lexer, formatter))
        except PygmentsClassNotFound:
            # unknown language: fall through to the unhighlighted block
            pass
    return block("code block", input_code)
def tag_list(children):
    """Turn the body of a list tag into ``<li>`` items.

    Items are separated by blank lines; spaces before a newline inside an
    item become ``<br>``.  Empty items are skipped.
    """
    separator = "\1"  # control character used as a temporary item delimiter
    body = re.sub(r" +\n", "<br>", children.strip())
    items = re.sub(r"\n\n+", separator, body).split(separator)
    return " ".join(f"<li>{item}</li>" for item in items if item)
def tag_color(children, attr):
    """Render a [color] tag as a coloured ``<span>``.

    ``attr`` may be a name from NAMED_COLORS or a 3- or 6-digit hex value
    with an optional leading ``#``.  A missing or unparseable colour gives
    the tag back as literal text.
    """
    if not attr:
        return f"[color]{children}[/color]"

    candidate = attr.lower().strip()
    if candidate in NAMED_COLORS:
        css_value = candidate
    else:
        hex_match = re.match(r"^#?([0-9a-f]{6}|[0-9a-f]{3})$", candidate)
        if hex_match is None:
            # not a colour we understand: hand the markup back as written
            return f"[color={attr}]{children}[/color]"
        css_value = f"#{hex_match.group(1)}"
    return f"<span style='color: {css_value};'>{children}</span>"
def tag_spoiler(children, attr):
    """Render a [spoiler] tag as a collapsed accordion.

    ``attr`` is the header title and falls back to "Spoiler" when falsy.
    """
    title = attr or "Spoiler"
    header = f"<div class='accordion-header'><button type='button' class='accordion-toggle' data-send='toggleAccordion'>+</button><span>{title}</span></div>"
    body = f"<div class='accordion-content post-accordion-content hidden'>{children}</div>"
    return f"<div class='accordion hidden' data-receive='toggleAccordion'>{header}{body}</div>"
def tag_image(children, attr):
    """Render an [img] tag inside its own container div.

    ``attr`` is the image source and ``children`` the alt text.
    """
    return f"<div class=post-img-container><img class=\"post-image\" src=\"{attr}\" alt=\"{children}\"></div>"
# HTML renderers for paired babycode tags, keyed by tag name.  Each callable
# takes (children, attr): the already-rendered content and the raw tag
# attribute.
TAGS = {
    "b": lambda children, attr: f"<strong>{children}</strong>",
    "i": lambda children, attr: f"<em>{children}</em>",
    "s": lambda children, attr: f"<del>{children}</del>",
    "u": lambda children, attr: f"<u>{children}</u>",
    "img": tag_image,
    # href is quoted: an unquoted attribute value ends at the first
    # whitespace, which would let the rest of attr become extra attributes.
    # NOTE(review): assumes quotes in attr are already escaped upstream
    # (see sanitize) - confirm.
    "url": lambda children, attr: f"<a href=\"{attr}\">{children}</a>",
    "quote": lambda children, attr: f"<blockquote>{children}</blockquote>",
    "code": tag_code,
    "ul": lambda children, attr: f"<ul>{tag_list(children)}</ul>",
    "ol": lambda children, attr: f"<ol>{tag_list(children)}</ol>",
    "big": lambda children, attr: f"<span style='font-size: 2rem;'>{children}</span>",
    "small": lambda children, attr: f"<span style='font-size: 0.75rem;'>{children}</span>",
    "color": tag_color,
    "center": lambda children, attr: f"<div style='text-align: center;'>{children}</div>",
    "right": lambda children, attr: f"<div style='text-align: right;'>{children}</div>",
    "spoiler": tag_spoiler,
}
def tag_code_rss(children, attr):
    """Render a [code] tag for feeds: a bare ``<code>`` for content without
    a newline, wrapped in ``<pre>`` otherwise.  ``attr`` is ignored."""
    code = f'<code>{children}</code>'
    if '\n' in children:
        return f'<pre>{code}</pre>'
    return code
# Tag renderers for feed output: TAGS with the entries below replaced by
# simpler markup that carries no site-specific classes or scripts.
RSS_TAGS = {
    **TAGS,
    # alt is quoted: an unquoted attribute value is not well-formed XML and
    # would be cut off at the first space of the alt text
    'img': lambda children, attr: f'<img src="{attr}" alt="{children}" />',
    'spoiler': lambda children, attr: f'<details><summary>{attr or "Spoiler"}</summary>{children}</details>',
    'code': tag_code_rss,
    'big': lambda children, attr: f'<span style="font-size: 1.2em">{children}</span>',
    'small': lambda children, attr: f'<small>{children}</small>'
}
# Renderers for void tags (no children), keyed by tag name.  Each callable
# receives only the tag attribute and returns a literal character.
VOID_TAGS = {
    'lb': lambda attr: '[',
    'rb': lambda attr: ']',
    '@': lambda attr: '@',
}
# Names of tags that are always treated as inline, including the void tags.
# [img] is considered block for the purposes of collapsing whitespace,
# despite being potentially inline (since the resulting <img> tag is inline, but creates a block container around itself and sibling images).
# [code] has a special case in is_inline().
INLINE_TAGS = {
    'b', 'i', 's', 'u', 'color', 'big', 'small', 'url', 'lb', 'rb', '@'
}
def is_tag(e, tag=None):
    """Tell whether ``e`` is a ``bbcode`` or ``bbcode_void`` node.

    ``None`` and plain strings are never tags.  With ``tag`` omitted any such
    node qualifies; otherwise the node's ``name`` must equal ``tag``.
    """
    if e is None or isinstance(e, str):
        return False
    if e['type'] not in ('bbcode', 'bbcode_void'):
        return False
    return tag is None or e['name'] == tag
def is_text(e):
    """Return True when the AST element ``e`` is a plain string."""
    return isinstance(e, str)
def is_inline(e):
if e is None:
@@ -219,7 +462,7 @@ def is_inline(e):
if is_tag(e):
if is_tag(e, 'code'): # special case, since [code] can be inline OR block
return '\n' not in e['children']
return '\n' not in e['children'][0]
return e['name'] in INLINE_TAGS
@@ -227,21 +470,22 @@ def is_inline(e):
def make_mention(e, mentions):
from ..models import Users
from flask import url_for
target_user = Users.find({'username': e['name'].lower()})
if not target_user:
return f"@{e['name']}"
from flask import url_for, current_app
with current_app.test_request_context('/'):
target_user = Users.find({'username': e['name'].lower()})
if not target_user:
return f"@{e['name']}"
mention_data = {
'mention_text': f"@{e['name']}",
'mentioned_user_id': int(target_user.id),
"start": e['start'],
"end": e['end'],
}
if mention_data not in mentions:
mentions.append(mention_data)
mention_data = {
'mention_text': f"@{e['name']}",
'mentioned_user_id': int(target_user.id),
"start": e['start'],
"end": e['end'],
}
if mention_data not in mentions:
mentions.append(mention_data)
return f"<a class='mention{' display' if target_user.has_display_name() else ''}' href='{url_for('users.page', username=target_user.username)}' title='@{target_user.username}' data-init='highlightMentions' data-username='{target_user.username}'>{'@' if not target_user.has_display_name() else ''}{target_user.get_readable_name()}</a>"
return f"<a class='mention{' display' if target_user.has_display_name() else ''}' href='{url_for('users.page', username=target_user.username)}' title='@{target_user.username}' data-init='highlightMentions' data-username='{target_user.username}'>{'@' if not target_user.has_display_name() else ''}{target_user.get_readable_name()}</a>"
def should_collapse(text, surrounding):
if not isinstance(text, str):
@@ -255,10 +499,30 @@ def should_collapse(text, surrounding):
return False
def sanitize(s):
    """Prepare raw input for parsing.

    Strips surrounding whitespace, rewrites CRLF and lone CR line endings to
    LF, then passes the text through ``escape`` (imported elsewhere in this
    module; presumably the HTML escaper - confirm).
    """
    trimmed = s.strip()
    unix_newlines = trimmed.replace('\r\n', '\n').replace('\r', '\n')
    return escape(unix_newlines)
def babycode_to_html(s, banned_tags=[]):
def babycode_ast(s: str, banned_tags=[]):
"""
transforms a string of babycode into an AST.
the AST is a list of strings or dicts.
a string element is plain unformatted text.
a dict element is a node that contains at least the key `type`.
possible types are:
- bbcode
- bbcode_void
- link
- emote
- rule
- mention
bbcode type elements have a children key that is a list of children of that node. the children are themselves elements (string or dict).
"""
allowed_tags = set(TAGS.keys())
if banned_tags is not None:
for tag in banned_tags:
@@ -281,44 +545,38 @@ def babycode_to_html(s, banned_tags=[]):
)
if not should_collapse(e, surrounding):
elements.append(e)
return elements
out = ""
mentions = []
def fold(element, nobr, surrounding):
if isinstance(element, str):
if nobr:
return element
return break_lines(element)
match element['type']:
case "bbcode":
c = ""
for i in range(len(element['children'])):
child = element['children'][i]
_surrounding = (
element['children'][i - 1] if i-1 >= 0 else None,
element['children'][i + 1] if i+1 < len(element['children']) else None
)
_nobr = element['name'] == "code" or element['name'] == "ul" or element['name'] == "ol"
c = c + Markup(fold(child, _nobr, _surrounding))
res = TAGS[element['name']](c, element['attr'], surrounding)
return res
case "bbcode_void":
return VOID_TAGS[element['name']](element['attr'])
case "link":
return f"<a href=\"{element['url']}\">{element['url']}</a>"
case 'emote':
return EMOJI[element['name']]
case "rule":
return "<hr>"
case "mention":
return make_mention(element, mentions)
def babycode_to_html(s: str, banned_tags=[], fragment=False):
"""
transforms a string of babycode into html.
for i in range(len(elements)):
e = elements[i]
surrounding = (
elements[i - 1] if i-1 >= 0 else None,
elements[i + 1] if i+1 < len(elements) else None
)
out = out + fold(e, False, surrounding)
return BabycodeParseResult(out, mentions)
parameters:
s (str) - babycode string
banned_tags (list) - babycode tags to exclude from being parsed. they will remain as plain text in the transformation.
fragment (bool) - skip adding an html p tag to the first element if it is inline.
"""
ast = babycode_ast(s, banned_tags)
r = HTMLRenderer(fragment=fragment)
return r.render(ast)
def babycode_to_rssxml(s: str, banned_tags=[], fragment=False):
    """
    render a babycode string as rss-compatible x/html.

    the string is parsed with babycode_ast and the resulting tree is handed to an RSSXMLRenderer.

    parameters:
    s (str) - babycode source text
    banned_tags (list) - tag names that must not be parsed; they stay in the output as plain text.
    fragment (bool) - when true, the first element is not wrapped in a p tag if it is inline.

    returns the rendered markup as a string.
    """
    tree = babycode_ast(s, banned_tags)
    return RSSXMLRenderer(fragment=fragment).render(tree)