from .babycode_parser import Parser from markupsafe import Markup, escape from pygments import highlight from pygments.formatters import HtmlFormatter from pygments.lexers import get_lexer_by_name from pygments.util import ClassNotFound as PygmentsClassNotFound import re BABYCODE_VERSION = 8 class BabycodeError(Exception): pass class BabycodeRenderError(BabycodeError): pass class UnknownASTElementError(BabycodeRenderError): def __init__(self, element_type, element=None): self.element_type = element_type self.element = element message = f'Unknown AST element: {element_type}' if element: message += f' (element: {element})' super().__init__(message) class BabycodeRenderResult: def __init__(self, result, mentions=[]): self.result = result self.mentions = mentions def __str__(self): return self.result class BabycodeRenderer: def __init__(self, tag_map, void_tag_map, emote_map, fragment=False): self.tag_map = tag_map self.void_tag_map = void_tag_map self.emote_map = emote_map self.fragment = fragment def make_mention(self, element): raise NotImplementedError def transform_para_whitespace(self, text): # markdown rules: # two spaces at end of line ->
text = re.sub(r' +\n', '
', text) # single newlines -> space (collapsed) text = re.sub(r'\n', ' ', text) return text def wrap_in_paragraphs(self, nodes, context_is_block=True, is_root=False): result = [] current_paragraph = [] is_first_para = is_root and self.fragment def flush_paragraph(): # TIL nonlocal exists nonlocal result, current_paragraph, is_first_para if not current_paragraph: return para_content = ''.join(current_paragraph) if para_content.strip(): # skip empty paragraphs if is_first_para: result.append(para_content) is_first_para = False else: result.append(f"

{para_content}

") current_paragraph.clear() for node in nodes: if isinstance(node, str): paras = re.split(r'\n\n+', node) for i, para in enumerate(paras): if i > 0 and context_is_block: flush_paragraph() if para: processed = self.transform_para_whitespace(para) current_paragraph.append(processed) else: inline = is_inline(node) if inline and context_is_block: # inline child within a paragraph context current_paragraph.append(self.fold(node)) elif not inline and context_is_block: # block child within a block context flush_paragraph() if is_root: # this is relevant for fragment. # fragment only applies to the first inline node(s). # if the first element is a block, reset "fragment mode". is_first_para = False result.append(self.fold(node)) else: # either inline in inline context, or block in inline context current_paragraph.append(self.fold(node)) if context_is_block: # flush final para if we're in a block context flush_paragraph() elif current_paragraph: # inline context - just append whatever we collected result.append(''.join(current_paragraph)) return ''.join(result) def fold(self, element): if isinstance(element, str): return element match element['type']: case 'bbcode': tag_name = element['name'] if is_inline(element): # inline tag # since its inline, all children should be processed inline content = "".join(self.fold(child) for child in element['children']) return self.tag_map[tag_name](content, element['attr']) else: # block tag if tag_name in {'ul', 'ol', 'code', 'img'}: # these handle their own internal structure content = ''.join( child if isinstance(child, str) else self.fold(child) for child in element['children'] ) return self.tag_map[tag_name](content, element['attr']) else: # block elements that can contain paragraphs content = self.wrap_in_paragraphs(element['children'], context_is_block=True, is_root=False) return self.tag_map[tag_name](content, element['attr']) case 'bbcode_void': return self.void_tag_map[element['name']](element['attr']) case 'link': return f"{element['url']}" case 'emote': return self.emote_map[element['name']] case 'rule': return '

' case 'mention': return self.make_mention(element) case _: raise UnknownASTElementError( element_type=element['type'], element=element ) def render(self, ast): out = self.wrap_in_paragraphs(ast, context_is_block=True, is_root=True) return out class HTMLRenderer(BabycodeRenderer): def __init__(self, fragment=False): super().__init__(TAGS, VOID_TAGS, EMOJI, fragment) self.mentions = [] def make_mention(self, e): from ..models import Users from flask import url_for, current_app with current_app.test_request_context('/'): target_user = Users.find({'username': e['name'].lower()}) if not target_user: return f"@{e['name']}" mention_data = { 'mention_text': f"@{e['name']}", 'mentioned_user_id': int(target_user.id), "start": e['start'], "end": e['end'], } if mention_data not in self.mentions: self.mentions.append(mention_data) return f"{'@' if not target_user.has_display_name() else ''}{target_user.get_readable_name()}" def render(self, ast): out = super().render(ast) return BabycodeRenderResult(out, self.mentions) class RSSXMLRenderer(BabycodeRenderer): def __init__(self, fragment=False): super().__init__(RSS_TAGS, VOID_TAGS, RSS_EMOJI, fragment) def make_mention(self, e): from ..models import Users from flask import url_for, current_app with current_app.test_request_context('/'): target_user = Users.find({'username': e['name'].lower()}) if not target_user: return f"@{e['name']}" return f'{target_user.get_readable_name()}' NAMED_COLORS = [ 'black', 'silver', 'gray', 'white', 'maroon', 'red', 'purple', 'fuchsia', 'green', 'lime', 'olive', 'yellow', 'navy', 'blue', 'teal', 'aqua', 'aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'aqua', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'gray', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'fuchsia', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'transparent', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen', ] def make_emoji(name, code): return f'

' EMOJI = { 'angry': make_emoji('angry', 'angry'), '(': make_emoji('frown', '('), 'D': make_emoji('grin', 'D'), 'imp': make_emoji('imp', 'imp'), 'angryimp': make_emoji('impangry', 'angryimp'), 'impangry': make_emoji('impangry', 'impangry'), 'lobster': make_emoji('lobster', 'lobster'), '|': make_emoji('neutral', '|'), 'pensive': make_emoji('pensive', 'pensive'), 'scissors': make_emoji('scissors', 'scissors'), ')': make_emoji('smile', ')'), 'smiletear': make_emoji('smiletear', 'smiletear'), 'crytear': make_emoji('smiletear', 'crytear'), ',': make_emoji('sob', ','), 'T': make_emoji('sob', 'T'), 'cry': make_emoji('sob', 'cry'), 'sob': make_emoji('sob', 'sob'), 'o': make_emoji('surprised', 'o'), 'O': make_emoji('surprised', 'O'), 'hmm': make_emoji('think', 'hmm'), 'think': make_emoji('think', 'think'), 'thinking': make_emoji('think', 'thinking'), 'P': make_emoji('tongue', 'P'), 'p': make_emoji('tongue', 'p'), 'weary': make_emoji('weary', 'weary'), ';': make_emoji('wink', ';'), 'wink': make_emoji('wink', 'wink'), } RSS_EMOJI = { **EMOJI, 'angry': '😡', '(': '🙁', 'D': '😃', 'imp': '😈', 'angryimp': '👿', 'impangry': '👿', 'lobster': '🦞', '|': '😐', 'pensive': '😔', 'scissors': '✂️', ')': '🙂', 'smiletear': '🥲', 'crytear': '🥲', ',': '😭', 'T': '😭', 'cry': '😭', 'sob': '😭', 'o': '😮', 'O': '😮', 'hmm': '🤔', 'think': '🤔', 'thinking': '🤔', 'P': '😛', 'p': '😛', 'weary': '😩', ';': '😉', 'wink': '😉', } TEXT_ONLY = ["code"] def tag_code(children, attr): is_inline = children.find('\n') == -1 if is_inline: return f"{children}" else: input_code = children.strip() button = f"" unhighlighted = f"

code block{button}{input_code}

" if not attr: return unhighlighted try: lexer = get_lexer_by_name(attr.strip()) formatter = HtmlFormatter(nowrap=True) return f"

{lexer.name}{button}{highlight(Markup(input_code).unescape(), lexer, formatter)}

" except PygmentsClassNotFound: return unhighlighted def tag_list(children): list_body = re.sub(r" +\n", "
", children.strip()) list_body = re.sub(r"\n\n+", "\1", list_body) return " ".join([f"

{x}

" for x in list_body.split("\1") if x]) def tag_color(children, attr): if not attr: return f"[color]{children}[/color]" hex_re = r"^#?([0-9a-f]{6}|[0-9a-f]{3})$" potential_color = attr.lower().strip() if potential_color in NAMED_COLORS: return f"{children}" m = re.match(hex_re, potential_color) if m: return f"{children}" # return just the way it was if we can't parse it return f"[color={attr}]{children}[/color]" def tag_spoiler(children, attr): spoiler_name = attr if attr else "Spoiler" content = f"" container = f"""""" return container def tag_image(children, attr): img = f" $\"{children}\"$ " return f"

{img}

" TAGS = { "b": lambda children, attr: f"{children}", "i": lambda children, attr: f"{children}", "s": lambda children, attr: f"~~{children}~~", "u": lambda children, attr: f"{children}", "img": tag_image, "url": lambda children, attr: f"{children}", "quote": lambda children, attr: f"

{children}

", "code": tag_code, "ul": lambda children, attr: f"

{tag_list(children)}", "ol": lambda children, attr: f"

{tag_list(children)}", "big": lambda children, attr: f"{children}", "small": lambda children, attr: f"{children}", "color": tag_color, "center": lambda children, attr: f"

{children}

", "right": lambda children, attr: f"

{children}

", "spoiler": tag_spoiler, } def tag_code_rss(children, attr): is_inline = children.find('\n') == -1 if is_inline: return f'{children}' else: return f'

{children}

' def tag_url_rss(children, attr): if attr.startswith('/'): from flask import current_app uri = f"{current_app.config['PREFERRED_URL_SCHEME']}://{current_app.config['SERVER_NAME']}{attr}" return f"{children}" return f"{children}" def tag_image_rss(children, attr): if attr.startswith('/'): from flask import current_app uri = f"{current_app.config['PREFERRED_URL_SCHEME']}://{current_app.config['SERVER_NAME']}{attr}" return f'

' return f'

' RSS_TAGS = { **TAGS, 'img': tag_image_rss, 'url': tag_url_rss, 'spoiler': lambda children, attr: f'

{attr or "Spoiler"} (click to reveal)

{children}

', 'code': tag_code_rss, 'big': lambda children, attr: f'{children}', 'small': lambda children, attr: f'{children}' } VOID_TAGS = { 'lb': lambda attr: '[', 'rb': lambda attr: ']', 'at': lambda attr: '@', 'd': lambda attr: '-', } # [img] is considered block for the purposes of collapsing whitespace, # despite being potentially inline (since the resulting tag is inline, but creates a block container around itself and sibling images). # [code] has a special case in is_inline(). INLINE_TAGS = { 'b', 'i', 's', 'u', 'color', 'big', 'small', 'url', 'lb', 'rb', 'at', 'd' } def is_tag(e, tag=None): if e is None: return False if isinstance(e, str): return False if e['type'] != 'bbcode' and e['type'] != 'bbcode_void': return False if tag is None: return True return e['name'] == tag def is_text(e): return isinstance(e, str) def is_inline(e): if e is None: return False # i think if is_text(e): return True if is_tag(e): if is_tag(e, 'code'): # special case, since [code] can be inline OR block return '\n' not in e['children'][0] return e['name'] in INLINE_TAGS return e['type'] != 'rule' def should_collapse(text, surrounding): if not isinstance(text, str): return False if not text: return True if not text.strip() and '\n' not in text: return not is_inline(surrounding[0]) and not is_inline(surrounding[1]) return False def sanitize(s): return escape(s.strip().replace('\r\n', '\n').replace('\r', '\n')) def babycode_ast(s: str, banned_tags=[]): """ transforms a string of babycode into an AST. the AST is a list of strings or dicts. a string element is plain unformatted text. a dict element is a node that contains at least the key `type`. possible types are: - bbcode - bbcode_void - link - emote - rule - mention bbcode type elements have a children key that is a list of children of that node. the children are themselves elements (string or dict). """ allowed_tags = set(TAGS.keys()) if banned_tags is not None: for tag in banned_tags: allowed_tags.discard(tag) subj = sanitize(s) parser = Parser(subj) parser.valid_bbcode_tags = allowed_tags parser.void_bbcode_tags = set(VOID_TAGS) parser.bbcode_tags_only_text_children = TEXT_ONLY parser.mentions_allowed = '@mention' not in banned_tags parser.valid_emotes = EMOJI.keys() uncollapsed = parser.parse() elements = [] for i in range(len(uncollapsed)): e = uncollapsed[i] surrounding = ( uncollapsed[i - 1] if i-1 >= 0 else None, uncollapsed[i + 1] if i+1 < len(uncollapsed) else None ) if not should_collapse(e, surrounding): elements.append(e) return elements def babycode_to_html(s: str, banned_tags=[], fragment=False) -> BabycodeRenderResult: """ transforms a string of babycode into html. parameters: s (str) - babycode string banned_tags (list) - babycode tags to exclude from being parsed. they will remain as plain text in the transformation. fragment (bool) - skip adding an html p tag to the first element if it is inline. """ ast = babycode_ast(s, banned_tags) r = HTMLRenderer(fragment=fragment) return r.render(ast) def babycode_to_rssxml(s: str, banned_tags=[], fragment=False) -> str: """ transforms a string of babycode into rss-compatible x/html. parameters: s (str) - babycode string banned_tags (list) - babycode tags to exclude from being parsed. they will remain as plain text in the transformation. fragment (bool) - skip adding an html p tag to the first element if it is inline. """ ast = babycode_ast(s, banned_tags) r = RSSXMLRenderer(fragment=fragment) return r.render(ast)