# originally written in lua by kaesa
"""Recursive-descent parser for "babycode", a small BBCode-like markup.

The parser turns a source string into a flat list of elements.  Each element
is either a plain ``str`` (a run of text) or a ``dict`` with a ``"type"`` key
of ``"emote"``, ``"bbcode"``, ``"rule"`` or ``"link"``.
"""

import re

# Single-character classes, applied one character at a time by
# Parser.match_pattern.
PAT_EMOTE = r"[^\s:]"
PAT_BBCODE_TAG = r"\w"
PAT_BBCODE_ATTR = r"[^\s\]]"
# Whole-URL pattern, applied at the current position by Parser.parse_link.
PAT_LINK = r"https?:\/\/[\w\-_.?:\/=&~@#%]+[\w\-\/]"

# Compiled once; re.ASCII keeps ``\w`` limited to ASCII so a link can only
# consist of printable ASCII characters.
_LINK_RE = re.compile(PAT_LINK, re.ASCII)


class Parser:
    """Backtracking parser over a single source string.

    After construction, callers configure the instance by assigning:

    * ``valid_bbcode_tags`` -- tag names recognised as ``[name]...[/name]``
    * ``valid_emotes`` -- emote names recognised as ``:name:``
    * ``bbcode_tags_only_text_children`` -- tags whose body is kept as raw
      text instead of being parsed recursively

    and then call :meth:`parse`.
    """

    def __init__(self, src_str):
        """Create a parser positioned at the start of *src_str*."""
        self.valid_bbcode_tags = []
        self.valid_emotes = []
        # Must be a plain list: a trailing comma here would make it a tuple
        # containing a list, and no tag name would ever be found in it.
        self.bbcode_tags_only_text_children = []
        self.source = src_str
        self.position = 0
        # Stack of saved positions used for backtracking.
        self.position_stack = []

    def advance(self, count=1):
        """Move the cursor forward by *count* characters."""
        self.position += count

    def is_end_of_source(self, offset=0):
        """Return True if ``position + offset`` is at or past the end."""
        return self.position + offset >= len(self.source)

    def save_position(self):
        """Push the current position so it can be restored or forgotten."""
        self.position_stack.append(self.position)

    def restore_position(self):
        """Pop the last saved position and rewind the cursor to it."""
        self.position = self.position_stack.pop()

    def forget_position(self):
        """Pop the last saved position without moving the cursor."""
        self.position_stack.pop()

    def peek_char(self, offset=0):
        """Return the character at ``position + offset``, or ``""`` at end."""
        if self.is_end_of_source(offset):
            return ""
        return self.source[self.position + offset]

    def get_char(self):
        """Return the current character (``""`` at end) and advance by one."""
        char = self.peek_char()
        self.advance()
        return char

    def check_char(self, wanted):
        """Consume the current character if it equals *wanted*.

        Returns True when the character was consumed, False otherwise.
        """
        if self.peek_char() == wanted:
            self.advance()
            return True
        return False

    def check_str(self, wanted):
        """Consume *wanted* if the source continues with it at the cursor.

        Returns True when consumed; on False the cursor is left untouched.
        """
        if self.source.startswith(wanted, self.position):
            self.advance(len(wanted))
            return True
        return False

    def match_pattern(self, pattern):
        """Consume and return the longest run of characters matching *pattern*.

        *pattern* is a regular expression tested against one character at a
        time.  Returns ``""`` when the current character does not match.
        """
        is_match = re.compile(pattern).match
        start = self.position
        while not self.is_end_of_source() and is_match(self.peek_char()):
            self.advance()
        return self.source[start:self.position]

    def parse_emote(self):
        """Parse ``:name:`` where *name* is listed in ``valid_emotes``.

        Returns ``{"type": "emote", "name": name}``, or None with the cursor
        restored when no valid emote starts here.
        """
        self.save_position()

        if not self.check_char(":"):
            self.restore_position()
            return None

        name = self.match_pattern(PAT_EMOTE)

        if not self.check_char(":") or name not in self.valid_emotes:
            self.restore_position()
            return None

        self.forget_position()
        return {
            "type": "emote",
            "name": name,
        }

    def parse_bbcode_open(self):
        """Parse an opening tag ``[name]`` or ``[name=attr]``.

        Returns ``(name, attr)`` with ``attr`` None when no ``=`` is present,
        or ``(None, None)`` with the cursor restored when the text is not an
        opening tag or the name is not in ``valid_bbcode_tags``.
        """
        self.save_position()

        if not self.check_char("["):
            self.restore_position()
            return None, None

        name = self.match_pattern(PAT_BBCODE_TAG)

        if name == "":
            self.restore_position()
            return None, None

        attr = None
        if self.check_char("="):
            attr = self.match_pattern(PAT_BBCODE_ATTR)

        if not self.check_char("]") or name not in self.valid_bbcode_tags:
            self.restore_position()
            return None, None

        self.forget_position()
        return name, attr

    def parse_bbcode(self):
        """Parse a full ``[name]...[/name]`` element.

        The body is parsed recursively, unless *name* is listed in
        ``bbcode_tags_only_text_children``, in which case everything up to
        the closing tag is kept as one text child.

        An unclosed tag is accepted: everything up to the end of the source
        becomes its children.

        Returns ``{"type": "bbcode", "name", "attr", "children"}``, or None
        with the cursor restored when no valid tag starts here.
        """
        self.save_position()

        name, attr = self.parse_bbcode_open()

        if name is None:
            self.restore_position()
            return None

        closing_tag = f"[/{name}]"
        text_only = name in self.bbcode_tags_only_text_children
        children = []

        while not self.is_end_of_source():
            if self.check_str(closing_tag):
                break

            if text_only:
                ch = self.get_char()
                # Accumulate the raw body into a single text child.
                if not children:
                    children.append(ch)
                else:
                    children[0] += ch
            else:
                element = self.parse_element(children)

                if element is None:
                    self.restore_position()
                    return None

                children.append(element)

        self.forget_position()
        return {
            "type": "bbcode",
            "name": name,
            "attr": attr,
            "children": children,
        }

    def parse_rule(self):
        """Parse a horizontal rule, written as ``---``.

        Returns ``{"type": "rule"}`` or None.
        """
        if not self.check_str("---"):
            return None

        return {
            "type": "rule",
        }

    def parse_link(self):
        """Parse a bare ``http://`` or ``https://`` URL at the cursor.

        Only the text matched by ``PAT_LINK`` is consumed, so whatever
        follows the URL (spaces, trailing punctuation) stays in the source
        as ordinary text.

        Returns ``{"type": "link", "url": url}`` or None.
        """
        found = _LINK_RE.match(self.source, self.position)

        if found is None:
            return None

        url = found.group(0)
        self.advance(len(url))
        return {
            "type": "link",
            "url": url,
        }

    def parse_element(self, siblings):
        """Parse the next element at the cursor.

        Tries emote, bbcode, rule and link in that order.  When none apply,
        one character is consumed as text; if the last entry of *siblings* is
        a string, that entry is removed from *siblings* and returned with the
        character appended, so that consecutive text forms a single string
        once the caller appends the result.

        Returns a dict element, a text string, or None at end of source.
        """
        if self.is_end_of_source():
            return None

        element = (
            self.parse_emote()
            or self.parse_bbcode()
            or self.parse_rule()
            or self.parse_link()
        )

        if element is not None:
            return element

        if siblings and isinstance(siblings[-1], str):
            return siblings.pop() + self.get_char()

        return self.get_char()

    def parse(self):
        """Parse the remaining source and return the list of elements."""
        elements = []

        while True:
            element = self.parse_element(elements)
            if element is None:
                break

            elements.append(element)

        return elements