-- contributed by kaesa

--- Pattern used for emote names (applied for every char).
local PAT_EMOTE = "[^%s:]"

--- Pattern used for bbcode tags (applied for every char).
local PAT_BBCODE_TAG = "%w"

--- Pattern used for bbcode tag attribute (applied for every char).
local PAT_BBCODE_ATTR = "[^%s%]]"

--- Pattern used to detect loose links.
--- The "-" inside the character classes is escaped: the Lua manual leaves the
--- interaction between a class such as %w and a "-" range undefined.
--- The final class forces a link to end on an alphanumeric, "-" or "/", so
--- trailing punctuation (".", ",", ...) is not part of the link.
local PAT_LINK = "https?://[%w%-_%.%?:/%+=&~@#%%]+[%w%-/]"

--- Same as PAT_LINK, anchored so that it only matches at the position given
--- as the `init` argument of `string.match`.
local PAT_LINK_ANCHORED = "^" .. PAT_LINK

--- @class Parser
--- @field valid_bbcode_tags table Table of valid BBCode tags (keyed by tag name).
--- @field valid_emotes table Table of valid emotes (keyed by emote name).
--- @field bbcode_tags_only_text_children table Table of tags that might only contain text (keyed by tag name).
--- @field source string Source to parse.
--- @field position integer Current position of the parser (1-based).
--- @field elements table Created empty by `Parser.new`; not used by the parser itself.
--- @field position_stack integer[] Position stack used for rewind parsing.
---
--- Parser class.
local Parser = {}
Parser.__index = Parser

--- Creates a new parser.
---
--- The three lookup tables start empty: the caller is expected to fill
--- `valid_bbcode_tags`, `valid_emotes` and `bbcode_tags_only_text_children`
--- before parsing, otherwise no tag or emote is recognized.
---
--- @param src string
--- @return Parser
function Parser.new(src)
    return setmetatable({
        valid_bbcode_tags = {},
        valid_emotes = {},
        bbcode_tags_only_text_children = {},
        source = src,
        position = 1,
        elements = {},
        position_stack = {},
    }, Parser)
end

--- Advances the parser by COUNT characters.
--- @param count integer? Set to 1 if nil.
function Parser:advance(count)
    self.position = self.position + (count or 1)
end

--- Checks if the position is out of bounds of the source.
--- @param offset integer? Set to 0 if nil.
--- @return boolean
function Parser:is_end_of_source(offset)
    return self.position + (offset or 0) > #self.source
end

--- Saves the current position to the position stack.
function Parser:save_position()
    local stack = self.position_stack
    stack[#stack + 1] = self.position
end

--- Restores the current position to the top of the position stack, and removes
--- that position from the stack.
function Parser:restore_position()
    self.position = table.remove(self.position_stack)
end

--- Forgets the top position in the position stack.
function Parser:forget_position()
    table.remove(self.position_stack)
end

--- Retrieves the character at the current position (plus optional offset).
--- Returns an empty string when that position is past the end of the source.
---
--- @param offset integer? Set to 0 if nil.
--- @return string
function Parser:peek_char(offset)
    offset = offset or 0
    if self:is_end_of_source(offset) then
        return ""
    end
    local index = self.position + offset
    return self.source:sub(index, index)
end

--- Retrieves the character at the current position and advances the position.
---
--- @return string
function Parser:get_char()
    local char = self:peek_char()
    self:advance()
    return char
end

--- Checks if the character at the current position is WANTED. If so,
--- advances the position, and returns true. Does nothing otherwise and
--- returns false.
---
--- @param wanted string The character to check with.
--- @return boolean
function Parser:check_char(wanted)
    if self:peek_char() ~= wanted then
        return false
    end
    self:advance()
    return true
end

--- Checks if WANTED is present at the current position in the source. If so,
--- advances the position past it and returns true. Does nothing otherwise and
--- returns false.
---
--- @param wanted string
--- @return boolean
function Parser:check_str(wanted)
    local first = self.position
    -- A single slice comparison replaces the char-by-char walk; near the end
    -- of the source the slice is shorter than WANTED and so cannot be equal.
    if self.source:sub(first, first + #wanted - 1) ~= wanted then
        return false
    end
    self:advance(#wanted)
    return true
end

--- Consumes the longest run of characters, starting at the current position,
--- in which every single character matches PATTERN. Returns the consumed
--- string (possibly empty) and leaves the position right after it.
---
--- @param pattern string Pattern applied to one character at a time.
--- @return string
function Parser:match_pattern(pattern)
    local source = self.source
    local length = #source
    local first = self.position
    local stop = first
    -- Scan first, slice once: avoids rebuilding the buffer for each character.
    while stop <= length and source:sub(stop, stop):match(pattern) do
        stop = stop + 1
    end
    self.position = stop
    return source:sub(first, stop - 1)
end

--- Tries to parse an emote. Only recognizes emotes present in the `valid_emotes`
--- field of the parser. The position is left untouched on failure.
---
--- Format of the table :
--- { type = "emote",
---   name = string }
---
--- @return table?
function Parser:parse_emote()
    self:save_position()
    if self:check_char(":") then
        local name = self:match_pattern(PAT_EMOTE)
        -- the name must be followed by a closing ":" and be a known emote
        if self:check_char(":") and self.valid_emotes[name] then
            self:forget_position()
            return { type = "emote", name = name }
        end
    end
    self:restore_position()
    return nil
end

--- Tries to parse a bbcode opening tag. Only recognizes tags present in
--- `valid_bbcode_tags` field of the parser. The position is left untouched on
--- failure.
---
--- Returns the name of the tag, and its attribute (if any present). An "="
--- directly followed by "]" gives an empty string as attribute.
---
--- @return string?, string?
function Parser:parse_bbcode_open()
    self:save_position()
    if self:check_char("[") then
        local name = self:match_pattern(PAT_BBCODE_TAG)
        if name ~= "" then
            local attribute = nil
            if self:check_char("=") then
                attribute = self:match_pattern(PAT_BBCODE_ATTR)
            end
            -- the tag must be closed by "]" and be a known tag
            if self:check_char("]") and self.valid_bbcode_tags[name] then
                self:forget_position()
                return name, attribute
            end
        end
    end
    self:restore_position()
    return nil
end

--- Tries to parse a bbcode tag. Only recognizes tags present in `valid_bbcode_tags`
--- field of the parser.
---
--- A tag whose closing tag is missing is implicitly closed at the end of the
--- source: everything after the opening tag becomes its children.
---
--- For tags listed in `bbcode_tags_only_text_children`, the content up to the
--- closing tag is kept as one raw string child (no child at all when empty).
---
--- Format of the table :
--- { type = "bbcode",
---   name = string,
---   attribute = string?,
---   children = (string|table)[] }
---
--- @return table?
function Parser:parse_bbcode()
    -- parse_bbcode_open leaves the position untouched when it fails, and
    -- nothing below can fail, so no extra save/restore is needed here.
    local name, attribute = self:parse_bbcode_open()
    if name == nil then
        return nil
    end

    local close_tag = "[/" .. name .. "]"
    local children = {}

    if self.bbcode_tags_only_text_children[name] then
        local source = self.source
        -- plain find (4th argument): the close tag is literal text, not a pattern
        local first, last = source:find(close_tag, self.position, true)
        local text
        if first then
            text = source:sub(self.position, first - 1)
            self.position = last + 1
        else
            text = source:sub(self.position)
            self.position = #source + 1
        end
        if text ~= "" then
            children[1] = text
        end
    else
        while not self:is_end_of_source() do
            if self:check_str(close_tag) then
                break
            end
            local element = self:parse_element(children)
            -- parse_element only returns nil at the end of the source, which
            -- the loop condition already excludes; kept as a safety net.
            if element == nil then
                break
            end
            children[#children + 1] = element
        end
    end

    return {
        type = "bbcode",
        name = name,
        attribute = attribute,
        children = children,
    }
end

--- Tries to parse a ruler element ("---" at the current position).
---
--- Format of the table :
--- { type = "ruler" }
---
--- @return table?
function Parser:parse_ruler()
    if self:check_str("---") then
        return { type = "ruler" }
    end
    return nil
end

--- Tries to parse a loose link starting exactly at the current position.
--- Only the text matched by the link pattern is consumed, so characters
--- following the link (punctuation, a bbcode closing tag, ...) are left in
--- the source and are not part of `url`.
---
--- Format of the table :
--- { type = "link",
---   url = string }
---
--- @return table?
function Parser:parse_link()
    local url = self.source:match(PAT_LINK_ANCHORED, self.position)
    if url == nil then
        return nil
    end
    self:advance(#url)
    return { type = "link", url = url }
end

--- Tries to parse an element.
---
--- Returns either a table or a string.
--- A string represents simple text.
--- A table represents a different kind of element that can be differentiated
--- by its `type` field.
---
--- Valid types : emote, bbcode, link, ruler.
--- Each type has different fields. See `Parser:parse_*` functions for more
--- info.
---
--- When the next character is plain text and the last entry of SIBBLINGS is a
--- string, that entry is removed from SIBBLINGS and returned with the new
--- character appended, so the caller ends up with one merged text entry.
---
--- Returns nil when the end of the source has been reached.
---
--- @param sibblings (string|table)[] Elements already parsed at the same level.
--- @return (table|string)?
function Parser:parse_element(sibblings)
    if self:is_end_of_source() then
        return nil
    end

    local element = self:parse_emote()
        or self:parse_bbcode()
        or self:parse_ruler()
        or self:parse_link()
    if element ~= nil then
        return element
    end

    -- plain text: merge with the previous text entry when there is one
    local last = sibblings[#sibblings]
    if type(last) == "string" then
        table.remove(sibblings)
        return last .. self:get_char()
    end
    return self:get_char()
end

--- Parses the whole source at once, returning all parsed elements.
--- See `Parser:parse_element` for more information about the return value.
---
--- @return (string|table)[]
function Parser:parse()
    local elements = {}
    while true do
        local element = self:parse_element(elements)
        if element == nil then
            break
        end
        elements[#elements + 1] = element
    end
    return elements
end

return Parser