-- porom/lib/babycode-parser.lua
--
-- 417 lines
-- 9.2 KiB
-- Lua

-- contributed by kaesa
--- Pattern used for emote names (applied to one character at a time):
--- any character that is neither whitespace nor ":".
local PAT_EMOTE = "[^%s:]"
--- Pattern used for bbcode tag names (applied to one character at a time):
--- alphanumeric characters only.
local PAT_BBCODE_TAG = "%w"
--- Pattern used for a bbcode tag attribute (applied to one character at a
--- time): any character that is neither whitespace nor "]".
local PAT_BBCODE_ATTR = "[^%s%]]"
--- Pattern used to detect loose links: "http://" or "https://", followed by
--- one or more URL characters, and ending on an alphanumeric, "-" or "/"
--- (so that trailing punctuation such as "." is not part of the link).
---
--- The hyphens inside the sets are written "%-": the Lua manual leaves the
--- interaction between a class such as "%w" and a following "-" undefined,
--- so an unescaped "[%w-_...]" only works by accident of the implementation.
local PAT_LINK = "https?://[%w%-_%.%?:/%+=&~%@#%%]+[%w%-/]"
--- @class Parser
--- @field valid_bbcode_tags table Set of valid BBCode tags, keyed by tag name.
--- @field valid_emotes table Set of valid emotes, keyed by emote name.
--- @field bbcode_tags_only_text_children table Set of tags (keyed by tag name) that may only contain text.
--- @field source string Source to parse.
--- @field position integer Current position of the parser (1-based index into `source`).
--- @field elements table Created empty by `Parser.new`.
--- @field position_stack integer[] Position stack used for rewind parsing.
---
--- Parser class.
local Parser = {}
-- Method lookups on instances fall back to the class table. Using the class
-- itself as the (single, shared) metatable avoids allocating a new metatable
-- for every parser instance.
Parser.__index = Parser

--- Creates a new parser positioned at the first character of SRC.
---
--- The tables of valid tags and emotes start out empty; the caller is expected
--- to fill them in before parsing, otherwise no tag or emote is recognized.
---
--- @param src string
--- @return Parser
function Parser.new(src)
    return setmetatable({
        valid_bbcode_tags = {},
        valid_emotes = {},
        bbcode_tags_only_text_children = {},
        source = src,
        position = 1,
        elements = {},
        position_stack = {},
    }, Parser)
end
--- Moves the parser position forward by COUNT characters.
--- No bounds check is performed here.
--- @param count integer? Number of characters to skip; 1 when nil.
function Parser:advance(count)
    local step = count or 1
    self.position = self.position + step
end
--- Tells whether the current position (plus OFFSET) lies past the last
--- character of the source.
--- @param offset integer? Added to the current position; 0 when nil.
--- @return boolean
function Parser:is_end_of_source(offset)
    local target = self.position + (offset or 0)
    return target > #self.source
end
--- Pushes the current position on top of the position stack, so that it can
--- later be restored or forgotten.
function Parser:save_position()
    local stack = self.position_stack
    stack[#stack + 1] = self.position
end
--- Pops the top of the position stack and makes it the current position,
--- rewinding the parser to where `save_position` was last called.
function Parser:restore_position()
    local stack = self.position_stack
    local depth = #stack
    self.position = stack[depth]
    stack[depth] = nil
end
--- Drops the top entry of the position stack without touching the current
--- position (used once a speculative parse has succeeded).
function Parser:forget_position()
    local stack = self.position_stack
    stack[#stack] = nil
end
--- Retrieves the character at the current position (plus optional offset)
--- without moving the parser.
---
--- @param offset integer? Added to the current position; 0 when nil.
--- @return string The character, or "" when the requested index is past the end of the source.
function Parser:peek_char(offset)
    local delta = offset or 0
    if self:is_end_of_source(delta) then
        -- nothing left to read at that index
        return ""
    end
    local index = self.position + delta
    return self.source:sub(index, index)
end
--- Retrieves the character at the current position, then advances the
--- position by one. The position is advanced even when the end of the source
--- has been reached (in which case "" is returned).
---
--- @return string
function Parser:get_char()
    local current = self:peek_char()
    self:advance()
    return current
end
--- Checks whether the character at the current position is WANTED. If so,
--- advances the position by one and returns true. Otherwise leaves the
--- position untouched and returns false.
---
--- @param wanted string The character to compare against.
--- @return boolean
function Parser:check_char(wanted)
    if self:peek_char() ~= wanted then
        return false
    end
    self:advance()
    return true
end
--- Checks whether WANTED is present at the current position in the source.
--- If so, advances the position past it and returns true. Otherwise leaves
--- the position untouched and returns false.
---
--- @param wanted string
--- @return boolean
---
function Parser:check_str(wanted)
    self:save_position()
    -- "." matches any single character, so this walks WANTED one character
    -- at a time, in order.
    for expected in wanted:gmatch(".") do
        if not self:check_char(expected) then
            -- mismatch: rewind whatever was consumed so far
            self:restore_position()
            return false
        end
    end
    self:forget_position()
    return true
end
--- Consumes the longest run of characters, starting at the current position,
--- in which every single character matches PATTERN. The pattern is applied to
--- one character at a time, not to the run as a whole. Returns the consumed
--- string ("" when the first character does not match or the end of the
--- source has been reached) and leaves the position right after it.
---
--- @param pattern string Lua pattern tested against each character.
--- @return string
---
function Parser:match_pattern(pattern)
    local start = self.position
    while not self:is_end_of_source() do
        if not self:peek_char():match(pattern) then
            break
        end
        self:advance()
    end
    -- Slice the consumed range once instead of growing a string one
    -- character at a time (which is quadratic in the length of the run).
    -- When nothing was consumed the range is empty and "" is returned.
    return self.source:sub(start, self.position - 1)
end
--- Tries to parse an emote of the form ":name:". Only recognizes emotes
--- present in the `valid_emotes` field of the parser. On failure the position
--- is left where it was.
---
--- Format of the table :
--- { type = "emote",
--- name = string }
---
--- @return table? The emote element, or nil when there is no valid emote here.
function Parser:parse_emote()
    self:save_position()
    local name
    -- an emote starts with ":" ...
    local ok = self:check_char(":")
    if ok then
        name = self:match_pattern(PAT_EMOTE)
        -- ... ends with ":" and must carry a known name
        ok = self:check_char(":") and self.valid_emotes[name]
    end
    if not ok then
        self:restore_position()
        return nil
    end
    self:forget_position()
    return {
        type = "emote",
        name = name,
    }
end
--- Tries to parse a bbcode opening tag ("[name]" or "[name=attribute]").
--- Only recognizes tags present in the `valid_bbcode_tags` field of the
--- parser. On failure the position is left where it was.
---
--- Returns the name of the tag, and its attribute (if any is present).
---
--- @return string?, string?
function Parser:parse_bbcode_open()
    self:save_position()
    -- Shared failure path: rewind to the saved position and yield nil.
    local function rewind()
        self:restore_position()
        return nil
    end
    -- opening bracket
    if not self:check_char("[") then
        return rewind()
    end
    -- tag name, which may not be empty
    local tag = self:match_pattern(PAT_BBCODE_TAG)
    if tag == "" then
        return rewind()
    end
    -- optional "=attribute"
    local attr = nil
    if self:check_char("=") then
        attr = self:match_pattern(PAT_BBCODE_ATTR)
    end
    -- closing bracket, and the tag has to be a known one
    if not (self:check_char("]") and self.valid_bbcode_tags[tag]) then
        return rewind()
    end
    self:forget_position()
    return tag, attr
end
--- Tries to parse a bbcode tag together with its content. Only recognizes
--- tags present in the `valid_bbcode_tags` field of the parser.
---
--- The content runs up to the first matching "[/name]". If the source ends
--- before a closing tag is found, the tag is NOT rejected: the element is
--- still returned, with everything up to the end of the source as children.
---
--- For tags listed in `bbcode_tags_only_text_children` the content is taken
--- verbatim as a single string child (no child at all when it is empty).
--- Otherwise the content is parsed with `Parser:parse_element`.
---
--- Format of the table :
--- { type = "bbcode",
--- name = string,
--- attribute = string?,
--- children = (string|table)[] }
---
--- @return table? The bbcode element, or nil when there is no valid opening tag here.
function Parser:parse_bbcode()
    self:save_position()
    local name, attribute = self:parse_bbcode_open()
    -- if there isnt a open bbcode tag here
    if name == nil then
        self:restore_position()
        return nil
    end
    local close_tag = "[/" .. name .. "]"
    local children = {}
    if self.bbcode_tags_only_text_children[name] then
        -- Raw text content: locate the closing tag with a single plain
        -- (non-pattern) search instead of testing for it and appending one
        -- character at every position, which is quadratic on long contents.
        local first, last = self.source:find(close_tag, self.position, true)
        local text
        if first then
            text = self.source:sub(self.position, first - 1)
            -- continue right after the closing tag
            self.position = last + 1
        else
            -- no closing tag: the content runs to the end of the source
            text = self.source:sub(self.position)
            self.position = #self.source + 1
        end
        if text ~= "" then
            children[1] = text
        end
    else
        -- parse children elements of that tag
        while not self:is_end_of_source() do
            -- if there is a close tag here
            if self:check_str(close_tag) then
                break
            end
            local element = self:parse_element(children)
            -- parse_element only returns nil at the end of the source, which
            -- the loop condition already rules out; kept as a defensive
            -- rewind should that ever change.
            if element == nil then
                self:restore_position()
                return nil
            end
            children[#children + 1] = element
        end
    end
    self:forget_position()
    return {
        type = "bbcode",
        name = name,
        attribute = attribute,
        children = children,
    }
end
--- Tries to parse a ruler element, written as "---" in the source.
--- The three dashes are consumed on success; nothing is consumed otherwise.
---
--- Format of the table :
--- { type = "ruler" }
---
--- @return table? The ruler element, or nil when there is no "---" here.
function Parser:parse_ruler()
    if self:check_str("---") then
        return { type = "ruler" }
    end
    return nil
end
--- Tries to parse a loose link (a bare "http://" or "https://" URL) starting
--- exactly at the current position.
---
--- Only the text matched by the link pattern is consumed and returned as the
--- URL. Characters around it are left for the caller to parse as text, e.g.
--- in "(https://example.com)." the link is "https://example.com".
--- The match is anchored at the current position on purpose: testing whether
--- the pattern occurs anywhere inside the surrounding word and then returning
--- the whole word would let through "URLs" that do not start with
--- http(s):// or that carry arbitrary leading/trailing characters.
---
--- Format of the table :
--- { type = "link",
--- url = string }
---
--- @return table? The link element, or nil when there is no link here.
function Parser:parse_link()
    -- "^" anchors the match at the init position given as second argument.
    local url = self.source:match("^" .. PAT_LINK, self.position)
    if url == nil then
        return nil
    end
    self:advance(#url)
    return {
        type = "link",
        url = url,
    }
end
--- Tries to parse one element at the current position.
---
--- Returns either a table or a string.
--- A string represents simple text.
--- A table represents another kind of element, which can be differentiated
--- by its `type` field.
---
--- Valid types : emote, bbcode, link, ruler (tried in the order emote,
--- bbcode, ruler, link). Each type has different fields. See the
--- `Parser:parse_*` functions for more info.
---
--- When none of them matches, a single character is consumed as text. If the
--- last entry of SIBBLINGS is itself a string, that entry is removed from
--- SIBBLINGS and returned with the new character appended, so that the caller
--- ends up with one merged text run once it inserts the result.
---
--- Returns nil when the end of the source has been reached.
---
--- @param sibblings (string|table)[] Elements already parsed at this level; may be modified (see above).
--- @return (table|string)?
function Parser:parse_element(sibblings)
    if self:is_end_of_source() then
        return nil
    end
    local element = self:parse_emote()
        or self:parse_bbcode()
        or self:parse_ruler()
        or self:parse_link()
    if element ~= nil then
        return element
    end
    -- Plain text: extend the previous text run when there is one.
    local previous = sibblings[#sibblings]
    if type(previous) ~= "string" then
        return self:get_char()
    end
    sibblings[#sibblings] = nil
    return previous .. self:get_char()
end
--- Parses the source from the current position to its end, returning all
--- parsed elements in order.
--- See `Parser:parse_element` for more information about the elements.
---
--- @return (string|table)[]
function Parser:parse()
    local elements = {}
    local element = self:parse_element(elements)
    -- parse_element yields nil once the end of the source is reached
    while element ~= nil do
        elements[#elements + 1] = element
        element = self:parse_element(elements)
    end
    return elements
end
-- Module value: the Parser class table.
return Parser