417 lines
9.2 KiB
Lua
417 lines
9.2 KiB
Lua
-- contributed by kaesa
|
|
|
|
--- Pattern matched against each character of an emote name: any character
--- except whitespace and ":".
local PAT_EMOTE = "[^%s:]"

--- Pattern matched against each character of a bbcode tag name
--- (alphanumeric characters only).
local PAT_BBCODE_TAG = "%w"

--- Pattern matched against each character of a bbcode tag attribute: any
--- character except whitespace and "]".
local PAT_BBCODE_ATTR = "[^%s%]]"

--- Pattern used to detect loose links ("http://" or "https://" followed by
--- URL characters, ending with an alphanumeric character, "-" or "/").
---
--- The literal "-" inside the sets is escaped as "%-": the Lua manual leaves
--- the interaction between ranges and classes (such as "%w-_") undefined, so
--- the unescaped form only worked by accident of the implementation.
local PAT_LINK = "https?://[%w%-_%.%?:/%+=&~@#%%]+[%w%-/]"
|
|
|
|
|
|
|
|
--- Parser class.
---
--- @class Parser
--- @field valid_bbcode_tags table Set of recognized BBCode tag names (looked up by name).
--- @field valid_emotes table Set of recognized emote names (looked up by name).
--- @field bbcode_tags_only_text_children table Set of tag names whose content is kept as raw text.
--- @field source string Source to parse.
--- @field position integer Current (1-based) position of the parser in the source.
--- @field elements table Initialized to an empty table by `Parser.new`.
--- @field position_stack integer[] Stack of saved positions used to rewind the parser.
local Parser = {}
|
|
|
|
|
|
|
|
--- Builds a parser over the given source.
---
--- The tables of valid tags, valid emotes and text-only tags start empty, and
--- the parser starts at the first character of the source.
---
--- @param src string Source to parse.
--- @return Parser
function Parser.new(src)
    local parser = setmetatable({}, { __index = Parser })

    -- Recognition tables, empty on creation.
    parser.valid_bbcode_tags = {}
    parser.valid_emotes = {}
    parser.bbcode_tags_only_text_children = {}

    -- Parsing state.
    parser.source = src
    parser.position = 1
    parser.elements = {}
    parser.position_stack = {}

    return parser
end
|
|
|
|
--- Moves the parser position forward by COUNT characters.
---
--- @param count integer? Number of characters to skip; 1 when omitted.
function Parser:advance(count)
    self.position = self.position + (count or 1)
end
|
|
|
|
--- Tells whether the current position (plus an optional offset) lies past the
--- last character of the source.
---
--- @param offset integer? Offset added to the current position; 0 when omitted.
--- @return boolean
function Parser:is_end_of_source(offset)
    local index = self.position + (offset or 0)
    return index > #self.source
end
|
|
|
|
--- Pushes the current position on top of the position stack.
function Parser:save_position()
    local stack = self.position_stack
    stack[#stack + 1] = self.position
end
|
|
|
|
--- Pops the top of the position stack and makes it the current position.
function Parser:restore_position()
    local saved = table.remove(self.position_stack)
    self.position = saved
end
|
|
|
|
--- Drops the top entry of the position stack, leaving the current position
--- untouched.
function Parser:forget_position()
    local stack = self.position_stack
    stack[#stack] = nil
end
|
|
|
|
--- Retrieves the character at the current position (plus optional offset)
--- without moving the parser.
---
--- Returns an empty string when the requested index falls outside the source,
--- either past its end or before its first character.
---
--- @param offset integer? Offset added to the current position; 0 when omitted.
--- @return string
function Parser:peek_char(offset)
    offset = offset or 0

    local index = self.position + offset

    -- `string.sub` interprets negative indices as counting from the end of
    -- the string, so an index before the first character must be rejected
    -- explicitly instead of silently wrapping around.
    if index < 1 or self:is_end_of_source(offset) then
        return ""
    end

    return self.source:sub(index, index)
end
|
|
|
|
--- Reads the character at the current position, then moves the parser one
--- character forward.
---
--- @return string The character that was at the current position.
function Parser:get_char()
    local current = self:peek_char(0)
    self:advance(1)
    return current
end
|
|
|
|
--- Consumes the character at the current position when it equals WANTED.
---
--- On a match the parser advances by one character and true is returned;
--- otherwise the position is left untouched and false is returned.
---
--- @param wanted string The character to compare with.
--- @return boolean
function Parser:check_char(wanted)
    if self:peek_char() ~= wanted then
        return false
    end

    self:advance()
    return true
end
|
|
|
|
--- Consumes WANTED when it is present at the current position of the source.
---
--- On a match the parser advances past WANTED and true is returned; otherwise
--- the position is rewound to where it was and false is returned.
---
--- @param wanted string
--- @return boolean
function Parser:check_str(wanted)
    self:save_position()

    -- Compare character by character, stopping at the first mismatch.
    local index, length = 1, #wanted
    local matched = true
    while matched and index <= length do
        matched = self:check_char(wanted:sub(index, index))
        index = index + 1
    end

    if matched then
        self:forget_position()
    else
        self:restore_position()
    end

    return matched
end
|
|
|
|
--- Consumes the longest run of characters, starting at the current position,
--- in which every single character matches PATTERN.
---
--- The pattern is applied to one character at a time, not to the run as a
--- whole. The parser is advanced past the consumed characters.
---
--- @param pattern string Lua pattern tested against each character.
--- @return string The consumed characters (empty when the first one does not match).
function Parser:match_pattern(pattern)
    -- Characters are collected in a table and joined once at the end, which
    -- avoids building a new intermediate string for every character.
    local matched = {}

    while not self:is_end_of_source() do
        local ch = self:peek_char()

        if not ch:match(pattern) then
            break
        end

        self:advance()
        matched[#matched + 1] = ch
    end

    return table.concat(matched)
end
|
|
|
|
--- Attempts to parse an emote of the form `:name:` at the current position.
--- Only names present in the `valid_emotes` field of the parser are accepted.
---
--- Format of the returned table :
--- { type = "emote",
---   name = string }
---
--- When no valid emote is found, the position is rewound and nil is returned.
---
--- @return table?
function Parser:parse_emote()
    self:save_position()

    local name
    local found = self:check_char(":")

    if found then
        name = self:match_pattern(PAT_EMOTE)
        -- The closing ":" must be present and the name must be a known emote.
        found = self:check_char(":") and self.valid_emotes[name]
    end

    if not found then
        self:restore_position()
        return nil
    end

    self:forget_position()
    return { type = "emote", name = name }
end
|
|
|
|
--- Attempts to parse a bbcode opening tag (`[name]` or `[name=attribute]`) at
--- the current position. Only tag names present in the `valid_bbcode_tags`
--- field of the parser are accepted.
---
--- Returns the name of the tag and its attribute (nil when none was given).
--- When no valid opening tag is found, the position is rewound and nil is
--- returned.
---
--- @return string?, string?
function Parser:parse_bbcode_open()
    self:save_position()

    if self:check_char("[") then
        local tag = self:match_pattern(PAT_BBCODE_TAG)

        -- An empty tag name is never valid.
        if tag ~= "" then
            local attr = nil

            -- Optional "=attribute" part.
            if self:check_char("=") then
                attr = self:match_pattern(PAT_BBCODE_ATTR)
            end

            -- The tag must be closed by "]" and be a known tag.
            if self:check_char("]") and self.valid_bbcode_tags[tag] then
                self:forget_position()
                return tag, attr
            end
        end
    end

    self:restore_position()
    return nil
end
|
|
|
|
--- Attempts to parse a bbcode tag with its content at the current position.
--- Only tags present in the `valid_bbcode_tags` field of the parser are
--- recognized.
---
--- Children are parsed until the matching `[/name]` close tag is consumed. If
--- the source ends before a close tag is found, the tag is treated as closed
--- at the end of the source.
---
--- For tags listed in `bbcode_tags_only_text_children`, the whole content is
--- kept as a single text child (no child at all when the content is empty).
---
--- Format of the returned table :
--- { type = "bbcode",
---   name = string,
---   attribute = string?,
---   children = (string|table)[] }
---
--- Returns nil, leaving the position untouched, when there is no valid
--- opening tag at the current position.
---
--- @return table?
function Parser:parse_bbcode()
    -- `parse_bbcode_open` rewinds the position by itself on failure, so no
    -- extra position bookkeeping is needed here.
    local name, attribute = self:parse_bbcode_open()

    if name == nil then
        return nil
    end

    local close_tag = "[/" .. name .. "]"
    local children = {}

    if self.bbcode_tags_only_text_children[name] then
        -- Raw text content: gather the characters and join them once.
        local buffer = {}

        while not self:is_end_of_source() and not self:check_str(close_tag) do
            buffer[#buffer + 1] = self:get_char()
        end

        if #buffer > 0 then
            children[1] = table.concat(buffer)
        end
    else
        while not self:is_end_of_source() and not self:check_str(close_tag) do
            -- `parse_element` cannot return nil here: the loop condition has
            -- just established that the end of the source is not reached.
            -- It may remove the last (text) child in order to extend it, so
            -- the result is inserted only after the call has returned.
            local element = self:parse_element(children)
            table.insert(children, element)
        end
    end

    return {
        type = "bbcode",
        name = name,
        attribute = attribute,
        children = children
    }
end
|
|
|
|
--- Attempts to parse a ruler element (`---`) at the current position.
---
--- Format of the returned table :
--- { type = "ruler" }
---
--- Returns nil, leaving the position untouched, when there is no ruler.
---
--- @return table?
function Parser:parse_ruler()
    if self:check_str("---") then
        return { type = "ruler" }
    end

    return nil
end
|
|
|
|
--- Attempts to parse a loose link starting exactly at the current position.
---
--- Only the text matched by the link pattern is consumed and returned:
--- characters before the link (e.g. an opening parenthesis) or after it (e.g.
--- a trailing "." or ")") are left in the source to be parsed as other
--- elements.
---
--- Format of the returned table :
--- { type = "link",
---   url = string }
---
--- Returns nil, leaving the position untouched, when there is no link.
---
--- @return table?
function Parser:parse_link()
    -- "^" anchors the pattern at the init position given to `string.match`,
    -- so the link has to begin at the current position instead of anywhere
    -- later in the source.
    local url = self.source:match("^" .. PAT_LINK, self.position)

    if url == nil then
        return nil
    end

    self:advance(#url)

    return {
        type = "link",
        url = url,
    }
end
|
|
|
|
--- Parses the next element at the current position.
---
--- The result is either a string or a table.
--- A string represents plain text.
--- A table represents another kind of element, which can be differentiated by
--- its `type` field.
---
--- Valid types : emote, bbcode, link, ruler.
--- Each type has different fields. See the `Parser:parse_*` functions for more
--- information.
---
--- When no structured element is found, one character is consumed as text. If
--- the last entry of SIBBLINGS is a string, it is removed from SIBBLINGS and
--- returned with that character appended, so that consecutive text ends up in
--- a single string once the caller inserts the result.
---
--- Returns nil when the end of the source has been reached.
---
--- @param sibblings (string|table)[] Elements already parsed at the same level.
--- @return (table|string)?
function Parser:parse_element(sibblings)
    if self:is_end_of_source() then
        return nil
    end

    -- Try each structured element in turn; the first success wins.
    local parsed = self:parse_emote()
    if not parsed then
        parsed = self:parse_bbcode()
    end
    if not parsed then
        parsed = self:parse_ruler()
    end
    if not parsed then
        parsed = self:parse_link()
    end

    if parsed ~= nil then
        return parsed
    end

    -- Plain text: extend the previous text sibling when there is one.
    local count = #sibblings
    local previous = sibblings[count]

    if count > 0 and type(previous) == "string" then
        table.remove(sibblings)
        return previous .. self:get_char()
    end

    return self:get_char()
end
|
|
|
|
--- Parses the source from the current position up to its end and returns
--- every parsed element, in order.
--- See `Parser:parse_element` for more information about the elements.
---
--- @return (string|table)[]
function Parser:parse()
    local result = {}

    local element = self:parse_element(result)
    while element ~= nil do
        result[#result + 1] = element
        element = self:parse_element(result)
    end

    return result
end
|
|
|
|
return Parser
|