diff --git a/lib/babycode-emoji.lua b/lib/babycode-emoji.lua
index c3e6f64..de71e4a 100644
--- a/lib/babycode-emoji.lua
+++ b/lib/babycode-emoji.lua
@@ -1,36 +1,37 @@
-local emoji_template = " "
+local emoji_template = "
"
+local emoji_pat = "%$NAME"
return {
- ["angry"] = emoji_template:gsub("%$", "angry"),
+ ["angry"] = emoji_template:gsub(emoji_pat, "angry"),
- ["("] = emoji_template:gsub("%$", "frown"),
+ ["("] = emoji_template:gsub(emoji_pat, "frown"),
- ["D"] = emoji_template:gsub("%$", "grin"),
+ ["D"] = emoji_template:gsub(emoji_pat, "grin"),
- ["imp"] = emoji_template:gsub("%$", "imp"),
+ ["imp"] = emoji_template:gsub(emoji_pat, "imp"),
- ["angryimp"] = emoji_template:gsub("%$", "impangry"),
- ["impangry"] = emoji_template:gsub("%$", "impangry"),
+ ["angryimp"] = emoji_template:gsub(emoji_pat, "impangry"),
+ ["impangry"] = emoji_template:gsub(emoji_pat, "impangry"),
- ["|"] = emoji_template:gsub("%$", "neutral"),
+ ["|"] = emoji_template:gsub(emoji_pat, "neutral"),
- [")"] = emoji_template:gsub("%$", "smile"),
+ [")"] = emoji_template:gsub(emoji_pat, "smile"),
- [","] = emoji_template:gsub("%$", "sob"),
- ["T"] = emoji_template:gsub("%$", "sob"),
- ["cry"] = emoji_template:gsub("%$", "sob"),
- ["sob"] = emoji_template:gsub("%$", "sob"),
+ [","] = emoji_template:gsub(emoji_pat, "sob"),
+ ["T"] = emoji_template:gsub(emoji_pat, "sob"),
+ ["cry"] = emoji_template:gsub(emoji_pat, "sob"),
+ ["sob"] = emoji_template:gsub(emoji_pat, "sob"),
- ["o"] = emoji_template:gsub("%$", "surprised"),
- ["O"] = emoji_template:gsub("%$", "surprised"),
+ ["o"] = emoji_template:gsub(emoji_pat, "surprised"),
+ ["O"] = emoji_template:gsub(emoji_pat, "surprised"),
- ["hmm"] = emoji_template:gsub("%$", "think"),
- ["think"] = emoji_template:gsub("%$", "think"),
- ["thinking"] = emoji_template:gsub("%$", "think"),
+ ["hmm"] = emoji_template:gsub(emoji_pat, "think"),
+ ["think"] = emoji_template:gsub(emoji_pat, "think"),
+ ["thinking"] = emoji_template:gsub(emoji_pat, "think"),
- ["P"] = emoji_template:gsub("%$", "tongue"),
- ["p"] = emoji_template:gsub("%$", "tongue"),
+ ["P"] = emoji_template:gsub(emoji_pat, "tongue"),
+ ["p"] = emoji_template:gsub(emoji_pat, "tongue"),
- [";"] = emoji_template:gsub("%$", "wink"),
- ["wink"] = emoji_template:gsub("%$", "wink"),
+ [";"] = emoji_template:gsub(emoji_pat, "wink"),
+ ["wink"] = emoji_template:gsub(emoji_pat, "wink"),
}
diff --git a/lib/babycode-parser.lua b/lib/babycode-parser.lua
new file mode 100644
index 0000000..6d72f72
--- /dev/null
+++ b/lib/babycode-parser.lua
@@ -0,0 +1,416 @@
+-- contributed by kaesa
+
+--- Pattern used for emote names (applied to one character at a time).
+local PAT_EMOTE = "[^%s:]"
+--- Pattern used for bbcode tag names (applied to one character at a time).
+local PAT_BBCODE_TAG = "%w"
+--- Pattern used for a bbcode tag attribute (applied to one character at a time).
+local PAT_BBCODE_ATTR = "[^%s%]]"
+--- Pattern used to detect loose links.
+--- The hyphen inside each set is written `%-` so it is always a literal:
+--- the Lua manual leaves the interaction between a class such as `%w` and a
+--- following `-` undefined. The accepted character set is otherwise unchanged.
+local PAT_LINK = "https?://[%w%-_%.%?:/%+=&~%@#%%]+[%w%-/]"
+
+
+
+--- @class Parser
+--- @field valid_bbcode_tags table Table of valid BBCode tags, looked up by tag name.
+--- @field valid_emotes table Table of valid emotes, looked up by emote name.
+--- @field bbcode_tags_only_text_children table Table of tags that may only contain text, looked up by tag name.
+--- @field source string Source to parse.
+--- @field position integer Current position of the parser (1-based index into `source`).
+--- @field position_stack integer[] Position stack used for rewind parsing.
+--- @field elements table Set to an empty table by `Parser.new`; not read by the parser methods shown in this file.
+---
+--- Parser class. Methods are defined on this table and reached from instances
+--- through the `__index` metatable installed by `Parser.new`.
+local Parser = {}
+
+
+
+--- Builds a parser over SRC, positioned on its first character.
+---
+--- The lookup tables (`valid_bbcode_tags`, `valid_emotes` and
+--- `bbcode_tags_only_text_children`) start out empty, so nothing is recognised
+--- until they are assigned on the returned instance.
+---
+--- @param src string
+--- @return Parser
+function Parser.new(src)
+    return setmetatable({
+        source = src,
+        position = 1,
+        position_stack = {},
+        elements = {},
+        valid_bbcode_tags = {},
+        valid_emotes = {},
+        bbcode_tags_only_text_children = {},
+    }, { __index = Parser })
+end
+
+--- Moves the parser forward by COUNT characters.
+--- @param count integer? Treated as 1 when nil.
+function Parser:advance(count)
+    local step = count
+    if not step then
+        step = 1
+    end
+    self.position = self.position + step
+end
+
+--- Tells whether the current position (plus OFFSET) lies past the last
+--- character of the source.
+--- @param offset integer? Treated as 0 when nil.
+function Parser:is_end_of_source(offset)
+    local index = self.position + (offset or 0)
+    return index > #self.source
+end
+
+--- Pushes the current position onto the position stack.
+function Parser:save_position()
+    local stack = self.position_stack
+    stack[#stack + 1] = self.position
+end
+
+--- Pops the most recently saved position off the position stack and makes it
+--- the current position.
+function Parser:restore_position()
+    local saved = table.remove(self.position_stack)
+    self.position = saved
+end
+
+--- Drops the most recently saved position without moving the parser.
+function Parser:forget_position()
+    local stack = self.position_stack
+    stack[#stack] = nil
+end
+
+--- Retrieves the character at the current position (plus optional offset)
+--- without moving the parser.
+---
+--- @param offset integer? Treated as 0 when nil.
+--- @return string The character, or "" when the index is past the end of the source.
+function Parser:peek_char(offset)
+    local index = self.position + (offset or 0)
+
+    -- past the last character: nothing to read
+    if index > #self.source then
+        return ""
+    end
+
+    return self.source:sub(index, index)
+end
+
+--- Retrieves the character at the current position, then moves the parser one
+--- character forward.
+---
+--- @return string
+function Parser:get_char()
+    local current = self:peek_char()
+    self:advance(1)
+    return current
+end
+
+--- Consumes the character at the current position when it equals WANTED.
+--- Returns true and advances by one on a match; otherwise leaves the position
+--- alone and returns false.
+---
+--- @param wanted string The character to compare against.
+--- @return boolean
+function Parser:check_char(wanted)
+    if self:peek_char() ~= wanted then
+        return false
+    end
+
+    self:advance()
+    return true
+end
+
+--- Consumes WANTED when the source contains it at the current position.
+--- Returns true and advances past it on a match; otherwise leaves the position
+--- alone and returns false.
+---
+--- @param wanted string
+--- @return boolean
+---
+function Parser:check_str(wanted)
+    local first = self.position
+    local last = first + #wanted - 1
+
+    -- compare the whole slice at once; a slice cut short by the end of the
+    -- source can never equal WANTED
+    if self.source:sub(first, last) ~= wanted then
+        return false
+    end
+
+    self.position = last + 1
+    return true
+end
+
+--- Consumes the longest run of characters, starting at the current position,
+--- in which every single character matches PATTERN. The pattern is applied to
+--- one character at a time, not to the run as a whole.
+---
+--- The parser is left on the first character that did not match (or past the
+--- end of the source). The run is extracted with a single `sub` call instead
+--- of being grown by repeated concatenation, which avoids quadratic copying on
+--- long runs.
+---
+--- @param pattern string
+--- @return string The matched run; "" when the first character does not match.
+---
+function Parser:match_pattern(pattern)
+    local source = self.source
+    local length = #source
+    local first = self.position
+    local stop = first
+
+    -- walk forward until a character fails the pattern or the source ends
+    while stop <= length and source:sub(stop, stop):match(pattern) do
+        stop = stop + 1
+    end
+
+    self.position = stop
+    return source:sub(first, stop - 1)
+end
+
+--- Tries to parse an emote of the form `:name:`. Only names present in the
+--- `valid_emotes` field of the parser are recognised.
+---
+--- Format of the table :
+--- { type = "emote",
+---   name = string }
+---
+--- On failure the parser is put back where it started and nil is returned.
+---
+--- @return table?
+function Parser:parse_emote()
+    self:save_position()
+
+    local name
+    local recognised = false
+
+    -- opening ":", then the name, then the closing ":" and a known name
+    if self:check_char(":") then
+        name = self:match_pattern(PAT_EMOTE)
+        if self:check_char(":") and self.valid_emotes[name] then
+            recognised = true
+        end
+    end
+
+    if not recognised then
+        self:restore_position()
+        return nil
+    end
+
+    self:forget_position()
+    return { type = "emote", name = name }
+end
+
+--- Tries to parse a bbcode opening tag (`[name]` or `[name=attribute]`). Only
+--- tags present in the `valid_bbcode_tags` field of the parser are recognised.
+---
+--- Returns the name of the tag and its attribute (nil when no `=` was given).
+--- On failure the parser is put back where it started and nil is returned.
+---
+--- @return string?, string?
+function Parser:parse_bbcode_open()
+    self:save_position()
+
+    local name, attribute
+    local ok = self:check_char("[")
+
+    -- the tag name must not be empty
+    if ok then
+        name = self:match_pattern(PAT_BBCODE_TAG)
+        ok = name ~= ""
+    end
+
+    if ok then
+        -- optional "=attribute" part
+        if self:check_char("=") then
+            attribute = self:match_pattern(PAT_BBCODE_ATTR)
+        end
+
+        -- the closing "]" is required, and the tag must be a known one
+        ok = self:check_char("]") and self.valid_bbcode_tags[name]
+    end
+
+    if not ok then
+        self:restore_position()
+        return nil
+    end
+
+    self:forget_position()
+    return name, attribute
+end
+
+--- Tries to parse a bbcode tag together with its children. Only recognizes
+--- tags present in the `valid_bbcode_tags` field of the parser.
+---
+--- Format of the table :
+--- { type = "bbcode",
+--- name = string,
+--- attribute = string?,
+--- children = (string|table)[] }
+---
+--- Children are collected until the matching `[/name]` close tag is consumed.
+--- If the source ends first, the loop simply stops and the tag is still
+--- returned with whatever children were collected so far.
+---
+--- @return table? nil (with the position restored) when no valid opening tag starts here.
+function Parser:parse_bbcode()
+ -- remember where we started so the whole tag can be rewound
+ self:save_position()
+
+ local name, attribute = self:parse_bbcode_open()
+
+ -- if there isn't an open bbcode tag here
+ if name == nil then
+ self:restore_position()
+ return nil
+ end
+
+ local children = {}
+
+ -- parse children elements of that tag
+ while not self:is_end_of_source() do
+ -- if there is a close tag here, consume it and stop
+ if self:check_str("[/" .. name .. "]") then
+ break
+ end
+
+ -- if that tag only accepts text children
+ if self.bbcode_tags_only_text_children[name] then
+ -- take the next character verbatim; nothing inside is parsed
+ local ch = self:get_char()
+
+ -- all the text is accumulated into a single string child
+ if #children == 0 then
+ table.insert(children, ch)
+ else
+ children[1] = children[1] .. ch
+ end
+ else
+ -- parse_element may merge the new character into the last string
+ -- child: it removes that child and returns the merged string
+ local element = self:parse_element(children)
+
+ -- if the end of the source has been reached
+ -- NOTE(review): parse_element only returns nil at end of source, and
+ -- the loop guard above already excludes that, so this branch looks
+ -- unreachable as written - confirm whether unclosed tags were meant
+ -- to be rejected here.
+ if element == nil then
+ self:restore_position()
+ return nil
+ end
+
+ table.insert(children, element)
+ end
+ end
+
+ -- success: drop the saved start position
+ self:forget_position()
+ return {
+ type = "bbcode",
+ name = name,
+ attribute = attribute,
+ children = children
+ }
+end
+
+--- Tries to parse a ruler element, written as `---`.
+---
+--- Format of the table :
+--- { type = "ruler" }
+---
+--- @return table? nil when the source does not have `---` at the current position.
+function Parser:parse_ruler()
+    if self:check_str("---") then
+        return { type = "ruler" }
+    end
+
+    return nil
+end
+
+--- Tries to parse a loose link starting at the current position.
+---
+--- Format of the table :
+--- { type = "link",
+---   url = string }
+---
+--- The link pattern is anchored to the start of the word, and only the part
+--- that matches is consumed and returned as `url`. An unanchored match that
+--- returned the whole word would turn input such as `javascript:x//https://a.b`
+--- or `(https://a.b)` into a link whose target is the entire word. Characters
+--- after the matched part (for example a trailing ".") are left in the source
+--- to be parsed as ordinary text.
+---
+--- @return table? nil (position unchanged) when no link starts here.
+function Parser:parse_link()
+    -- every link starts with "http"; bail out before scanning a whole word
+    if self:peek_char() ~= "h" then
+        return nil
+    end
+
+    local start = self.position
+
+    -- we extract a "word" (bunch of printable characters without spaces).
+    local word = self:match_pattern("%g")
+
+    -- the link has to begin exactly where the word begins
+    local url = word:match("^" .. PAT_LINK)
+
+    -- rewind, then consume only what belongs to the link
+    self.position = start
+
+    if url == nil then
+        return nil
+    end
+
+    self:advance(#url)
+    return {
+        type = "link",
+        url = url,
+    }
+end
+
+--- Tries to parse one element at the current position.
+---
+--- Returns either a table or a string.
+--- A string represents plain text.
+--- A table represents another kind of element, which can be differentiated
+--- by its `type` field.
+---
+--- Valid types : emote, bbcode, link, ruler (tried in the order emote, bbcode,
+--- ruler, link). Each type has different fields. See the `Parser:parse_*`
+--- functions for more info.
+---
+--- When nothing structured matches, one character is consumed as text. If the
+--- last entry of SIBBLINGS is a string, that entry is removed from SIBBLINGS
+--- and returned with the new character appended, so the caller ends up with a
+--- single merged string once it inserts the result.
+---
+--- Returns nil when the end of the source has been reached.
+---
+--- @param sibblings (string|table)[] Elements already parsed at this level; may be mutated.
+--- @return (table|string)?
+function Parser:parse_element(sibblings)
+    if self:is_end_of_source() then
+        return nil
+    end
+
+    local element = self:parse_emote()
+    element = element or self:parse_bbcode()
+    element = element or self:parse_ruler()
+    element = element or self:parse_link()
+
+    if element ~= nil then
+        return element
+    end
+
+    -- plain text: extend the previous text sibling when there is one
+    local count = #sibblings
+    local previous = sibblings[count]
+
+    if count > 0 and type(previous) == "string" then
+        table.remove(sibblings)
+        return previous .. self:get_char()
+    end
+
+    return self:get_char()
+end
+
+--- Parses the whole remaining source in one go and returns every element found.
+--- See `Parser:parse_element` for the shape of the returned values.
+---
+--- @return (string|table)[]
+function Parser:parse()
+    local elements = {}
+
+    -- parse_element returns nil once the end of the source is reached
+    local element = self:parse_element(elements)
+    while element ~= nil do
+        elements[#elements + 1] = element
+        element = self:parse_element(elements)
+    end
+
+    return elements
+end
+
+return Parser
diff --git a/lib/babycode.lua b/lib/babycode.lua
index f0319f1..dca9962 100644
--- a/lib/babycode.lua
+++ b/lib/babycode.lua
@@ -3,6 +3,8 @@ local babycode = {}
local string_trim = require("lapis.util").trim
local emoji = require("lib.babycode-emoji")
+local Parser = require("lib.babycode-parser")
+
local function s_split(s, delimiter, max_matches, trim, allow_empty)
local result = {}
if s == "" then
@@ -55,116 +57,94 @@ local function s_split(s, delimiter, max_matches, trim, allow_empty)
return result
end
-local function get_list_items(list_body, escape_html)
- list_body = list_body:gsub(" +%s*\r?\n", "
")
- list_body = list_body:gsub("(%S)(\r?\n\r?\n)\r?\n*", "%1\1")
+local function list(tag, children)
+ local list_body = children:gsub(" +\n", "
"):gsub("\n\n+", "\1")
local list_items = s_split(list_body, "\1")
local lis = ""
for _, li in ipairs(list_items) do
- local rendered = babycode.to_html(li, escape_html)
- lis = lis .. "
$S", + code = function(children) + local is_inline = children:match("\n") == nil + if is_inline then + return "
" .. children .. "
"
+ else
+ local t = string_trim(children)
+ local button = (""):format(t)
+ return ""..button..""..t.."
"
+ end
+ end,
+ ul = function(children)
+ return list("ul", children)
+ end,
+ ol = function(children)
+ return list("ol", children)
+ end,
+}
+
+local text_only = {
+ code = true,
+}
+
---renders babycode to html
---@param s string input babycode
----@param escape_html fun(s: string): string function that escapes html
-function babycode.to_html(s, escape_html)
- if not s or s == "" then return "" end
- local text = escape_html(s)
- -- extract code blocks and store them as placeholders
- -- don't want to process bbcode embedded into a code block
- local code_blocks = {}
- local inline_codes = {}
- text = text:gsub("%[code%](.-)%[/code%]", function(code)
- local is_inline = code:match("\n") == nil
- if is_inline then
- table.insert(inline_codes, code)
- return "\1ICODE:"..#inline_codes.."\1"
- else
- -- strip leading and trailing newlines, preserve others
- local m, _ = code:gsub("^%s*(.-)%s*$", "%1")
- table.insert(code_blocks, m)
- return "\1CODE:"..#code_blocks.."\1"
+---@param html_escape fun(s: string): string function to escape html
+function babycode.to_html(s, html_escape)
+ -- normalize line ending chars
+ local subj = string_trim(html_escape(s)):gsub("\r\n", "\n"):gsub("\r", "\n")
+ local parser = Parser.new(subj)
+ parser.valid_bbcode_tags = tags
+ parser.valid_emotes = emoji
+ parser.bbcode_tags_only_text_children = text_only
+
+ local elements = parser:parse()
+ local out = ""
+ local function fold(element, nobr)
+ if type(element) == "string" then
+ if nobr then
+ return element
+ end
+ return element:gsub(" +\n", "" .. button .. ""..code.."
"
- end)
-
- text = text:gsub("\1ICODE:(%d+)\1", function (n)
- local code = inline_codes[tonumber(n)]
- return "" .. code .. "
"
- end)
-
- return text
+ end
+ for _, e in ipairs(elements) do
+ out = out .. fold(e, false)
+ end
+ return out
end
return babycode