diff --git a/lib/babycode-emoji.lua b/lib/babycode-emoji.lua index c3e6f64..de71e4a 100644 --- a/lib/babycode-emoji.lua +++ b/lib/babycode-emoji.lua @@ -1,36 +1,37 @@ -local emoji_template = " \"$\" " +local emoji_template = " \"$NAME\" " +local emoji_pat = "%$NAME" return { - ["angry"] = emoji_template:gsub("%$", "angry"), + ["angry"] = emoji_template:gsub(emoji_pat, "angry"), - ["("] = emoji_template:gsub("%$", "frown"), + ["("] = emoji_template:gsub(emoji_pat, "frown"), - ["D"] = emoji_template:gsub("%$", "grin"), + ["D"] = emoji_template:gsub(emoji_pat, "grin"), - ["imp"] = emoji_template:gsub("%$", "imp"), + ["imp"] = emoji_template:gsub(emoji_pat, "imp"), - ["angryimp"] = emoji_template:gsub("%$", "impangry"), - ["impangry"] = emoji_template:gsub("%$", "impangry"), + ["angryimp"] = emoji_template:gsub(emoji_pat, "impangry"), + ["impangry"] = emoji_template:gsub(emoji_pat, "impangry"), - ["|"] = emoji_template:gsub("%$", "neutral"), + ["|"] = emoji_template:gsub(emoji_pat, "neutral"), - [")"] = emoji_template:gsub("%$", "smile"), + [")"] = emoji_template:gsub(emoji_pat, "smile"), - [","] = emoji_template:gsub("%$", "sob"), - ["T"] = emoji_template:gsub("%$", "sob"), - ["cry"] = emoji_template:gsub("%$", "sob"), - ["sob"] = emoji_template:gsub("%$", "sob"), + [","] = emoji_template:gsub(emoji_pat, "sob"), + ["T"] = emoji_template:gsub(emoji_pat, "sob"), + ["cry"] = emoji_template:gsub(emoji_pat, "sob"), + ["sob"] = emoji_template:gsub(emoji_pat, "sob"), - ["o"] = emoji_template:gsub("%$", "surprised"), - ["O"] = emoji_template:gsub("%$", "surprised"), + ["o"] = emoji_template:gsub(emoji_pat, "surprised"), + ["O"] = emoji_template:gsub(emoji_pat, "surprised"), - ["hmm"] = emoji_template:gsub("%$", "think"), - ["think"] = emoji_template:gsub("%$", "think"), - ["thinking"] = emoji_template:gsub("%$", "think"), + ["hmm"] = emoji_template:gsub(emoji_pat, "think"), + ["think"] = emoji_template:gsub(emoji_pat, "think"), + ["thinking"] = emoji_template:gsub(emoji_pat, 
"think"), - ["P"] = emoji_template:gsub("%$", "tongue"), - ["p"] = emoji_template:gsub("%$", "tongue"), + ["P"] = emoji_template:gsub(emoji_pat, "tongue"), + ["p"] = emoji_template:gsub(emoji_pat, "tongue"), - [";"] = emoji_template:gsub("%$", "wink"), - ["wink"] = emoji_template:gsub("%$", "wink"), + [";"] = emoji_template:gsub(emoji_pat, "wink"), + ["wink"] = emoji_template:gsub(emoji_pat, "wink"), } diff --git a/lib/babycode-parser.lua b/lib/babycode-parser.lua new file mode 100644 index 0000000..6d72f72 --- /dev/null +++ b/lib/babycode-parser.lua @@ -0,0 +1,416 @@
+-- contributed by kaesa
+
+--- Pattern used for emote names (applied for every char).
+local PAT_EMOTE = "[^%s:]"
+--- Pattern used for bbcode tags (applied for every char).
+local PAT_BBCODE_TAG = "%w"
+--- Pattern used for bbcode tag attribute (applied for every char).
+local PAT_BBCODE_ATTR = "[^%s%]]"
+--- Pattern used to detect loose links.
+---
+--- Every literal "-" inside a set is written as "%-": the Lua manual leaves
+--- the interaction between ranges and classes (such as "%w-_") undefined, so
+--- the unescaped form only worked by implementation accident.
+local PAT_LINK = "https?://[%w%-_%.%?:/%+=&~%@#%%]+[%w%-/]"
+
+
+
+--- @class Parser
+--- @field valid_bbcode_tags table Table of valid BBCode tags.
+--- @field valid_emotes table Table of valid emotes.
+--- @field bbcode_tags_only_text_children table Table of tags that may only contain text.
+--- @field source string Source to parse.
+--- @field position integer Current position of the parser.
+--- @field elements table Starts empty; not read by any method of this file.
+--- @field position_stack integer[] Position stack used for rewind parsing.
+---
+--- Parser class.
+local Parser = {}
+-- Shared by every instance, so no metatable is allocated per parser.
+Parser.__index = Parser
+
+
+
+--- Creates a new parser positioned on the first character of SRC.
+---
+--- The three lookup tables start empty; the caller is expected to fill
+--- them in before parsing.
+---
+--- Raises an error if SRC is not a string.
+---
+--- @param src string
+--- @return Parser
+function Parser.new(src)
+  -- fail here rather than later, inside the first length check on the source
+  if type(src) ~= "string" then
+    error("Parser.new: expected a string source, got " .. type(src), 2)
+  end
+
+  return setmetatable({
+    valid_bbcode_tags = {},
+    valid_emotes = {},
+    bbcode_tags_only_text_children = {},
+    source = src,
+    position = 1,
+    elements = {},
+    position_stack = {},
+  }, Parser)
+end
+
+--- Advances the parser by COUNT characters.
+--- @param count integer? Set to 1 if nil.
+function Parser:advance(count)
+  self.position = self.position + (count or 1)
+end
+
+--- Checks if the position (plus an optional offset) is past the end of the
+--- source.
+--- @param offset integer? Set to 0 if nil.
+--- @return boolean
+function Parser:is_end_of_source(offset)
+  return self.position + (offset or 0) > #self.source
+end
+
+--- Saves the current position to the position stack.
+function Parser:save_position()
+  table.insert(self.position_stack, self.position)
+end
+
+--- Restores the current position from the top of the position stack, and
+--- removes that position from the stack.
+---
+--- Raises an error if the stack is empty, instead of silently setting the
+--- position to nil.
+function Parser:restore_position()
+  local saved = table.remove(self.position_stack)
+  assert(saved ~= nil, "Parser:restore_position called without a saved position")
+  self.position = saved
+end
+
+--- Forgets the top position in the position stack.
+function Parser:forget_position()
+  table.remove(self.position_stack)
+end
+
+--- Retrieves the character at the current position (plus optional offset).
+--- Returns an empty string when that position is past the end of the source.
+---
+--- @param offset integer? Set to 0 if nil.
+--- @return string
+function Parser:peek_char(offset)
+  offset = offset or 0
+
+  -- if the offset is out of bounds
+  if self:is_end_of_source(offset) then
+    return ""
+  end
+
+  local index = self.position + offset
+  return self.source:sub(index, index)
+end
+
+--- Retrieves the character at the current position and advances the position.
+--- The position is advanced even when the end of the source has been reached
+--- (an empty string is returned in that case).
+---
+--- @return string
+function Parser:get_char()
+  local char = self:peek_char()
+  self:advance()
+  return char
+end
+
+--- Checks if the character at the current position is WANTED. If so,
+--- advances the position and returns true. Does nothing otherwise and returns
+--- false.
+---
+--- @param wanted string The character to check with.
+--- @return boolean
+function Parser:check_char(wanted)
+  if self:peek_char() ~= wanted then
+    return false
+  end
+
+  self:advance()
+  return true
+end
+
+--- Checks if WANTED is present at the current position in the source. If so,
+--- advances the position past it and returns true. Does nothing otherwise and
+--- returns false. An empty WANTED always succeeds without moving.
+---
+--- @param wanted string
+--- @return boolean
+---
+function Parser:check_str(wanted)
+  local first = self.position
+
+  -- one substring comparison; a slice cut short by the end of the source can
+  -- never be equal to WANTED
+  if self.source:sub(first, first + #wanted - 1) ~= wanted then
+    return false
+  end
+
+  self:advance(#wanted)
+  return true
+end
+
+--- Consumes characters for as long as each single character matches the given
+--- pattern, and returns the consumed string (possibly empty). The pattern is
+--- applied to one character at a time, not to the sequence as a whole.
+---
+--- @param pattern string
+--- @return string
+---
+function Parser:match_pattern(pattern)
+  local first = self.position
+
+  while not self:is_end_of_source() and self:peek_char():match(pattern) do
+    self:advance()
+  end
+
+  -- slice once instead of concatenating character by character
+  return self.source:sub(first, self.position - 1)
+end
+
+--- Tries to parse an emote (":name:"). Only recognizes emotes present in the
+--- `valid_emotes` field of the parser. The position is left untouched when no
+--- emote is found.
+---
+--- Format of the table :
+--- { type = "emote",
+---   name = string }
+---
+--- @return table?
+function Parser:parse_emote()
+  self:save_position()
+
+  -- an emote starts with ":"
+  if self:check_char(":") then
+    local name = self:match_pattern(PAT_EMOTE)
+
+    -- it must be closed by ":" and its name must be a known emote
+    if self:check_char(":") and self.valid_emotes[name] then
+      self:forget_position()
+      return {
+        type = "emote",
+        name = name
+      }
+    end
+  end
+
+  self:restore_position()
+  return nil
+end
+
+--- Tries to parse a bbcode opening tag. Only recognizes tags present in
+--- `valid_bbcode_tags` field of the parser.
+---
+--- Returns the name of the tag, and its attribute (if any present).
+---
+--- @return string?, string?
+function Parser:parse_bbcode_open()
+  self:save_position()
+
+  -- a tag starts with "[" followed by a non-empty name
+  if self:check_char("[") then
+    local name = self:match_pattern(PAT_BBCODE_TAG)
+
+    if name ~= "" then
+      local attribute = nil
+
+      -- if there is an attribute given, extract it
+      if self:check_char("=") then
+        attribute = self:match_pattern(PAT_BBCODE_ATTR)
+      end
+
+      -- the tag must be closed by "]" and its name must be a known tag
+      if self:check_char("]") and self.valid_bbcode_tags[name] then
+        self:forget_position()
+        return name, attribute
+      end
+    end
+  end
+
+  self:restore_position()
+  return nil
+end
+
+--- Tries to parse a bbcode tag. Only recognizes tags present in `valid_bbcode_tags`
+--- field of the parser.
+---
+--- A tag whose closing tag is missing extends to the end of the source.
+--- Tags listed in `bbcode_tags_only_text_children` get their content as raw
+--- text (at most one string child); other tags get parsed elements.
+---
+--- Format of the table :
+--- { type = "bbcode",
+---   name = string,
+---   attribute = string?,
+---   children = (string|table)[] }
+---
+--- @return table?
+function Parser:parse_bbcode()
+  local name, attribute = self:parse_bbcode_open()
+
+  -- parse_bbcode_open already rewound the position on failure
+  if name == nil then
+    return nil
+  end
+
+  local closing = "[/" .. name .. "]"
+  local children = {}
+
+  if self.bbcode_tags_only_text_children[name] then
+    -- plain (non-pattern) search for the closing tag; everything before it,
+    -- or up to the end of the source when it is missing, is raw text
+    local close_at = self.source:find(closing, self.position, true)
+    local text_end = close_at and (close_at - 1) or #self.source
+
+    if text_end >= self.position then
+      children[1] = self.source:sub(self.position, text_end)
+    end
+
+    self.position = text_end + 1
+    -- consume the closing tag when there is one
+    self:check_str(closing)
+  else
+    -- parse children until the closing tag or the end of the source;
+    -- parse_element cannot return nil here since the end is checked first
+    while not self:is_end_of_source() and not self:check_str(closing) do
+      local element = self:parse_element(children)
+      table.insert(children, element)
+    end
+  end
+
+  return {
+    type = "bbcode",
+    name = name,
+    attribute = attribute,
+    children = children
+  }
+end
+
+--- Tries to parse a ruler element ("---").
+---
+--- Format of the table :
+--- { type = "ruler" }
+---
+--- @return table?
+function Parser:parse_ruler()
+  if not self:check_str("---") then
+    return nil
+  end
+
+  return {
+    type = "ruler",
+  }
+end
+
+--- Tries to parse a loose link starting exactly at the current position.
+---
+--- Only the text matched by the link pattern is consumed, so characters
+--- around the link (a leading "(", a trailing ")" or ".") stay plain text.
+---
+--- Format of the table :
+--- { type = "link",
+---   url = string }
+---
+--- @return table?
+function Parser:parse_link()
+  -- "^" anchors the match at the init position given to string.match
+  local url = self.source:match("^" .. PAT_LINK, self.position)
+
+  if url == nil then
+    return nil
+  end
+
+  self:advance(#url)
+  return {
+    type = "link",
+    url = url,
+  }
+end
+
+--- Tries to parse an element.
+---
+--- Returns either a table or a string.
+--- A string represents simple text.
+--- A table represents a different kind of element that can be differentiated
+--- by its `type` field.
+---
+--- Valid types : emote, bbcode, link, ruler.
+--- Each type has different fields. See `Parser:parse_*` functions for more
+--- info.
+---
+--- When the current character is plain text and the last sibling is a string,
+--- that sibling is removed from SIBBLINGS and returned with the character
+--- appended, so that the caller re-inserts a single merged string.
+---
+--- Returns nil when the end of the source has been reached.
+---
+--- @param sibblings (string|table)[]
+--- @return (table|string)?
+function Parser:parse_element(sibblings)
+  if self:is_end_of_source() then
+    return nil
+  end
+
+  local element = self:parse_emote()
+    or self:parse_bbcode()
+    or self:parse_ruler()
+    or self:parse_link()
+
+  if element ~= nil then
+    return element
+  end
+
+  -- plain character: merge it into the previous text sibling, if any
+  local last = sibblings[#sibblings]
+
+  if type(last) == "string" then
+    table.remove(sibblings)
+    return last .. self:get_char()
+  end
+
+  return self:get_char()
+end
+
+--- Parses the whole source at once, returning all parsed elements.
+--- See `Parser:parse_element` for more information about the return value.
+---
+--- @return (string|table)[]
+function Parser:parse()
+  local elements = {}
+  local element = self:parse_element(elements)
+
+  while element ~= nil do
+    table.insert(elements, element)
+    element = self:parse_element(elements)
+  end
+
+  return elements
+end
+
+return Parser
diff --git a/lib/babycode.lua b/lib/babycode.lua index f0319f1..dca9962 100644 --- a/lib/babycode.lua +++ b/lib/babycode.lua @@ -3,6 +3,8 @@ local babycode = {} local string_trim = require("lapis.util").trim local emoji = require("lib.babycode-emoji") +local Parser = require("lib.babycode-parser") + local function s_split(s, delimiter, max_matches, trim, allow_empty) local result = {} if s == "" then @@ -55,116 +57,94 @@ local function s_split(s, delimiter, max_matches, trim, allow_empty) return result end -local function get_list_items(list_body, escape_html) - list_body = list_body:gsub(" +%s*\r?\n", "
") - list_body = list_body:gsub("(%S)(\r?\n\r?\n)\r?\n*", "%1\1") +local function list(tag, children) + local list_body = children:gsub(" +\n", "
"):gsub("\n\n+", "\1") local list_items = s_split(list_body, "\1") local lis = "" for _, li in ipairs(list_items) do - local rendered = babycode.to_html(li, escape_html) - lis = lis .. "
  • " .. rendered .. "
  • " + lis = lis .. "
  • " .. li .. "
  • " end - return lis + return "<" .. tag .. ">" .. lis .. "" end +local tags = { + b = "$S", + i = "$S", + s = "$S", + img = "
    %S
    ", + url = "$S", + quote = "
    $S
    ", + code = function(children) + local is_inline = children:match("\n") == nil + if is_inline then + return "" .. children .. "" + else + local t = string_trim(children) + local button = (""):format(t) + return "
    "..button..""..t.."
    " + end + end, + ul = function(children) + return list("ul", children) + end, + ol = function(children) + return list("ol", children) + end, +} + +local text_only = { + code = true, +} + ---renders babycode to html ---@param s string input babycode ----@param escape_html fun(s: string): string function that escapes html -function babycode.to_html(s, escape_html) - if not s or s == "" then return "" end - local text = escape_html(s) - -- extract code blocks and store them as placeholders - -- don't want to process bbcode embedded into a code block - local code_blocks = {} - local inline_codes = {} - text = text:gsub("%[code%](.-)%[/code%]", function(code) - local is_inline = code:match("\n") == nil - if is_inline then - table.insert(inline_codes, code) - return "\1ICODE:"..#inline_codes.."\1" - else - -- strip leading and trailing newlines, preserve others - local m, _ = code:gsub("^%s*(.-)%s*$", "%1") - table.insert(code_blocks, m) - return "\1CODE:"..#code_blocks.."\1" +---@param html_escape fun(s: string): string function to escape html +function babycode.to_html(s, html_escape) + -- normalize line ending chars + local subj = string_trim(html_escape(s)):gsub("\r\n", "\n"):gsub("\r", "\n") + local parser = Parser.new(subj) + parser.valid_bbcode_tags = tags + parser.valid_emotes = emoji + parser.bbcode_tags_only_text_children = text_only + + local elements = parser:parse() + local out = "" + local function fold(element, nobr) + if type(element) == "string" then + if nobr then + return element + end + return element:gsub(" +\n", "
    "):gsub("\n\n+", "

    ") end - end) - - text = text:gsub("%[ul%](.-)%[/ul%]", function(list_body) - return "" - end) - text = text:gsub("%[ol%](.-)%[/ol%]", function(list_body) - return "
      " .. get_list_items(list_body, escape_html) .. "
    " - end) - - -- images - local images = {} - text = text:gsub("%[img=(.-)%](.-)%[/img%]", function (img, alt) - table.insert(images, {img = img, alt = alt}) - return "\1IMG:"..#images.."\1" - end) - - -- normalize newlines, attempt #4 - text = text:gsub(" +%s*\r?\n", "
    ") - text = text:gsub("(%S)(\r?\n\r?\n)\r?\n*", "%1

    ") - - local url_tags = {} - -- replace `[url=https://example.com]Example[/url] tags - text = text:gsub("%[url=([^%]]+)%](.-)%[/url%]", function(url, label) - table.insert(url_tags, {url = url, label = label}) - return "\1URL:"..#url_tags.."\1" - end) - - -- bold, italics, strikethrough - text = text:gsub("%[b%](.-)%[/b%]", "%1") - text = text:gsub("%[i%](.-)%[/i%]", "%1") - text = text:gsub("%[s%](.-)%[/s%]", "%1") - - -- these can be nested, so replace open and closed separately - text = text:gsub("%[(/?)quote%]", "<%1blockquote>") - - text = text:gsub(":(.-):", function(code) - if emoji[code] then - return emoji[code] - else - return code + if element.type == "bbcode" then + local c = "" + for _, child in ipairs(element.children) do + local _nobr = element.name == "code" or element.name == "ul" or element.name == "ol" + c = c .. fold(child, _nobr) + end + local res = "" + if type(tags[element.name]) == "string" then + res = (tags[element.name]):gsub("%$S", c) + if element.attribute then + res = res:gsub("%$A", element.attribute) + end + return res + elseif type(tags[element.name]) == "function" then + res = tags[element.name](c, element.attribute) + end + return res + elseif element.type == "link" then + return ""..element.url.."" + elseif element.type == "emote" then + return emoji[element.name] + elseif element.type == "ruler" then + return "
    " end - end) - - -- replace loose links - text = text:gsub("(https?://[%w-_%.%?%.:/%+=&~%@#%%]+[%w-/])", function(url) - if not text:find(']*>'..url..'') then - return ''..url..'' - end - return url - end) - - text = text:gsub("\1URL:(%d+)\1", function(n) - local url = url_tags[tonumber(n)] - return ("%s"):format(url.url, url.label) - end) - - -- rule - text = text:gsub("\n+%-%-%-", "
    ") - - --
    %2
    - text = text:gsub("\1IMG:(%d+)\1", function (n) - local img = images[tonumber(n)] - return ("
    \"%s\"
    "):format(img.img, img.alt) - end) - -- replace code block placeholders back with their original contents - text = text:gsub("\1CODE:(%d+)\1", function(n) - local code = code_blocks[tonumber(n)] - local button = (""):format(code) - return "
    " .. button .. ""..code.."
    " - end) - - text = text:gsub("\1ICODE:(%d+)\1", function (n) - local code = inline_codes[tonumber(n)] - return "" .. code .. "" - end) - - return text + end + for _, e in ipairs(elements) do + out = out .. fold(e, false) + end + return out end return babycode