add babycode parser, courtesy of kaesa

This commit is contained in:
Lera Elvoé 2025-05-30 22:59:21 +03:00
parent 3e9f771ad3
commit 1a37ccfd86
Signed by: yagich
SSH Key Fingerprint: SHA256:6xjGb6uA7lAVcULa7byPEN//rQ0wPoG+UzYVMfZnbvc
3 changed files with 519 additions and 122 deletions

View File

@ -1,36 +1,37 @@
--- HTML snippet for one emoji; every `$NAME` is replaced by the emoji's image
--- name. The surrounding spaces are part of the snippet.
local emoji_template = " <img class=emoji src=\"/emoji/$NAME.png\" alt=\"$NAME\" title=\"$NAME\"> "
--- Lua pattern matching the literal placeholder `$NAME` in `emoji_template`.
local emoji_pat = "%$NAME"

--- Renders the HTML snippet for the emoji image called NAME.
---
--- @param name string
--- @return string
local function render(name)
    -- the parentheses drop gsub's second result (the substitution count)
    return (emoji_template:gsub(emoji_pat, name))
end

-- Maps emote codes to their HTML. Several codes may share one image.
-- Each key is listed exactly once: the order of assignments in a table
-- constructor is undefined, so repeated keys would make the result
-- implementation-dependent.
return {
    ["angry"] = render("angry"),

    ["("] = render("frown"),

    ["D"] = render("grin"),

    ["imp"] = render("imp"),

    ["angryimp"] = render("impangry"),
    ["impangry"] = render("impangry"),

    ["|"] = render("neutral"),

    [")"] = render("smile"),

    [","] = render("sob"),
    ["T"] = render("sob"),
    ["cry"] = render("sob"),
    ["sob"] = render("sob"),

    ["o"] = render("surprised"),
    ["O"] = render("surprised"),

    ["hmm"] = render("think"),
    ["think"] = render("think"),
    ["thinking"] = render("think"),

    ["P"] = render("tongue"),
    ["p"] = render("tongue"),

    [";"] = render("wink"),
    ["wink"] = render("wink"),
}

416
lib/babycode-parser.lua Normal file
View File

@ -0,0 +1,416 @@
-- contributed by kaesa
--- Pattern used for emote names (applied for every char):
--- anything except whitespace and ":".
local PAT_EMOTE = "[^%s:]"
--- Pattern used for bbcode tags (applied for every char): alphanumerics only.
local PAT_BBCODE_TAG = "%w"
--- Pattern used for bbcode tag attribute (applied for every char):
--- anything except whitespace and "]".
local PAT_BBCODE_ATTR = "[^%s%]]"
--- Pattern used to detect loose links: "http://" or "https://", followed by
--- one or more URL characters, ending in an alphanumeric, "-" or "/".
--- The hyphens are escaped (`%-`) because a bare "-" next to a class such as
--- `%w` inside a set has no defined meaning in Lua patterns.
local PAT_LINK = "https?://[%w%-_%.%?:/%+=&~%@#%%]+[%w%-/]"
--- @class Parser
--- @field valid_bbcode_tags table Table of valid BBCode tags, keyed by tag name.
--- @field valid_emotes table Table of valid emotes, keyed by emote name.
--- @field bbcode_tags_only_text_children table Table of tags that may only contain text, keyed by tag name.
--- @field source string Source to parse.
--- @field position integer Current position of the parser (1-based).
--- @field elements table Initialised to an empty table by `Parser.new`.
--- @field position_stack integer[] Position stack used for rewind parsing.
---
--- Parser class.
local Parser = {}

--- Metatable shared by all parser instances, so that creating a parser does
--- not allocate a new metatable every time.
local PARSER_MT = { __index = Parser }

--- Creates a new parser positioned at the first character of SRC.
---
--- The tag and emote tables start out empty; callers are expected to assign
--- `valid_bbcode_tags`, `valid_emotes` and `bbcode_tags_only_text_children`
--- before parsing.
---
--- @param src string
--- @return Parser
function Parser.new(src)
    return setmetatable({
        valid_bbcode_tags = {},
        valid_emotes = {},
        bbcode_tags_only_text_children = {},
        source = src,
        position = 1,
        elements = {},
        position_stack = {},
    }, PARSER_MT)
end
--- Moves the parser position forward by COUNT characters.
--- No bounds check is performed here.
--- @param count integer? Number of characters to skip; 1 when nil.
function Parser:advance(count)
    local step = count or 1
    self.position = self.position + step
end
--- Tells whether the current position, shifted by OFFSET, lies past the last
--- character of the source.
--- @param offset integer? Shift applied to the position; 0 when nil.
--- @return boolean
function Parser:is_end_of_source(offset)
    return self.position + (offset or 0) > #self.source
end
--- Pushes the current position onto the position stack.
function Parser:save_position()
    local stack = self.position_stack
    stack[#stack + 1] = self.position
end
--- Pops the top of the position stack and makes it the current position.
function Parser:restore_position()
    local stack = self.position_stack
    local top = #stack
    self.position = stack[top]
    stack[top] = nil
end
--- Drops the top entry of the position stack without touching the current
--- position.
function Parser:forget_position()
    local stack = self.position_stack
    stack[#stack] = nil
end
--- Retrieves the character at the current position (plus optional offset)
--- without moving the parser.
---
--- @param offset integer? Set to 0 if nil.
--- @return string The character, or "" when the requested position is past
--- the end of the source.
function Parser:peek_char(offset)
    local shift = offset or 0
    if not self:is_end_of_source(shift) then
        local index = self.position + shift
        return self.source:sub(index, index)
    end
    -- out of bounds
    return ""
end
--- Retrieves the character at the current position, then moves the parser one
--- character forward. The position is advanced even when "" is returned
--- because the end of the source was already reached.
---
--- @return string
function Parser:get_char()
    local current = self:peek_char(0)
    self:advance(1)
    return current
end
--- Consumes the character at the current position if it equals WANTED.
--- Returns true when the character was consumed; otherwise the position is
--- left untouched and false is returned.
---
--- @param wanted string The character to check with.
--- @return boolean
function Parser:check_char(wanted)
    if self:peek_char() ~= wanted then
        return false
    end
    self:advance()
    return true
end
--- Consumes WANTED if the source contains it exactly at the current position.
--- Returns true when it was consumed; otherwise the position is left
--- untouched and false is returned.
---
--- @param wanted string
--- @return boolean
---
function Parser:check_str(wanted)
    local length = #wanted
    local first = self.position
    -- near the end of the source the slice is shorter than WANTED and
    -- therefore cannot compare equal
    if self.source:sub(first, first + length - 1) ~= wanted then
        return false
    end
    self:advance(length)
    return true
end
--- Consumes the longest run of characters, starting at the current position,
--- in which every single character matches PATTERN. The pattern is applied to
--- one character at a time, not to the run as a whole.
---
--- Returns the consumed run ("" when the first character does not match or
--- the end of the source has been reached). The parser position ends up just
--- after the run.
---
--- @param pattern string
--- @return string
---
function Parser:match_pattern(pattern)
    local first = self.position
    while not self:is_end_of_source() do
        if not self:peek_char():match(pattern) then
            break
        end
        self:advance()
    end
    -- slice the run out of the source once instead of growing a buffer one
    -- character at a time
    return self.source:sub(first, self.position - 1)
end
--- Tries to parse an emote of the form `:name:` at the current position.
--- Only names present in the `valid_emotes` field of the parser are accepted.
--- On failure the position is rewound to where it was on entry.
---
--- Format of the table :
--- { type = "emote",
---   name = string }
---
--- @return table?
function Parser:parse_emote()
    self:save_position()
    -- opening ":"
    if self:check_char(":") then
        local emote_name = self:match_pattern(PAT_EMOTE)
        -- closing ":" first, then the name lookup
        if self:check_char(":") and self.valid_emotes[emote_name] then
            self:forget_position()
            return { type = "emote", name = emote_name }
        end
    end
    self:restore_position()
    return nil
end
--- Tries to parse a bbcode opening tag, `[name]` or `[name=attribute]`, at the
--- current position. Only tag names present in the `valid_bbcode_tags` field
--- of the parser are accepted. On failure the position is rewound to where it
--- was on entry.
---
--- Returns the name of the tag, and its attribute (nil when the tag has no
--- `=` part). Returns nil when no valid opening tag is found.
---
--- @return string?, string?
function Parser:parse_bbcode_open()
    self:save_position()
    -- opening "["
    if self:check_char("[") then
        local tag_name = self:match_pattern(PAT_BBCODE_TAG)
        -- a tag needs a non-empty name
        if tag_name ~= "" then
            local tag_attribute = nil
            if self:check_char("=") then
                tag_attribute = self:match_pattern(PAT_BBCODE_ATTR)
            end
            -- closing "]" first, then the name lookup
            if self:check_char("]") and self.valid_bbcode_tags[tag_name] then
                self:forget_position()
                return tag_name, tag_attribute
            end
        end
    end
    self:restore_position()
    return nil
end
--- Tries to parse a bbcode tag (opening tag, children, closing tag) at the
--- current position. Only recognizes tags present in the `valid_bbcode_tags`
--- field of the parser.
---
--- Children are parsed until the matching `[/name]` is found. If the end of
--- the source is reached first, the tag is treated as closed there and
--- everything up to the end becomes its content.
---
--- For tags listed in `bbcode_tags_only_text_children`, the content is not
--- parsed: `children` holds a single string, or is empty when there is no
--- content.
---
--- Format of the table :
--- { type = "bbcode",
---   name = string,
---   attribute = string?,
---   children = (string|table)[] }
---
--- @return table?
function Parser:parse_bbcode()
    self:save_position()

    local name, attribute = self:parse_bbcode_open()
    -- if there isnt a open bbcode tag here
    if name == nil then
        self:restore_position()
        return nil
    end

    local close_tag = "[/" .. name .. "]"
    local children = {}

    if self.bbcode_tags_only_text_children[name] then
        -- Raw text: take everything up to the first closing tag in one slice
        -- (plain search, no pattern magic) instead of appending one
        -- character at a time.
        local first, last = self.source:find(close_tag, self.position, true)
        local text
        if first then
            text = self.source:sub(self.position, first - 1)
            self.position = last + 1
        else
            -- no closing tag: the rest of the source is the content
            text = self.source:sub(self.position)
            self.position = #self.source + 1
        end
        if text ~= "" then
            children[1] = text
        end
    else
        while not self:is_end_of_source() do
            -- if there is a close tag here
            if self:check_str(close_tag) then
                break
            end

            local element = self:parse_element(children)
            -- Defensive guard: parse_element only returns nil at the end of
            -- the source, which the loop condition has already ruled out.
            if element == nil then
                self:restore_position()
                return nil
            end
            children[#children + 1] = element
        end
    end

    self:forget_position()
    return {
        type = "bbcode",
        name = name,
        attribute = attribute,
        children = children
    }
end
--- Tries to parse a ruler element, i.e. the literal `---`, at the current
--- position.
---
--- Format of the table :
--- { type = "ruler" }
---
--- @return table?
function Parser:parse_ruler()
    if self:check_str("---") then
        return { type = "ruler" }
    end
    return nil
end
--- Tries to parse a loose link at the current position.
---
--- A "word" (run of printable, non-space characters) is read and cut at the
--- first "[", so that a bbcode tag following the link (for instance the
--- `[/b]` in `[b]http://example.com[/b]`) is not swallowed into the URL.
--- The word is accepted only if it *starts* with a link: accepting a word that
--- merely contains one somewhere would turn `javascript:...//http://x.y` into
--- a link target.
---
--- On failure the position is rewound to where it was on entry.
---
--- Format of the table :
--- { type = "link",
---   url = string }
---
--- @return table?
function Parser:parse_link()
    self:save_position()

    -- we extract a "word" (bunch of printable characters without spaces).
    local word = self:match_pattern("%g")

    -- "[" is never part of a loose link; stop the word there (plain search)
    local bracket = word:find("[", 1, true)
    if bracket then
        word = word:sub(1, bracket - 1)
    end

    -- the word has to begin with the link pattern
    if not word:match("^" .. PAT_LINK) then
        self:restore_position()
        return nil
    end

    -- go back to the start and consume exactly the accepted word
    self:restore_position()
    self:advance(#word)
    return {
        type = "link",
        url = word,
    }
end
--- Tries to parse one element at the current position.
---
--- Returns either a table or a string.
--- A string represents plain text.
--- A table represents another kind of element, which can be told apart by
--- its `type` field.
---
--- Valid types : emote, bbcode, link, ruler (tried in that order).
--- Each type has different fields. See `Parser:parse_*` functions for more
--- info.
---
--- When nothing else matches, one character is consumed as text. If the last
--- entry of SIBBLINGS is a string, that entry is removed from SIBBLINGS and
--- returned with the new character appended, so that consecutive text ends
--- up in a single string once the caller inserts the result.
---
--- Returns nil when the end of the source has been reached.
---
--- @param sibblings (string|table)[] Elements already parsed at this level.
--- @return (table|string)?
function Parser:parse_element(sibblings)
    if self:is_end_of_source() then
        return nil
    end

    local parsed = self:parse_emote()
        or self:parse_bbcode()
        or self:parse_ruler()
        or self:parse_link()
    if parsed ~= nil then
        return parsed
    end

    -- plain text: extend the previous text sibling when there is one
    local count = #sibblings
    local previous = sibblings[count]
    if count > 0 and type(previous) == "string" then
        table.remove(sibblings)
        return previous .. self:get_char()
    end
    return self:get_char()
end
--- Parses the source from the current position to its end and returns every
--- parsed element in order.
--- See `Parser:parse_element` for more information about the return value.
---
--- @return (string|table)[]
function Parser:parse()
    local result = {}
    local item = self:parse_element(result)
    while item ~= nil do
        -- parse_element may have removed the last text entry to merge it into
        -- ITEM, so the length is read only after it has returned
        result[#result + 1] = item
        item = self:parse_element(result)
    end
    return result
end
return Parser

View File

@ -3,6 +3,8 @@ local babycode = {}
local string_trim = require("lapis.util").trim
local emoji = require("lib.babycode-emoji")
local Parser = require("lib.babycode-parser")
local function s_split(s, delimiter, max_matches, trim, allow_empty)
local result = {}
if s == "" then
@ -55,116 +57,94 @@ local function s_split(s, delimiter, max_matches, trim, allow_empty)
return result
end
local function get_list_items(list_body, escape_html)
list_body = list_body:gsub(" +%s*\r?\n", "<br>")
list_body = list_body:gsub("(%S)(\r?\n\r?\n)\r?\n*", "%1\1")
local function list(tag, children)
local list_body = children:gsub(" +\n", "<br>"):gsub("\n\n+", "\1")
local list_items = s_split(list_body, "\1")
local lis = ""
for _, li in ipairs(list_items) do
local rendered = babycode.to_html(li, escape_html)
lis = lis .. "<li>" .. rendered .. "</li>"
lis = lis .. "<li>" .. li .. "</li>"
end
return lis
return "<" .. tag .. ">" .. lis .. "</" .. tag .. ">"
end
--- Renderers for the supported tags, keyed by tag name.
---
--- A string value is a template: `$S` is replaced by the rendered children of
--- the tag and `$A` by the tag attribute. A function value is called with the
--- rendered children (and the attribute) and returns the HTML.
local tags = {
    b = "<strong>$S</strong>",
    i = "<em>$S</em>",
    s = "<del>$S</del>",
    -- both attribute values are quoted, and the alt text uses the `$S`
    -- placeholder (it previously read `%S`, which is never substituted)
    img = "<div class=\"post-img-container\"><img class=\"block-img\" src=\"$A\" alt=\"$S\"></div>",
    url = "<a href=\"$A\">$S</a>",
    quote = "<blockquote>$S</blockquote>",
    --- Code without any newline is rendered inline; anything else becomes a
    --- trimmed `<pre>` block with a copy button carrying the same text.
    code = function(children)
        local is_inline = children:match("\n") == nil
        if is_inline then
            return "<code class=\"inline-code\">" .. children .. "</code>"
        end
        local t = string_trim(children)
        local button = ("<button type=button class=\"copy-code\" value=\"%s\">Copy</button>"):format(t)
        return "<pre><span class=\"copy-code-container\">" .. button .. "</span><code>" .. t .. "</code></pre>"
    end,
    ul = function(children)
        return list("ul", children)
    end,
    ol = function(children)
        return list("ol", children)
    end,
}
--- Set of tags whose content is kept as raw text instead of being parsed.
local text_only = { code = true }
---renders babycode to html
---@param s string input babycode
---@param escape_html fun(s: string): string function that escapes html
function babycode.to_html(s, escape_html)
if not s or s == "" then return "" end
local text = escape_html(s)
-- extract code blocks and store them as placeholders
-- don't want to process bbcode embedded into a code block
local code_blocks = {}
local inline_codes = {}
text = text:gsub("%[code%](.-)%[/code%]", function(code)
local is_inline = code:match("\n") == nil
if is_inline then
table.insert(inline_codes, code)
return "\1ICODE:"..#inline_codes.."\1"
else
-- strip leading and trailing newlines, preserve others
local m, _ = code:gsub("^%s*(.-)%s*$", "%1")
table.insert(code_blocks, m)
return "\1CODE:"..#code_blocks.."\1"
---@param html_escape fun(s: string): string function to escape html
function babycode.to_html(s, html_escape)
-- normalize line ending chars
local subj = string_trim(html_escape(s)):gsub("\r\n", "\n"):gsub("\r", "\n")
local parser = Parser.new(subj)
parser.valid_bbcode_tags = tags
parser.valid_emotes = emoji
parser.bbcode_tags_only_text_children = text_only
local elements = parser:parse()
local out = ""
local function fold(element, nobr)
if type(element) == "string" then
if nobr then
return element
end
return element:gsub(" +\n", "<br>"):gsub("\n\n+", "<br><br>")
end
end)
text = text:gsub("%[ul%](.-)%[/ul%]", function(list_body)
return "<ul>" .. get_list_items(list_body, escape_html) .. "</ul>"
end)
text = text:gsub("%[ol%](.-)%[/ol%]", function(list_body)
return "<ol>" .. get_list_items(list_body, escape_html) .. "</ol>"
end)
-- images
local images = {}
text = text:gsub("%[img=(.-)%](.-)%[/img%]", function (img, alt)
table.insert(images, {img = img, alt = alt})
return "\1IMG:"..#images.."\1"
end)
-- normalize newlines, attempt #4
text = text:gsub(" +%s*\r?\n", "<br>")
text = text:gsub("(%S)(\r?\n\r?\n)\r?\n*", "%1<br><br>")
local url_tags = {}
-- replace `[url=https://example.com]Example[/url] tags
text = text:gsub("%[url=([^%]]+)%](.-)%[/url%]", function(url, label)
table.insert(url_tags, {url = url, label = label})
return "\1URL:"..#url_tags.."\1"
end)
-- bold, italics, strikethrough
text = text:gsub("%[b%](.-)%[/b%]", "<strong>%1</strong>")
text = text:gsub("%[i%](.-)%[/i%]", "<em>%1</em>")
text = text:gsub("%[s%](.-)%[/s%]", "<del>%1</del>")
-- these can be nested, so replace open and closed separately
text = text:gsub("%[(/?)quote%]", "<%1blockquote>")
text = text:gsub(":(.-):", function(code)
if emoji[code] then
return emoji[code]
else
return code
if element.type == "bbcode" then
local c = ""
for _, child in ipairs(element.children) do
local _nobr = element.name == "code" or element.name == "ul" or element.name == "ol"
c = c .. fold(child, _nobr)
end
local res = ""
if type(tags[element.name]) == "string" then
res = (tags[element.name]):gsub("%$S", c)
if element.attribute then
res = res:gsub("%$A", element.attribute)
end
return res
elseif type(tags[element.name]) == "function" then
res = tags[element.name](c, element.attribute)
end
return res
elseif element.type == "link" then
return "<a href=\""..element.url.."\">"..element.url.."</a>"
elseif element.type == "emote" then
return emoji[element.name]
elseif element.type == "ruler" then
return "<hr>"
end
end)
-- replace loose links
text = text:gsub("(https?://[%w-_%.%?%.:/%+=&~%@#%%]+[%w-/])", function(url)
if not text:find('<a[^>]*>'..url..'</a>') then
return '<a href="'..url..'">'..url..'</a>'
end
return url
end)
text = text:gsub("\1URL:(%d+)\1", function(n)
local url = url_tags[tonumber(n)]
return ("<a href=%s>%s</a>"):format(url.url, url.label)
end)
-- rule
text = text:gsub("\n+%-%-%-", "<hr>")
-- <div class=\"post-img-container\"><img src=%1 alt=%2></div>
text = text:gsub("\1IMG:(%d+)\1", function (n)
local img = images[tonumber(n)]
return ("<div class=\"block-img-container\"><img class=\"block-img\" src=\"%s\" alt=\"%s\"></div>"):format(img.img, img.alt)
end)
-- replace code block placeholders back with their original contents
text = text:gsub("\1CODE:(%d+)\1", function(n)
local code = code_blocks[tonumber(n)]
local button = ("<button type=button class=\"copy-code\" value=\"%s\">Copy</button>"):format(code)
return "<pre><span class=\"copy-code-container\">" .. button .. "</span><code>"..code.."</code></pre>"
end)
text = text:gsub("\1ICODE:(%d+)\1", function (n)
local code = inline_codes[tonumber(n)]
return "<code class=\"inline-code\">" .. code .. "</code>"
end)
return text
end
for _, e in ipairs(elements) do
out = out .. fold(e, false)
end
return out
end
return babycode