"" "^\"\"(.*)$" Where following text is a CSS class name
-- preformatted: .. "^%.%.(.*)$" Where following text is a CSS class name or @lua function name
-- block-comments: (unrendered) {{ "^{{$"
-- lists: <ul>, <ol> *# "^([*#]+)%s*(.*)$" any combination of *'s and #'s at the beginning of the line
-- definitions: : "^:(text):(def)"
-- paragraph: If none of the above apply
--- Create a simple LIFO stack backed by an array table.
-- The returned object exposes the raw array as `stack` and the methods
-- push, pop, concat, gather, is_empty and depth (all meant to be called
-- with the `:` syntax).
-- @return a fresh, empty stack object
function new_stack()
    local s = { stack = {} }

    -- Append `item` on top of the stack.
    function s:push(item)
        local items = self.stack
        items[#items + 1] = item
    end

    -- Remove and return the top item (nil when the stack is empty).
    function s:pop()
        local items = self.stack
        local top = #items
        local item = items[top]
        items[top] = nil
        return item
    end

    -- Join all items, bottom to top, without modifying the stack.
    function s:concat(separator)
        return table.concat(self.stack, separator)
    end

    -- Join all items, bottom to top, and leave the stack empty.
    function s:gather(separator)
        local joined = table.concat(self.stack, separator)
        self.stack = {}
        return joined
    end

    -- True when the stack holds no items.
    function s:is_empty()
        return #self.stack == 0
    end

    -- Number of items currently on the stack.
    function s:depth()
        return #self.stack
    end

    return s
end
--
-- parse_indent()
--
-- This function returns the whitespace used to indent the beginning of a line:
-- parse_indent_str repeated x times, i.e. one indent unit per nesting level.
--
-- Indentation unit; one copy is emitted per nesting level.
parse_indent_str = " "

--- Build the leading whitespace for a nesting depth.
-- @param x number of indentation units to emit
-- @return parse_indent_str repeated x times
function parse_indent(x)
    local unit = parse_indent_str
    return string.rep(unit, x)
end
-- **************************************************
--
-- new_parser()
--
-- This routine will return a new parser table that can be
-- invoked to read lines of text and process them.
--
-- The indenting is written as though the parse.* members were defined at
-- global level, although they are all defined -within- the function
-- new_parser(). This was done erroneously and could be changed if it
-- would be cleaner to have everything properly indented one tab stop.
--
function new_parser(read_line_func, read_line_arg)
local parse = {}
--- Print a warning to standard output, prefixed with "WARNING: ".
-- Only the first extra argument is used as the message; it must be a value
-- that can be concatenated to a string.
function parse:warn(...)
    local message = ...
    print("WARNING: " .. message)
end
--- Abort by raising a Lua error whose message is prefixed with "ERROR: ".
-- Only the first extra argument is used as the message; it must be a value
-- that can be concatenated to a string.
function parse:error(...)
    local message = ...
    error("ERROR: " .. message)
end
-- Lines that were read ahead and handed back; they are served before the
-- underlying reader is consulted again.
parse.save_line = new_stack()
-- The caller-supplied line source and the single argument it is called with.
parse.read_line_t = { func = read_line_func, arg = read_line_arg }

--- Fetch the next input line.
-- A line previously pushed onto save_line takes priority; otherwise the
-- reader function supplied to new_parser() is called with its argument.
-- @return whatever the reader (or the pushed-back stack) yields
function parse:read_line()
    local pending = self.save_line
    if not pending:is_empty() then
        return pending:pop()
    end
    local reader = self.read_line_t
    return reader.func(reader.arg)
end

-- Rendered HTML fragments of the document, in output order.
parse.body = new_stack()
--- Wrap text in an HTML element, optionally with a class attribute.
-- @param markup element name, e.g. "p" or "h2"
-- @param text   element content, inserted verbatim
-- @param class  optional class string; a trailing "--" comment and the
--               surrounding whitespace are removed, and a class that is nil
--               or empty after trimming produces no attribute at all
-- @return the string <markup[ class="..."]>text</markup>
parse.wrap_class = function(self, markup, text, class)
    local class_markup = "" -- no attribute unless a usable class remains
    if class then
        class = class:gsub("%-%-.*$", "") -- drop a comment inside the class declaration
        class = class:gsub("^%s*", "")
        class = class:gsub("%s*$", "")
        if class ~= "" then
            class_markup = ' class="' .. class .. '"'
        end
    end
    -- The closing tag needs its "</" prefix; without it the element is never closed.
    return "<" .. markup .. class_markup .. ">" .. text .. "</" .. markup .. ">"
end
--- Apply inline markup to a block of text.
-- '''text''' is rendered bold and ''text'' italic. The triple-quote form is
-- processed first so that it is not consumed by the double-quote rule.
-- Previously the markers were stripped without emitting any element.
-- NOTE(review): <b>/<i> follow the usual wiki convention for these markers;
-- confirm they are the intended elements.
-- @param block text to transform
-- @return the transformed text
parse.inline = function (self, block)
    block = block:gsub("%'%'%'(.-)%'%'%'", "<b>%1</b>")
    block = block:gsub("%'%'(.-)%'%'", "<i>%1</i>")
    return block
end
--- Run inline markup over a block, then wrap it in an element.
-- @param wrap  element name passed on to wrap_class
-- @param block raw block text
-- @param class optional class string passed on to wrap_class
-- @return the wrapped element as a string
function parse:wrap_block_class(wrap, block, class)
    local content = self:inline(block)
    return (self:wrap_class(wrap, content, class))
end
--- Recognise a heading line: one or more "=" followed by the heading text.
-- The number of "=" characters selects the level; levels above 6 produce a
-- warning and are clamped to 6.
-- @param line input line
-- @return false when the line is not a heading; otherwise true and the
--         rendered <hN> element
function parse:headings(line)
    local marks, title = line:match("^(=+)%s*(.*)$")
    if marks == nil then
        return false
    end
    local level = #marks
    if level > 6 then
        self:warn("A heading with more than 6 levels, only 1..6 are supported; reverting to 6")
        level = 6
    end
    return true, self:wrap_block_class("h" .. level, title)
end
--- Recognise a horizontal rule: a line consisting only of three or more "-".
-- @param line input line
-- @return false when the line is not a rule; otherwise true and "<hr>"
parse.horiz_rule = function(self, line)
    if not line:find("^%-%-%-+$") then return false end
    -- The literal must stay on one line: a quoted Lua string cannot contain
    -- a raw newline.
    return true, "<hr>"
end
--
-- blockquote, block comments and preformatted text
--
-- All are block oriented, beginning with some line of symbols
-- and terminated with a line of symbols. block_until is used
-- to grab all the lines within the block.
--
-- Blocks do not nest in any manner. In addition, preformatted
-- text has no inline parsing. Because block comments are
-- ignored, there is no additional parsing. Block quotes have
-- inline parsing, but there is no possibility of lists, tables,
-- or headings within block quotes.
--
-- Scratch stack holding the lines of the block currently being collected.
parse.block_stack = new_stack()

--- Collect input lines up to (not including) a terminator line.
-- Reading stops at the first line matching `pattern`, or when the input is
-- exhausted. The terminator line itself is consumed and discarded.
-- @param pattern Lua pattern identifying the terminator line
-- @return the collected lines joined with "\n", with a leading and a
--         trailing newline (an empty block yields "\n")
function parse:block_until(pattern)
    local collected = self.block_stack
    collected:push("") -- produces the leading newline when joined
    while true do
        local current = self:read_line()
        if not current or current:find(pattern) then
            collected:push("") -- produces the trailing newline when joined
            return collected:gather("\n")
        end
        collected:push(current)
    end
end
--- Recognise the start of a block quote: a line beginning with two double
-- quotes, optionally followed by a class name. The quote runs until a line
-- consisting solely of two double quotes.
-- @param line input line
-- @return false when the line does not open a block quote; otherwise true
--         and the rendered <blockquote> element
function parse:block_quote(line)
    local class = line:match('^""(.*)$')
    if class == nil then
        return false
    end
    local quoted = self:block_until('^""$')
    return true, self:wrap_block_class("blockquote", quoted, class)
end
--- Recognise the start of preformatted text: a line beginning with "..",
-- optionally followed by a class name. The block runs until a line
-- consisting solely of "..".
-- Preformatted text gets no inline parsing, so the block is wrapped with
-- wrap_class directly rather than wrap_block_class (which would rewrite
-- '' and ''' sequences inside the block).
-- @param line input line
-- @return false when the line does not open a preformatted block;
--         otherwise true and the rendered <pre> element
parse.preformatted = function(self, line)
    local class = line:match("^%.%.(.*)$")
    if not class then return false end
    local block = self:block_until("^%.%.$")
    return true, self:wrap_class("pre", block, class)
end
--- Recognise a block comment: a line that is exactly "{{". Everything up to
-- the closing "}}" line is consumed and thrown away.
-- @param line input line
-- @return false when the line does not open a comment; otherwise true and
--         nil (a comment renders nothing)
function parse:block_comments(line)
    if line:find("^{{$") == nil then
        return false
    end
    self:block_until("^}}$") -- read past the comment; its text is ignored
    return true, nil
end
--- Tell whether a line is empty or consists only of whitespace.
-- @param line input line
-- @return true or false
function parse:is_blank(line)
    return line:find("^%s*$") ~= nil
end
--
-- list()
--
-- This function needs to handle the following cases:
--
-- 1) We don't currently have any outstanding list and a single list identifier (i.e., # or *) comes in
-- A. Generate <ul> or <ol> and begin collecting a block
-- 2) We currently have an outstanding list, but this one lengthens it by some amount
-- A. We have to generate subsequent <ul>s or <ol>s to match
-- 3) We currently have an outstanding list, but this one decreases it by some amount
-- A. We need to generate subsequent </ul>s or </ol>s to match
--
-- State shared by the list-parsing routines.
parse.list = {}
-- One marker ("*" or "#") per list level that is currently open.
parse.list.token_stack = new_stack()
-- Output lines of the list markup generated so far.
parse.list.html_stack = new_stack()
-- Text lines of the list item currently being collected.
parse.list.text_stack = new_stack()
-- Opening and closing tags per marker: "#" is an ordered list, "*" an
-- unordered one. Each literal must stay on a single line, since a quoted
-- Lua string cannot contain a raw newline.
parse.list.tag = {
    ["open"]  = { ["#"] = "<ol>",  ["*"] = "<ul>"  },
    ["close"] = { ["#"] = "</ol>", ["*"] = "</ul>" },
}
--- Indentation matching the current list nesting depth.
-- @return one indent unit per marker on token_stack
function parse.list:indent()
    local depth = self.token_stack:depth()
    return parse_indent(depth)
end
--- Tell whether a line starts with list markup (any run of "*" and "#").
-- @param line input line
-- @return true or false
function parse:is_list(line)
    return line:find("^([*#]+)%s*(.*)$") ~= nil
end
--- Entry point for list parsing.
-- When `line` starts a list, the whole list (and whatever parse_list2 emits
-- along with it) is consumed and rendered.
-- @param line input line
-- @return false when the line is not list markup; otherwise true and the
--         generated HTML lines joined with "\n"
function parse:parse_list(line)
    local starts_list = line:find("^([*#]+)%s*(.*)$")
    if starts_list then
        self:parse_list2(line)
        return true, self.list.html_stack:gather("\n")
    end
    return false
end
--- Recursive worker behind parse_list; a small state machine driven by the
-- marker prefix of each list line.
--
-- The markers of the currently open lists live on list.token_stack. For each
-- list line the function opens or closes one list level at a time (re-calling
-- itself after every step) until the open markers equal the line's markers,
-- then collects the item text and renders it as an <li>. All output goes to
-- list.html_stack; nothing is returned.
--
-- @param line the line to process: a list line, a non-list line (which ends
--             the list and is handed back via save_line), or nil at end of
--             input
-- @param html rendered HTML of a block element (heading, rule, block quote,
--             preformatted text) that ended the previous item's text; only
--             the recursive call at the end of this function passes it
parse.parse_list2 = function(self, line, html)
    local list = self.list
    local html_stack = list.html_stack
    local text_stack = list.text_stack
    local token_stack = list.token_stack
    local tags = list.tag

    -- Close every list level that is still open, innermost first. The indent
    -- is computed after the pop so a closing tag lines up with its opener.
    local function close_open_lists()
        while not token_stack:is_empty() do
            local x = token_stack:pop()
            html_stack:push(list:indent() .. tags.close[x])
        end
    end

    local s, _, markup, text
    if line then
        s, _, markup, text = line:find("^([*#]+)%s*(.*)$")
    end
    if not s then
        -- Not a list line (or end of input): hand the line back to the
        -- reader, close everything, then emit any pending block element.
        self.save_line:push(line)
        close_open_lists()
        if html then
            html_stack:push(html)
        end
        return
    end
    if html then
        -- A block element ended the previous item and another list line
        -- follows. Blocks do not nest inside lists, so finish the current
        -- list, emit the block, and let the line start a fresh list.
        -- (This case used to abort with an error.)
        close_open_lists()
        html_stack:push(html)
        return self:parse_list2(line)
    end
    local omarkup = token_stack:concat()
    if omarkup == "" then
        -- No list open yet: open the outermost level and re-call.
        local x = markup:sub(1,1)
        local html = list:indent() .. tags.open[x]
        token_stack:push(x)
        html_stack:push(html)
        return self:parse_list2(line)
    end
    if omarkup ~= markup then
        -- Handle if new list is longer, shorter, different
        if #markup > #omarkup and markup:sub(1,#omarkup) == omarkup then
            -- New markup is longer and otherwise equal to the existing markup:
            -- open one more level and re-call.
            local x = markup:sub(#omarkup +1, #omarkup +1)
            local html = list:indent() .. tags.open[x]
            token_stack:push(x)
            html_stack:push(html)
            return self:parse_list2(line)
        end
        -- Either the new markup is longer than the existing one with a
        -- different base, or it is shorter. Either way, close the innermost
        -- open level and re-call. Ultimately the bases resolve themselves
        -- (possibly by passing through the state in which every open list
        -- has been closed). For example, consider:
        --   "* A simple meal"
        --   "# First add eggs"
        -- The above should render as
        --   <ul><li>A simple meal</li></ul>
        --   <ol><li>First add eggs</li></ol>
        -- To reach the ordered list, all previously pushed tokens are popped
        -- first; this state machine handles that just fine.
        local x = token_stack:pop()
        local html = list:indent() .. tags.close[x]
        html_stack:push(html)
        return self:parse_list2(line)
    end
    -- Open markers match the line: collect the item text. The item continues
    -- over following lines until a blank line, a block element, another list
    -- line, or end of input.
    text_stack:push(text)
    line = self:read_line()
    while line and not self:is_blank(line) do
        local h
        h, html = self:headings(line)
        if not h then h, html = self:horiz_rule(line) end
        if not h then h, html = self:block_quote(line) end
        if not h then h, html = self:preformatted(line) end
        if not h then h, html = self:block_comments(line) end
        if h then
            -- line is now stale (the block consumed it), get a new line
            line = self:read_line()
        else
            h = self:is_list(line)
        end
        if h then break end
        text_stack:push(line)
        line = self:read_line()
    end
    local text = text_stack:gather("\n")
    local wrapped = self:wrap_block_class("li", text)
    html_stack:push(list:indent() .. wrapped)
    -- Blank lines after an item are skipped; they do not end the list.
    while line and self:is_blank(line) do
        line = self:read_line()
    end
    return self:parse_list2(line, html)
end
-- Lines of the paragraph currently being collected.
parse.pgraph = new_stack()

--- Parse the whole input and render it as HTML.
-- Each line is offered in turn to the block recognisers (heading, rule,
-- block quote, preformatted text, block comment, list). Lines that none of
-- them accept are collected into paragraphs. A blank line ends the current
-- paragraph; previously blank lines were stored as paragraph text, which
-- merged paragraphs separated by blank lines and produced <p> elements
-- containing nothing but newlines.
-- @return the rendered fragments joined with "\n"
parse.parse = function(self)
    -- Emit the pending paragraph lines, if any, as a single <p> element.
    local function flush_paragraph()
        local para = self.pgraph:gather("\n")
        if para == "" then return end
        self.body:push(self:wrap_block_class("p", para))
    end

    while true do
        local line = self:read_line()
        if not line then break end
        local h, html = self:headings(line)
        if not h then h, html = self:horiz_rule(line) end
        if not h then h, html = self:block_quote(line) end
        if not h then h, html = self:preformatted(line) end
        if not h then h, html = self:block_comments(line) end
        if not h then h, html = self:parse_list(line) end
        if h then
            -- A block element ends the paragraph in progress. html is nil
            -- for block comments, in which case the push adds nothing.
            flush_paragraph()
            self.body:push(html)
        elseif self:is_blank(line) then
            flush_paragraph()
        else
            self.pgraph:push(line)
        end
    end
    flush_paragraph()
    return self.body:gather("\n")
end
return parse
end -- end of new_parser() which is a parser generator
-- Self-test / demo: feed a fixed sample document through the parser and
-- print both the input and the rendered output.
test = {}

-- The sample document, one entry per input line.
test.markup = {
    "{{",
    "In this file, I will test some of the markup features",
    "}}",
    "= Hello",
    "----",
    "== World!",
    '""',
    "This is some offset block quote",
    '""',
    "This is the first paragraph,",
    "and it spans multiple lines",
    "before starting a list",
    "* A",
    "{{",
    "Here I am testing comments within lists",
    "}}",
    "* B",
    "* C",
    "*# 1",
    "*# 2",
    "*# 3",
    "and some more text for C.3",
    "* D",
    "# One",
    "#* One A",
    "#** One A a",
    "#* One B",
    "#** One B a",
    "# Two",
    "* Fresh",
    "..forth",
    ": add ( n n -- N) + ;",
    "..",
    "",
    "And the last paragraph of the markup."
}

-- Index of the line most recently handed out by test.read.
test.i = 0

-- Line reader for new_parser(): returns the next sample line, or nil once
-- the sample is exhausted.
function test.read(self)
    self.i = self.i + 1
    return self.markup[self.i]
end

print("-- The input 'file':")
for _, text in ipairs(test.markup) do
    print(text)
end

print("-- Processing ...")
p = new_parser(test.read, test)

print("-- The output:")
print(p:parse())