vis

a vi-like editor based on Plan 9's structural regular expressions
markdown.lua

(6696B)
      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- Markdown LPeg lexer.
      3 
      4 local lexer = lexer
      5 local P, S, B = lpeg.P, lpeg.S, lpeg.B
      6 
      7 local lex = lexer.new(..., {no_user_word_lists = true})
      8 
      9 -- Distinguish between horizontal and vertical space so html start rule has a chance to match.
     10 lex:modify_rule('whitespace', lex:tag(lexer.WHITESPACE, S(' \t')^1 + S('\r\n')^1))
     11 
     12 -- Block elements.
     13 local function h(n)
     14 	return lex:tag(string.format('%s.h%s', lexer.HEADING, n),
     15 		lexer.to_eol(lexer.starts_line(string.rep('#', n))))
     16 end
     17 lex:add_rule('header', h(6) + h(5) + h(4) + h(3) + h(2) + h(1))
     18 
     19 lex:add_rule('hr',
     20 	lex:tag('hr', lpeg.Cmt(lexer.starts_line(lpeg.C(S('*-_')), true), function(input, index, c)
     21 		local line = input:match('[^\r\n]*', index):gsub('[ \t]', '')
     22 		if line:find('[^' .. c .. ']') or #line < 2 then return nil end
     23 		return (select(2, input:find('\r?\n', index)) or #input) + 1 -- include \n for eolfilled styles
     24 	end)))
     25 
     26 lex:add_rule('list', lex:tag(lexer.LIST,
     27 	lexer.starts_line(lexer.digit^1 * '.' + S('*+-'), true) * S(' \t')))
     28 
     29 local hspace = lexer.space - '\n'
     30 local blank_line = '\n' * hspace^0 * ('\n' + P(-1))
     31 
-- Pattern for one line of an indented code block.
-- The line must start (indentation allowed) right after four spaces or a tab. The function
-- pattern then decides, by inspecting earlier lines, whether that indentation really denotes
-- code: it returns true to let the match continue (the rest of the line is then consumed by
-- `lexer.to_eol()`) or false to make the match fail.
local code_line = lexer.starts_line((B('    ') + B('\t')) * lpeg.P(function(input, index)
	-- Backtrack to the start of the current paragraph, which is either after a blank line,
	-- at the start of a higher level of indentation, or at the start of the buffer.
	-- Note: the local `blank_line` below is a flag and shadows the file-level pattern of
	-- the same name inside this function.
	local line, blank_line = lexer.line_from_position(index), false
	while line > 0 do
		local s, e = lexer.line_start[line], lexer.line_end[line]
		-- A line counts as blank when it is empty or consists solely of whitespace.
		blank_line = s == e or lexer.text_range(s, e - s + 1):find('^%s+$')
		if blank_line then break end
		local indent_amount = lexer.indent_amount[line]
		line = line - 1
		-- Stop when the previous line is indented more than the line just examined.
		if line > 0 and lexer.indent_amount[line] > indent_amount then break end
	end

	-- If the start of the paragraph does not begin with a '    ' or '\t', then this line
	-- is a continuation of the current paragraph, not a code block.
	-- (`line` now refers to the line just before the paragraph, hence `line + 1`.)
	local text = lexer.text_range(lexer.line_start[line + 1], 4)
	if not text:find('^\t') and text ~= '    ' then return false end

	-- If the current paragraph is a code block, then so is this line.
	-- With `line <= 1` the backtracking loop below would not run at all.
	if line <= 1 then return true end

	-- Backtrack to see if this line is in a list item. If so, it is not a code block.
	-- The scan stops at the nearest earlier non-blank line with zero indentation, or at line 1.
	while line > 1 do
		line = line - 1
		local s, e = lexer.line_start[line], lexer.line_end[line]
		local blank = s == e or lexer.text_range(s, e - s + 1):find('^%s+$')
		if not blank and lexer.indent_amount[line] == 0 then break end
	end
	text = lexer.text_range(lexer.line_start[line], 8) -- note: only 2 is needed for unordered lists
	-- Unordered ('*', '+', '-') or ordered ('1.') list marker followed by a space or tab.
	if text:find('^[*+-][ \t]') then return false end
	if text:find('^%d+%.[ \t]') then return false end

	return true -- if all else fails, it is probably a code block
end) * lexer.to_eol(), true)
     66 
     67 local code_block = lexer.range(lexer.starts_line('```', true),
     68 	'\n' * hspace^0 * '```' * hspace^0 * ('\n' + P(-1))) +
     69 	lexer.range(lexer.starts_line('~~~', true), '\n' * hspace^0 * '~~~' * hspace^0 * ('\n' + P(-1)))
     70 
     71 local code_inline = lpeg.Cmt(lpeg.C(P('`')^1), function(input, index, bt)
     72 	-- `foo`, ``foo``, ``foo`bar``, `foo``bar` are all allowed.
     73 	local _, e = input:find('[^`]' .. bt .. '%f[^`]', index)
     74 	return (e or #input) + 1
     75 end)
     76 
     77 lex:add_rule('block_code', lex:tag(lexer.CODE, code_line + code_block + code_inline))
     78 
     79 lex:add_rule('blockquote', lex:tag(lexer.STRING, lexer.starts_line('>', true)))
     80 
     81 -- Span elements.
     82 lex:add_rule('escape', lex:tag(lexer.DEFAULT, P('\\') * 1))
     83 
     84 local link_text = lexer.range('[', ']', true)
     85 local link_target =
     86 	'(' * (lexer.any - S(') \t'))^0 * (S(' \t')^1 * lexer.range('"', false, false))^-1 * ')'
     87 local link_url = 'http' * P('s')^-1 * '://' * (lexer.any - lexer.space)^1 +
     88 	('<' * lexer.alpha^2 * ':' * (lexer.any - lexer.space - '>')^1 * '>')
     89 lex:add_rule('link', lex:tag(lexer.LINK, P('!')^-1 * link_text * link_target + link_url))
     90 
     91 local link_ref = lex:tag(lexer.REFERENCE, link_text * S(' \t')^0 * lexer.range('[', ']', true))
     92 local ref_link_label = lex:tag(lexer.REFERENCE, lexer.range('[', ']', true) * ':')
     93 local ws = lex:get_rule('whitespace')
     94 local ref_link_url = lex:tag(lexer.LINK, (lexer.any - lexer.space)^1)
     95 local ref_link_title = lex:tag(lexer.STRING, lexer.range('"', true, false) +
     96 	lexer.range("'", true, false) + lexer.range('(', ')', true))
     97 lex:add_rule('link_ref', link_ref + ref_link_label * ws * ref_link_url * (ws * ref_link_title)^-1)
     98 
     99 local punct_space = lexer.punct + lexer.space
    100 
    101 -- Handles flanking delimiters as described in
    102 -- https://github.github.com/gfm/#emphasis-and-strong-emphasis in the cases where simple
    103 -- delimited ranges are not sufficient.
    104 local function flanked_range(s, not_inword)
    105 	local fl_char = lexer.any - s - lexer.space
    106 	local left_fl = B(punct_space - s) * s * #fl_char + s * #(fl_char - lexer.punct)
    107 	local right_fl = B(lexer.punct) * s * #(punct_space - s) + B(fl_char) * s
    108 	return left_fl * (lexer.any - blank_line - (not_inword and s * #punct_space or s))^0 * right_fl
    109 end
    110 
    111 local asterisk_strong = flanked_range('**')
    112 local underscore_strong = (B(punct_space) + #lexer.starts_line('_')) * flanked_range('__', true) *
    113 	#(punct_space + -1)
    114 lex:add_rule('strong', lex:tag(lexer.BOLD, asterisk_strong + underscore_strong))
    115 
    116 local asterisk_em = flanked_range('*')
    117 local underscore_em = (B(punct_space) + #lexer.starts_line('_')) * flanked_range('_', true) *
    118 	#(punct_space + -1)
    119 lex:add_rule('em', lex:tag(lexer.ITALIC, asterisk_em + underscore_em))
    120 
    121 -- Embedded HTML.
    122 local html = lexer.load('html')
    123 local start_rule = lexer.starts_line(P(' ')^-3) * #P('<') * html:get_rule('tag') -- P(' ')^4 starts code_line
    124 local end_rule = #blank_line * ws
    125 lex:embed(html, start_rule, end_rule)
    126 
    127 local FOLD_HEADER, FOLD_BASE = lexer.FOLD_HEADER, lexer.FOLD_BASE
    128 -- Fold '#' headers.
    129 function lex:fold(text, start_line, start_level)
    130 	local levels = {}
    131 	local line_num = start_line
    132 	if start_level > FOLD_HEADER then start_level = start_level - FOLD_HEADER end
    133 	for line in (text .. '\n'):gmatch('(.-)\r?\n') do
    134 		local header = line:match('^%s*(#*)')
    135 		-- If the previous line was a header, this line's level has been pre-defined.
    136 		-- Otherwise, use the previous line's level, or if starting to fold, use the start level.
    137 		local level = levels[line_num] or levels[line_num - 1] or start_level
    138 		if level > FOLD_HEADER then level = level - FOLD_HEADER end
    139 		-- If this line is a header, set its level to be one less than the header level
    140 		-- (so it can be a fold point) and mark it as a fold point.
    141 		if #header > 0 then
    142 			level = FOLD_BASE + #header - 1 + FOLD_HEADER
    143 			levels[line_num + 1] = FOLD_BASE + #header
    144 		end
    145 		levels[line_num] = level
    146 		line_num = line_num + 1
    147 	end
    148 	return levels
    149 end
    150 
    151 return lex