vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

rest.lua

(8753B)


      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- reStructuredText LPeg lexer.
      3 
      4 local lexer = lexer
      5 local starts_line = lexer.starts_line
      6 local P, S = lpeg.P, lpeg.S
      7 
      8 local lex = lexer.new(...)
      9 
     10 -- Literal block.
     11 local block = '::' * (lexer.newline + -1) * function(input, index)
     12 	local rest = input:sub(index)
     13 	local level, quote = #rest:match('^([ \t]*)')
     14 	for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
     15 		local no_indent = (indent - pos < level and line ~= ' ' or level == 0)
     16 		local quoted = no_indent and line:find(quote or '^%s*%W')
     17 		if quoted and not quote then quote = '^%s*%' .. line:match('^%s*(%W)') end
     18 		if no_indent and not quoted and pos > 1 then return index + pos - 1 end
     19 	end
     20 	return #input + 1
     21 end
     22 lex:add_rule('literal_block', lex:tag(lexer.LABEL .. '.literal', block))
     23 
     24 -- Lists.
     25 local option_word = lexer.alnum * (lexer.alnum + '-')^0
     26 local option = S('-/') * option_word * (' ' * option_word)^-1 +
     27 	('--' * option_word * ('=' * option_word)^-1)
     28 local option_list = option * (',' * lexer.space^1 * option)^-1
     29 local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
     30 local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)')
     31 local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1
     32 lex:add_rule('list',
     33 	#(lexer.space^0 * (S('*+-:/') + enum_list)) * starts_line(lex:tag(lexer.LIST, lexer.space^0 *
     34 		(option_list + bullet_list + enum_list + field_list) * lexer.space)))
     35 
     36 local any_indent = S(' \t')^0
     37 local word = lexer.alpha * (lexer.alnum + S('-.+'))^0
     38 local prefix = any_indent * '.. '
     39 
     40 -- Explicit markup blocks.
     41 local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']'
     42 local footnote = lex:tag(lexer.LABEL .. '.footnote', prefix * footnote_label * lexer.space)
     43 local citation_label = '[' * word * ']'
     44 local citation = lex:tag(lexer.LABEL .. '.citation', prefix * citation_label * lexer.space)
     45 local link = lex:tag(lexer.LABEL .. '.link', prefix * '_' *
     46 	(lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space)
     47 lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link))
     48 
     49 -- Sphinx code block.
     50 local indented_block = function(input, index)
     51 	local rest = input:sub(index)
     52 	local level = #rest:match('^([ \t]*)')
     53 	for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
     54 		if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then return index + pos - 1 end
     55 	end
     56 	return #input + 1
     57 end
     58 local code_block =
     59 	prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) * indented_block
     60 lex:add_rule('code_block', #prefix * lex:tag(lexer.LABEL .. '.code', starts_line(code_block)))
     61 
     62 -- Directives.
     63 local known_directive = lex:tag(lexer.KEYWORD,
     64 	prefix * lex:word_match(lexer.KEYWORD) * '::' * lexer.space)
     65 local sphinx_directive = lex:tag(lexer.KEYWORD .. '.sphinx', prefix *
     66 	lex:word_match(lexer.KEYWORD .. '.sphinx') * '::' * lexer.space)
     67 local unknown_directive = lex:tag(lexer.KEYWORD .. '.unknown', prefix * word * '::' * lexer.space)
     68 lex:add_rule('directive',
     69 	#prefix * starts_line(known_directive + sphinx_directive + unknown_directive))
     70 
     71 -- Substitution definitions.
     72 lex:add_rule('substitution',
     73 	#prefix * lex:tag(lexer.FUNCTION, starts_line(prefix * lexer.range('|') * lexer.space^1 * word *
     74 		'::' * lexer.space)))
     75 
     76 -- Comments.
     77 local line_comment = lexer.to_eol(prefix)
     78 local bprefix = any_indent * '..'
     79 local block_comment = bprefix * lexer.newline * indented_block
     80 lex:add_rule('comment', #bprefix * lex:tag(lexer.COMMENT, starts_line(line_comment + block_comment)))
     81 
     82 -- Section titles (2 or more characters).
     83 local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
     84 local adornment = lpeg.C(adornment_chars^2 * any_indent) * (lexer.newline + -1)
     85 local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
     86 	if not adm:find('^%' .. c .. '+%s*$') then return nil end
     87 	local rest = input:sub(index)
     88 	local lines = 1
     89 	for line, e in rest:gmatch('([^\r\n]+)()') do
     90 		if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then return index + e - 1 end
     91 		if lines > 3 or #line > #adm then return nil end
     92 		lines = lines + 1
     93 	end
     94 	return #input + 1
     95 end)
     96 local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
     97 	local pos = adm:match('^%' .. c .. '+%s*()$')
     98 	return pos and index - #adm + pos - 1 or nil
     99 end)
    100 -- Token needs to be a predefined one in order for folder to work.
    101 lex:add_rule('title', lex:tag(lexer.HEADING, overline + underline))
    102 
    103 -- Line block.
    104 lex:add_rule('line_block_char', lex:tag(lexer.OPERATOR, starts_line(any_indent * '|')))
    105 
    106 -- Whitespace.
    107 lex:add_rule('whitespace', lex:tag(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
    108 
    109 -- Inline markup.
    110 local strong = lex:tag(lexer.BOLD, lexer.range('**'))
    111 local em = lex:tag(lexer.ITALIC, lexer.range('*'))
    112 local inline_literal = lex:tag(lexer.CODE .. '.inline', lexer.range('``'))
    113 local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1
    114 local prefix_link = '_' * lexer.range('`')
    115 local link_ref = lex:tag(lexer.LINK, postfix_link + prefix_link)
    116 local role = lex:tag(lexer.FUNCTION_BUILTIN, ':' * word * ':' * (word * ':')^-1)
    117 local interpreted = role^-1 * lex:tag(lexer.EMBEDDED, lexer.range('`')) * role^-1
    118 local footnote_ref = lex:tag(lexer.REFERENCE, footnote_label * '_')
    119 local citation_ref = lex:tag(lexer.REFERENCE, citation_label * '_')
    120 local substitution_ref = lex:tag(lexer.FUNCTION, lexer.range('|', true) * ('_' * P('_')^-1)^-1)
    121 local link = lex:tag(lexer.LINK,
    122 	lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1)
    123 lex:add_rule('inline_markup',
    124 	(strong + em + inline_literal + link_ref + interpreted + footnote_ref + citation_ref +
    125 		substitution_ref + link) * -lexer.alnum)
    126 
    127 -- Other.
    128 lex:add_rule('non_space', lex:tag(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
    129 lex:add_rule('escape', lex:tag(lexer.DEFAULT, '\\' * lexer.any))
    130 
    131 -- Section-based folding.
    132 local sphinx_levels = {
    133 	['#'] = 0, ['*'] = 1, ['='] = 2, ['-'] = 3, ['^'] = 4, ['"'] = 5
    134 }
    135 
    136 function lex:fold(text, start_line, start_level)
    137 	local folds, line_starts = {}, {}
    138 	for pos in (text .. '\n'):gmatch('().-\r?\n') do line_starts[#line_starts + 1] = pos end
    139 	local style_at, CONSTANT, level = lexer.style_at, lexer.CONSTANT, start_level
    140 	local sphinx = lexer.property_int['fold.scintillua.rest.by.sphinx.convention'] > 0
    141 	local FOLD_BASE = lexer.FOLD_BASE
    142 	local FOLD_HEADER, FOLD_BLANK = lexer.FOLD_HEADER, lexer.FOLD_BLANK
    143 	for i = 1, #line_starts do
    144 		local pos, next_pos = line_starts[i], line_starts[i + 1]
    145 		local c = text:sub(pos, pos)
    146 		local line_num = start_line + i - 1
    147 		folds[line_num] = level
    148 		if style_at[pos - 1] == CONSTANT and c:find('^[^%w%s]') then
    149 			local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels)
    150 			level = not sphinx and level - 1 or sphinx_level
    151 			if level < FOLD_BASE then level = FOLD_BASE end
    152 			folds[line_num - 1], folds[line_num] = level, level + FOLD_HEADER
    153 			level = (not sphinx and level or sphinx_level) + 1
    154 		elseif c == '\r' or c == '\n' then
    155 			folds[line_num] = level + FOLD_BLANK
    156 		end
    157 	end
    158 	return folds
    159 end
    160 
    161 --[[ Embedded languages.
    162 local bash = lexer.load('bash')
    163 local bash_indent_level
    164 local start_rule =
    165   #(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) *
    166   sphinx_directive * lex:tag(lexer.EMBEDDED, P(function(input, index)
    167     bash_indent_level = #input:match('^([ \t]*)', index)
    168     return index
    169   end))]]
    170 
    171 -- Word lists
    172 lex:set_word_list(lexer.KEYWORD, {
    173 	-- Admonitions
    174 	'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning',
    175 	'admonition',
    176 	-- Images
    177 	'image', 'figure',
    178 	-- Body elements
    179 	'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric', 'epigraph',
    180 	'highlights', 'pull-quote', 'compound', 'container',
    181 	-- Table
    182 	'table', 'csv-table', 'list-table',
    183 	-- Document parts
    184 	'contents', 'sectnum', 'section-autonumbering', 'header', 'footer',
    185 	-- References
    186 	'target-notes', 'footnotes', 'citations',
    187 	-- HTML-specific
    188 	'meta',
    189 	-- Directives for substitution definitions
    190 	'replace', 'unicode', 'date',
    191 	-- Miscellaneous
    192 	'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive'
    193 })
    194 
    195 lex:set_word_list(lexer.KEYWORD .. '.sphinx', {
    196 	-- The TOC tree.
    197 	'toctree',
    198 	-- Paragraph-level markup.
    199 	'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso', 'rubric',
    200 	'centered', 'hlist', 'glossary', 'productionlist',
    201 	-- Showing code examples.
    202 	'highlight', 'literalinclude',
    203 	-- Miscellaneous
    204 	'sectionauthor', 'index', 'only', 'tabularcolumns'
    205 })
    206 
-- Property read by lex:fold() to choose folding by the Sphinx adornment convention; values
-- above 0 enable it.
-- lexer.property['fold.scintillua.rest.by.sphinx.convention'] = '0'
-- Set the 'scintillua.comment' property to the reStructuredText comment prefix.
lexer.property['scintillua.comment'] = '.. '

return lex