vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

txt2tags.lua

(5347B)


      1 -- Copyright 2019-2025 Julien L. See LICENSE.
      2 -- txt2tags LPeg lexer.
      3 -- (developed and tested with Txt2tags Markup Rules
      4 -- [https://txt2tags.org/doc/english/rules.t2t])
      5 -- Contributed by Julien L.
      6 
      7 local lexer = require('lexer')
      8 local token, word_match = lexer.token, lexer.word_match
      9 local P, S = lpeg.P, lpeg.S
     10 local nonspace = lexer.any - lexer.space
     11 
     12 local lex = lexer.new('txt2tags')
     13 
     14 -- Whitespace.
     15 local ws = token(lexer.WHITESPACE, (lexer.space - lexer.newline)^1)
     16 
     17 -- Titles
     18 local alphanumeric = lexer.alnum + S('_-')
     19 local header_label = token('header_label_start', '[') * token(lexer.LABEL, alphanumeric^1) *
     20 	token('header_label_end', ']')
     21 local function h(level)
     22 	local equal = string.rep('=', level) * (lexer.nonnewline - '=')^1 * string.rep('=', level)
     23 	local plus = string.rep('+', level) * (lexer.nonnewline - '+')^1 * string.rep('+', level)
     24 	return token('h' .. level, equal + plus) * header_label^-1
     25 end
     26 local header = h(5) + h(4) + h(3) + h(2) + h(1)
     27 
     28 -- Comments.
     29 local line_comment = lexer.to_eol(lexer.starts_line('%'))
     30 local block_comment = lexer.range(lexer.starts_line('%%%'))
     31 local comment = token(lexer.COMMENT, block_comment + line_comment)
     32 
     33 -- Inline.
     34 local function span(name, delimiter)
     35 	return token(name, (delimiter * nonspace * delimiter * S(delimiter)^0) +
     36 		(delimiter * nonspace * (lexer.nonnewline - nonspace * delimiter)^0 * nonspace * delimiter *
     37 			S(delimiter)^0))
     38 end
     39 local bold = span(lexer.BOLD, '**')
     40 local italic = span(lexer.ITALIC, '//')
     41 local underline = span(lexer.UNDERLINE, '__')
     42 local strike = span('strike', '--')
     43 local mono = span(lexer.CODE, '``')
     44 local raw = span(lexer.DEFAULT, '""')
     45 local tagged = span('tagged', "''")
     46 local inline = bold + italic + underline + strike + mono + raw + tagged
     47 
     48 -- Link.
     49 local email = token(lexer.LINK,
     50 	(nonspace - '@')^1 * '@' * (nonspace - '.')^1 * ('.' * (nonspace - S('.?'))^1)^1 *
     51 		('?' * nonspace^1)^-1)
     52 local host = token(lexer.LINK,
     53 	word_match('www ftp', true) * (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' *
     54 		(nonspace - S(',.'))^1)
     55 local url = token(lexer.LINK,
     56 	(nonspace - '://')^1 * '://' * (nonspace - ',' - '.')^1 * ('.' * (nonspace - S(',./?#'))^1)^1 *
     57 		('/' * (nonspace - S('./?#'))^0 * ('.' * (nonspace - S(',.?#'))^1)^0)^0 *
     58 		('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1)
     59 local label_with_address = token(lexer.LABEL, '[') * lexer.space^0 *
     60 	token(lexer.LABEL, ((nonspace - ']')^1 * lexer.space^1)^1) * token(lexer.LINK, (nonspace - ']')^1) *
     61 	token(lexer.LABEL, ']')
     62 local link = label_with_address + url + host + email
     63 
     64 -- Line.
     65 local line = token('line', S('-=_')^20)
     66 
     67 -- Image.
     68 local image_only = token('image_start', '[') * token('image', (nonspace - ']')^1) *
     69 	token('image_end', ']')
     70 local image_link = token('image_link_start', '[') * image_only *
     71 	token('image_link_sep', lexer.space^1) * token(lexer.LINK, (nonspace - ']')^1) *
     72 	token('image_link_end', ']')
     73 local image = image_link + image_only
     74 
     75 -- Macro.
     76 local macro = token(lexer.PREPROCESSOR, '%%' * (nonspace - '(')^1 * lexer.range('(', ')', true)^-1)
     77 
     78 -- Verbatim.
     79 local verbatim_line = lexer.to_eol(lexer.starts_line('```') * S(' \t'))
     80 local verbatim_block = lexer.range(lexer.starts_line('```'))
     81 local verbatim_area = token(lexer.CODE, verbatim_block + verbatim_line)
     82 
     83 -- Raw.
     84 local raw_line = lexer.to_eol(lexer.starts_line('"""') * S(' \t'))
     85 local raw_block = lexer.range(lexer.starts_line('"""'))
     86 local raw_area = token(lexer.DEFAULT, raw_block + raw_line)
     87 
     88 -- Tagged.
     89 local tagged_line = lexer.to_eol(lexer.starts_line('\'\'\'') * S(' \t'))
     90 local tagged_block = lexer.range(lexer.starts_line('\'\'\''))
     91 local tagged_area = token('tagged_area', tagged_block + tagged_line)
     92 
     93 -- Table.
     94 local table_sep = token('table_sep', '|')
     95 local cell_content = inline + link + image + macro + token('cell_content', lexer.nonnewline - ' |')
     96 local header_cell_content = token('header_cell_content', lexer.nonnewline - ' |')
     97 local field_sep = ' ' * table_sep^1 * ' '
     98 local table_row_end = P(' ')^0 * table_sep^0
     99 local table_row = lexer.starts_line(P(' ')^0 * table_sep) * cell_content^0 *
    100 	(field_sep * cell_content^0)^0 * table_row_end
    101 local table_row_header =
    102 	lexer.starts_line(P(' ')^0 * table_sep * table_sep) * header_cell_content^0 *
    103 		(field_sep * header_cell_content^0)^0 * table_row_end
    104 local table = table_row_header + table_row
    105 
    106 lex:add_rule('table', table)
    107 lex:add_rule('link', link)
    108 lex:add_rule('line', line)
    109 lex:add_rule('header', header)
    110 lex:add_rule('comment', comment)
    111 lex:add_rule('whitespace', ws)
    112 lex:add_rule('image', image)
    113 lex:add_rule('macro', macro)
    114 lex:add_rule('inline', inline)
    115 lex:add_rule('verbatim_area', verbatim_area)
    116 lex:add_rule('raw_area', raw_area)
    117 lex:add_rule('tagged_area', tagged_area)
    118 
    119 lex:add_style('line', {bold = true})
    120 local font_size = tonumber(lexer.property_expanded['style.default']:match('size:(%d+)')) or 10
    121 for n = 5, 1, -1 do
    122 	lex:add_style('h' .. n, {fore = lexer.colors.red, size = font_size + (6 - n)})
    123 end
    124 lex:add_style('image', {fore = lexer.colors.green})
    125 lex:add_style('strike', {italics = true}) -- a strike style is not available
    126 lex:add_style('tagged', lexer.styles.embedded)
    127 lex:add_style('tagged_area', lexer.styles.embedded) -- in consistency with tagged
    128 lex:add_style('table_sep', {fore = lexer.colors.green})
    129 lex:add_style('header_cell_content', {fore = lexer.colors.green})
    130 
    131 return lex