vis
a vi-like editor based on Plan 9's structural regular expressions
git clone https://9o.is/git/vis.git
txt2tags.lua
(5347B)
1 -- Copyright 2019-2025 Julien L. See LICENSE.
2 -- txt2tags LPeg lexer.
3 -- (developed and tested with Txt2tags Markup Rules
4 -- [https://txt2tags.org/doc/english/rules.t2t])
5 -- Contributed by Julien L.
6
local lexer = require('lexer')
local token = lexer.token
local word_match = lexer.word_match
local P = lpeg.P
local S = lpeg.S

-- Any single character that is not matched by lexer.space.
local nonspace = lexer.any - lexer.space

-- The lexer object that all rules and styles below are attached to.
local lex = lexer.new('txt2tags')
13
-- Whitespace: one or more space characters, never including a newline.
local horizontal_space = lexer.space - lexer.newline
local ws = token(lexer.WHITESPACE, horizontal_space^1)
16
-- Titles.
-- Title label: a name made of letters, digits, '_' or '-', enclosed in square brackets.
-- The brackets and the name are emitted as three separate tokens.
local alphanumeric = lexer.alnum + S('_-')
local header_label_name = token(lexer.LABEL, alphanumeric^1)
local header_label = token('header_label_start', '[') * header_label_name *
  token('header_label_end', ']')
-- Builds the pattern for a title of the given level.
-- A title is `level` copies of '=' (or of '+'), then at least one character that is neither a
-- newline nor that marker character, then `level` copies of the marker again. The title is
-- tagged 'h<level>' and may be followed by a header label.
-- @param level Number of marker characters on each side of the title text.
-- @return The LPeg pattern for that title level.
local function h(level)
  local function enclosed_by(marker)
    local fence = string.rep(marker, level)
    return fence * (lexer.nonnewline - marker)^1 * fence
  end
  return token('h' .. level, enclosed_by('=') + enclosed_by('+')) * header_label^-1
end
-- Any title level, tried from level 5 down to level 1.
local header = h(5)
for level = 4, 1, -1 do
  header = header + h(level)
end
27
-- Comments.
-- Block form: a range delimited by '%%%' at the start of a line.
-- Line form: from a '%' at the start of a line up to the end of that line.
-- The block form is tried before the line form.
local comment_fence = lexer.starts_line('%%%')
local block_comment = lexer.range(comment_fence)
local line_comment = lexer.to_eol(lexer.starts_line('%'))
local comment = token(lexer.COMMENT, block_comment + line_comment)
32
-- Inline.
-- Builds the token pattern for an inline span enclosed by `delimiter` on both sides.
-- The enclosed text stays on one line and must begin and end with a non-space character;
-- a single non-space character is accepted too. Extra delimiter characters directly after
-- the closing delimiter are included in the token.
-- @param name Token name given to the whole span.
-- @param delimiter String that opens and closes the span.
-- @return The LPeg token pattern.
local function span(name, delimiter)
  local extra_closers = S(delimiter)^0
  -- Exactly one non-space character between the delimiters.
  local one_char = delimiter * nonspace * delimiter * extra_closers
  -- Stop consuming as soon as "non-space character + delimiter" comes next.
  local middle = (lexer.nonnewline - nonspace * delimiter)^0
  local many_chars = delimiter * nonspace * middle * nonspace * delimiter * extra_closers
  return token(name, one_char + many_chars)
end
-- Inline marks, listed in the order in which they are tried:
-- bold, italic, underline, strike, monospaced, raw and tagged.
local inline_spans = {
  {name = lexer.BOLD, delimiter = '**'},
  {name = lexer.ITALIC, delimiter = '//'},
  {name = lexer.UNDERLINE, delimiter = '__'},
  {name = 'strike', delimiter = '--'},
  {name = lexer.CODE, delimiter = '``'},
  {name = lexer.DEFAULT, delimiter = '""'},
  {name = 'tagged', delimiter = "''"}
}
local inline
for _, spec in ipairs(inline_spans) do
  local patt = span(spec.name, spec.delimiter)
  inline = inline and (inline + patt) or patt
end
47
-- Link.
-- E-mail address: text, '@', a dotted name and an optional '?...' suffix.
local email_local_part = (nonspace - '@')^1
local email_domain = (nonspace - '.')^1 * ('.' * (nonspace - S('.?'))^1)^1
local email_query = '?' * nonspace^1
local email = token(lexer.LINK, email_local_part * '@' * email_domain * email_query^-1)

-- Bare host: starts with the word 'www' or 'ftp' and contains at least two dots.
local host_prefix = word_match('www ftp', true) * (nonspace - '.')^0
local host = token(lexer.LINK,
  host_prefix * '.' * (nonspace - '.')^1 * '.' * (nonspace - S(',.'))^1)

-- URL: something before '://', a dotted host, then optional '/' segments, '?...' and '#...'.
local url_scheme = (nonspace - '://')^1 * '://'
local url_host = (nonspace - ',' - '.')^1 * ('.' * (nonspace - S(',./?#'))^1)^1
local url_segment = '/' * (nonspace - S('./?#'))^0 * ('.' * (nonspace - S(',.?#'))^1)^0
local url_query = '?' * (nonspace - '#')^1
local url_fragment = '#' * nonspace^0
local url = token(lexer.LINK,
  url_scheme * url_host * url_segment^0 * url_query^-1 * url_fragment^-1)

-- Bracketed link: '[', one or more words each followed by whitespace, the address, ']'.
local label_open = token(lexer.LABEL, '[')
local label_words = token(lexer.LABEL, ((nonspace - ']')^1 * lexer.space^1)^1)
local label_address = token(lexer.LINK, (nonspace - ']')^1)
local label_close = token(lexer.LABEL, ']')
local label_with_address = label_open * lexer.space^0 * label_words * label_address * label_close

local link = label_with_address + url + host + email
63
-- Line: a run of at least 20 characters taken from '-', '=' and '_'.
local line_char = S('-=_')
local line = token('line', line_char^20)
66
-- Image.
-- Plain image: '[', a run of non-space characters other than ']', then ']'.
local image_name = token('image', (nonspace - ']')^1)
local image_only = token('image_start', '[') * image_name * token('image_end', ']')

-- Image used as a link: '[', a plain image, whitespace, the link address, ']'.
local image_link_address = token(lexer.LINK, (nonspace - ']')^1)
local image_link = token('image_link_start', '[') * image_only *
  token('image_link_sep', lexer.space^1) * image_link_address * token('image_link_end', ']')

-- The link form is tried first; the plain form is the fallback.
local image = image_link + image_only
74
-- Macro: '%%', a name (non-space characters other than '('), then an optional
-- parenthesized part.
local macro_name = (nonspace - '(')^1
local macro_args = lexer.range('(', ')', true)
local macro = token(lexer.PREPROCESSOR, '%%' * macro_name * macro_args^-1)
77
-- Verbatim.
-- Two forms share the '```' fence at the start of a line:
--   * verbatim line: the fence, a blank, then visible text up to the end of that line;
--   * verbatim area: everything from a fence up to the next fence found at a line start.
-- The one-line form has to be tried first. lexer.range() treats its closing delimiter as
-- optional, so the area pattern succeeds on every line-initial fence; when it was tried first
-- the one-line form could never match and a verbatim line swallowed all following text up to
-- the next fence (or the end of the document).
-- The lookahead requires visible text after the fence, so that a fence followed only by
-- trailing blanks is still handled as an area delimiter.
local verbatim_line = lexer.to_eol(lexer.starts_line('```') * S(' \t') *
  #(S(' \t')^0 * nonspace))
local verbatim_block = lexer.range(lexer.starts_line('```'))
local verbatim_area = token(lexer.CODE, verbatim_line + verbatim_block)
82
-- Raw.
-- Two forms share the '"""' fence at the start of a line:
--   * raw line: the fence, a blank, then visible text up to the end of that line;
--   * raw area: everything from a fence up to the next fence found at a line start.
-- The one-line form has to be tried first. lexer.range() treats its closing delimiter as
-- optional, so the area pattern succeeds on every line-initial fence; when it was tried first
-- the one-line form could never match and a raw line swallowed all following text up to the
-- next fence (or the end of the document).
-- The lookahead requires visible text after the fence, so that a fence followed only by
-- trailing blanks is still handled as an area delimiter.
local raw_line = lexer.to_eol(lexer.starts_line('"""') * S(' \t') * #(S(' \t')^0 * nonspace))
local raw_block = lexer.range(lexer.starts_line('"""'))
local raw_area = token(lexer.DEFAULT, raw_line + raw_block)
87
-- Tagged.
-- Two forms share the "'''" fence at the start of a line:
--   * tagged line: the fence, a blank, then visible text up to the end of that line;
--   * tagged area: everything from a fence up to the next fence found at a line start.
-- The one-line form has to be tried first. lexer.range() treats its closing delimiter as
-- optional, so the area pattern succeeds on every line-initial fence; when it was tried first
-- the one-line form could never match and a tagged line swallowed all following text up to
-- the next fence (or the end of the document).
-- The lookahead requires visible text after the fence, so that a fence followed only by
-- trailing blanks is still handled as an area delimiter.
local tagged_line = lexer.to_eol(lexer.starts_line('\'\'\'') * S(' \t') *
  #(S(' \t')^0 * nonspace))
local tagged_block = lexer.range(lexer.starts_line('\'\'\''))
local tagged_area = token('tagged_area', tagged_line + tagged_block)
92
-- Table.
-- A row starts at the beginning of a line with optional spaces and a '|'; a header row starts
-- with '||' instead. Fields are separated by ' |' ... '| ' (a space, one or more '|', a space),
-- and a row may end with spaces and closing '|' characters.
local table_sep = token('table_sep', '|')
-- Body cells may hold inline marks, links, images and macros; any other character (short of
-- the ' |' that begins a separator) becomes a 'cell_content' token.
local cell_content = inline + link + image + macro + token('cell_content', lexer.nonnewline - ' |')
-- Header cells are tokenized character by character without looking for nested markup.
local header_cell_content = token('header_cell_content', lexer.nonnewline - ' |')
local field_sep = ' ' * table_sep^1 * ' '
local table_row_end = P(' ')^0 * table_sep^0
local table_row = lexer.starts_line(P(' ')^0 * table_sep) * cell_content^0 *
  (field_sep * cell_content^0)^0 * table_row_end
local table_row_header =
  lexer.starts_line(P(' ')^0 * table_sep * table_sep) * header_cell_content^0 *
  (field_sep * header_cell_content^0)^0 * table_row_end
-- Header rows are tried first, since a plain row would also accept the leading '|' of '||'.
-- The pattern is named `table_rule` rather than `table` so that the standard `table` library
-- is not shadowed for the remainder of the file.
local table_rule = table_row_header + table_row

lex:add_rule('table', table_rule)
-- Register the remaining rules, in this exact order.
local ordered_rules = {
  {id = 'link', patt = link},
  {id = 'line', patt = line},
  {id = 'header', patt = header},
  {id = 'comment', patt = comment},
  {id = 'whitespace', patt = ws},
  {id = 'image', patt = image},
  {id = 'macro', patt = macro},
  {id = 'inline', patt = inline},
  {id = 'verbatim_area', patt = verbatim_area},
  {id = 'raw_area', patt = raw_area},
  {id = 'tagged_area', patt = tagged_area}
}
for _, rule in ipairs(ordered_rules) do
  lex:add_rule(rule.id, rule.patt)
end
118
-- Styles for the custom token names used above.
lex:add_style('line', {bold = true})

-- Title sizes are based on the size found in the default style, or 10 when none is found:
-- level 1 gets the base size + 5, level 5 gets the base size + 1.
local default_style = lexer.property_expanded['style.default']
local font_size = tonumber(default_style:match('size:(%d+)')) or 10
for n = 5, 1, -1 do
  local title_style = {fore = lexer.colors.red, size = font_size + (6 - n)}
  lex:add_style('h' .. n, title_style)
end

-- Returns a fresh green-foreground style table on every call.
local function green()
  return {fore = lexer.colors.green}
end

lex:add_style('image', green())
lex:add_style('strike', {italics = true}) -- no strike style exists, so italics stand in for it
lex:add_style('tagged', lexer.styles.embedded)
lex:add_style('tagged_area', lexer.styles.embedded) -- kept consistent with 'tagged'
lex:add_style('table_sep', green())
lex:add_style('header_cell_content', green())

return lex