vis
a vi-like editor based on Plan 9's structural regular expressions
git clone https://9o.is/git/vis.git
perl.lua
(7370B)
1 -- Copyright 2006-2025 Mitchell. See LICENSE.
2 -- Perl LPeg lexer.
3
4 local lexer = lexer
5 local P, S = lpeg.P, lpeg.S
6
local lex = lexer.new(...)

-- Keywords. The words themselves are taken from the lexer's KEYWORD word list.
local keyword_words = lex:word_match(lexer.KEYWORD)
lex:add_rule('keyword', lex:tag(lexer.KEYWORD, keyword_words))

-- Markers. A `__DATA__` or `__END__` word, together with all text after it, is tagged as a
-- comment.
local data_marker = lexer.word_match('__DATA__ __END__')
local after_marker = lexer.any^0
lex:add_rule('marker', lex:tag(lexer.COMMENT, data_marker * after_marker))
14
-- Strings.
-- Opening bracket delimiters mapped to the closing delimiter that ends them.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}

-- Range patterns that have already been built, keyed by their opening delimiter, so that a
-- pattern is constructed once per delimiter instead of once per match attempt.
local delimited_ranges = {}

-- Returns the range pattern for text opened by the single character *delimiter*, or nil if
-- that character cannot act as a quote delimiter here.
-- Alphanumerics and underscores are rejected so that barewords such as `s_name` or `y_pos`
-- are not mistaken for quote-like operators. Whitespace is rejected so that text such as
-- `m => 1` or operators like "-s" are ignored. The empty string (end of input) is rejected
-- because there is nothing to delimit.
local function delimited_range(delimiter)
  if delimiter == '' or delimiter:find('[%w%s_]') then return nil end
  local patt = delimited_ranges[delimiter]
  if not patt then
    local closer = delimiter_matches[delimiter]
    if closer then
      -- Handle nested delimiter/matches in strings.
      patt = lexer.range(delimiter, closer, false, true, true)
    else
      patt = lexer.range(delimiter)
    end
    delimited_ranges[delimiter] = patt
  end
  return patt
end

-- Matches one delimited section, e.g. the `{...}` in `q{...}` (for single delimiter sets).
-- Produces no match when the character at the current position is not a usable delimiter.
local literal_delimited = P(function(input, index)
  local patt = delimited_range(input:sub(index, index))
  if not patt then return nil end
  return lpeg.match(patt, input, index) or #input + 1
end)

-- Matches two delimited sections, e.g. `/a/b/` in `s/a/b/` or `{a} {b}` in `s{a} {b}`
-- (for 2 delimiter sets). Produces no match when the character at the current position is
-- not a usable delimiter.
local literal_delimited2 = P(function(input, index)
  local patt = delimited_range(input:sub(index, index))
  if not patt then return nil end
  local first_match_pos = lpeg.match(patt, input, index)
  -- Same fallback as `literal_delimited`: without a first section, consume the remaining input.
  if not first_match_pos then return #input + 1 end
  -- With identical delimiters, the character that closed the first section also opens the
  -- second one, so retry the pattern starting on that character.
  local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
  if not final_match_pos then -- using (), [], {}, or <> notation
    final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
  end
  -- Never report a position that lies before the current one.
  if final_match_pos and final_match_pos < index then final_match_pos = index end
  return final_match_pos or #input + 1
end)
54
-- Quoted strings. Each is a range opened and closed by the same quote character.
local sq_str = lexer.range("'")
local dq_str = lexer.range('"')
local cmd_str = lexer.range('`')

-- Here-documents of the form `<<IDENT`, where IDENT is immediately followed by line breaks
-- and/or semicolons. The match runs through the terminator: IDENT at the start of a later line
-- and followed directly by a line break or the end of input. If there is no such line, the
-- rest of the input is consumed.
local heredoc = '<<' * P(function(input, index)
  -- Anchor the search at `index` so that a failed attempt does not scan the remaining input.
  local _, e, delimiter = input:find('^([%a_][%w_]*)[\n\r\f;]+', index)
  if not delimiter then return nil end
  -- `delimiter` only contains alphanumerics and underscores, so it needs no pattern escaping.
  local terminator = '[\n\r\f]+' .. delimiter
  local init = e
  while true do
    local _, stop = input:find(terminator, init)
    if not stop then return #input + 1 end
    -- Skip lines that merely start with the delimiter (e.g. `EOTHER` for `<<EOT`).
    local follower = input:sub(stop + 1, stop + 1)
    if follower == '' or follower:find('^[\n\r\f]') then return stop + 1 end
    init = stop + 1
  end
end)
-- Quote-like string literals: `q`, `qq`, `qw`, and `qx`, each followed by a delimited section.
local lit_str = P('q') * P('q')^-1 * literal_delimited
local lit_array = P('qw') * literal_delimited
local lit_cmd = P('qx') * literal_delimited
local any_string = sq_str + dq_str + cmd_str + heredoc + lit_str + lit_array + lit_cmd
local string = lex:tag(lexer.STRING, any_string)

-- Regular expressions.
-- A bare `/.../` range is only considered through `lexer.after_set()` with the punctuation
-- set given below.
local regex_flags = S('imosx')^0
local slash_regex = lexer.range('/', true) * regex_flags
local regex_str = lexer.after_set('-<>+*!~\\=%&|^?:;([{', slash_regex)
-- `qr`, `m`, `s`, and `tr`/`y` literals, each followed by its own set of trailing flags.
local lit_regex = P('qr') * literal_delimited * regex_flags
local lit_match = P('m') * literal_delimited * S('cgimosx')^0
local lit_sub = P('s') * literal_delimited2 * S('ecgimosx')^0
local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
local any_regex = regex_str + lit_regex + lit_match + lit_sub + lit_tr
local regex = lex:tag(lexer.REGEX, any_regex)

lex:add_rule('string', string + regex)
78
-- Functions.
-- Builtin functions come from the FUNCTION_BUILTIN word list; the lookahead after them makes
-- both the whitespace and the opening parenthesis optional.
local builtin_func = lex:tag(lexer.FUNCTION_BUILTIN, lex:word_match(lexer.FUNCTION_BUILTIN))
local optional_paren_ahead = #(lexer.space^0 * P('(')^-1)
lex:add_rule('function_builtin', builtin_func * optional_paren_ahead)

-- Any other word counts as a function only when an opening parenthesis follows it (optionally
-- after whitespace). A word directly preceded by `->` is tagged as a method instead.
local func = lex:tag(lexer.FUNCTION, lexer.word)
local method = lpeg.B('->') * lex:tag(lexer.FUNCTION_METHOD, lexer.word)
local paren_ahead = #(lexer.space^0 * '(')
lex:add_rule('function', (method + func) * paren_ahead)
86
-- Constants. The words are taken from the CONSTANT_BUILTIN word list.
local constant_words = lex:word_match(lexer.CONSTANT_BUILTIN)
lex:add_rule('constant', lex:tag(lexer.CONSTANT_BUILTIN, constant_words))

-- Identifiers.
local identifier = lex:tag(lexer.IDENTIFIER, lexer.word)
lex:add_rule('identifier', identifier)

-- Comments.
-- Block comments are bounded by `=` plus a letter and by `=cut`, both passed through
-- `lexer.starts_line()`. Line comments begin with `#`.
local block_open = lexer.starts_line('=' * lexer.alpha)
local block_close = lexer.starts_line('=cut')
local block_comment = lexer.range(block_open, block_close)
local line_comment = lexer.to_eol('#', true)
lex:add_rule('comment', lex:tag(lexer.COMMENT, block_comment + line_comment))

-- Numbers.
local number = lexer.number_('_')
lex:add_rule('number', lex:tag(lexer.NUMBER, number))
100
-- Variables.
-- Builtin scalars: `$` followed by a digit, a punctuation character, `^` with an optional
-- capital letter from the listed set, or `ARGV`.
local scalar_digit = lpeg.R('09')
local scalar_punct = S('!"$%&\'()+,-./:;<=>?@\\]_`|~')
local scalar_caret = '^' * S('ACDEFHILMNOPRSTVWX')^-1
local builtin_var_s = '$' * (scalar_digit + scalar_punct + scalar_caret + 'ARGV')
-- Builtin arrays and hashes.
local builtin_var_a = '@' * (S('+-_F') + 'ARGV' + 'INC' + 'ISA')
local builtin_var_h = '%' * (S('+-!') + '^' * S('H')^-1 + 'ENV' + 'INC' + 'SIG')
local builtin_var = builtin_var_s + builtin_var_a + builtin_var_h
lex:add_rule('variable_builtin', lex:tag(lexer.VARIABLE_BUILTIN, builtin_var))

-- Remaining special variables: `$` followed by one of the alternatives below.
local special_caret = '^' * S('ADEFHILMOPSTWX')^-1
local special_punct = S('\\"[]\'&`+*.,;=%~?@<>(|/!-')
local special_colon = ':' * (lexer.any - ':')
local special_dollar = P('$') * -lexer.word
local special_digits = lexer.digit^1
local special_var = '$' *
  (special_caret + special_punct + special_colon + special_dollar + special_digits)
-- Ordinary variables: a sigil (`$#`, `$`, `@`, or `%`), any number of extra `$`, and a word;
-- or a lone `$#`.
local sigil = '$#' + S('$@%')
local plain_var = sigil * P('$')^0 * lexer.word + '$#'
lex:add_rule('variable', lex:tag(lexer.VARIABLE, special_var + plain_var))
113
-- Operators. Every operator is a single character from this set.
local operator_chars = S('-<>+*!~\\=/%&|^.,?:;()[]{}')
lex:add_rule('operator', lex:tag(lexer.OPERATOR, operator_chars))

-- Fold points. Square brackets and curly braces tagged as operators open and close folds.
for _, pair in ipairs({{'[', ']'}, {'{', '}'}}) do
  lex:add_fold_point(lexer.OPERATOR, pair[1], pair[2])
end
120
-- Word lists.
-- Words matched by the 'keyword' rule.
local keyword_list = {
  -- Uppercase names.
  'STDIN', 'STDOUT', 'STDERR',
  'BEGIN', 'END', 'CHECK', 'INIT',
  -- Module loading.
  'require', 'use',
  -- Lowercase keywords.
  'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if',
  'last', 'local', 'my', 'next', 'our', 'package', 'return', 'sub',
  'unless', 'until', 'while',
  -- Double-underscore names.
  '__FILE__', '__LINE__', '__PACKAGE__',
  -- Word operators.
  'and', 'or', 'not',
  'eq', 'ne', 'lt', 'gt', 'le', 'ge'
}
lex:set_word_list(lexer.KEYWORD, keyword_list)
130
-- Words matched by the 'function_builtin' rule, grouped by first letter.
local builtin_function_list = {
  'abs', 'accept', 'alarm', 'atan2',
  'bind', 'binmode', 'bless',
  'caller', 'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir', 'close',
  'connect', 'cos', 'crypt',
  'dbmclose', 'dbmopen', 'defined', 'delete', 'die', 'dump',
  'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent', 'endpwent', 'endservent', 'eof',
  'eval', 'exec', 'exists', 'exit', 'exp',
  'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
  'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent',
  'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp', 'getppid',
  'getpriority', 'getprotobyname', 'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam',
  'getpwuid', 'getservbyname', 'getservbyport', 'getservent', 'getsockname', 'getsockopt',
  'glob', 'gmtime', 'goto', 'grep',
  'hex',
  'import', 'index', 'int', 'ioctl',
  'join',
  'keys', 'kill',
  'lcfirst', 'lc', 'length', 'link', 'listen', 'localtime', 'log', 'lstat',
  'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd',
  'new',
  'oct', 'opendir', 'open', 'ord',
  'pack', 'pipe', 'pop', 'pos', 'printf', 'print', 'prototype', 'push',
  'quotemeta',
  'rand', 'readdir', 'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
  'rewinddir', 'rindex', 'rmdir',
  'scalar', 'seekdir', 'seek', 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent',
  'sethostent', 'setnetent', 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
  'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown', 'sin', 'sleep',
  'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt', 'srand', 'stat', 'study',
  'substr', 'symlink', 'syscall', 'sysread', 'sysseek', 'system', 'syswrite',
  'telldir', 'tell', 'tied', 'tie', 'time', 'times', 'truncate',
  'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie', 'utime',
  'values', 'vec',
  'wait', 'waitpid', 'wantarray', 'warn', 'write'
}
lex:set_word_list(lexer.FUNCTION_BUILTIN, builtin_function_list)
154
-- Words matched by the 'constant' rule.
local builtin_constant_list = {
  'ARGV', 'ARGVOUT', 'DATA', 'ENV', 'INC', 'SIG',
  'STDERR', 'STDIN', 'STDOUT'
}
lex:set_word_list(lexer.CONSTANT_BUILTIN, builtin_constant_list)

-- Comment prefix stored in the 'scintillua.comment' property.
lexer.property['scintillua.comment'] = '#'

return lex