vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

perl.lua

(7370B)


      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- Perl LPeg lexer.
      3 
      4 local lexer = lexer
      5 local P, S = lpeg.P, lpeg.S
      6 
      7 local lex = lexer.new(...)
      8 
      9 -- Keywords.
     10 lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))
     11 
     12 -- Markers.
     13 lex:add_rule('marker', lex:tag(lexer.COMMENT, lexer.word_match('__DATA__ __END__') * lexer.any^0))
     14 
     15 -- Strings.
     16 local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
     17 local literal_delimited = P(function(input, index) -- for single delimiter sets
     18 	local delimiter = input:sub(index, index)
     19 	if not delimiter:find('%w') then -- only non alpha-numerics
     20 		local patt
     21 		if delimiter_matches[delimiter] then
     22 			-- Handle nested delimiter/matches in strings.
     23 			local s, e = delimiter, delimiter_matches[delimiter]
     24 			patt = lexer.range(s, e, false, true, true)
     25 		else
     26 			patt = lexer.range(delimiter)
     27 		end
     28 		local match_pos = lpeg.match(patt, input, index)
     29 		return match_pos or #input + 1
     30 	end
     31 end)
     32 local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets
     33 	local delimiter = input:sub(index, index)
     34 	-- Only consider non-alpha-numerics and non-spaces as delimiters. The non-spaces are used to
     35 	-- ignore operators like "-s".
     36 	if not delimiter:find('[%w ]') then
     37 		local patt
     38 		if delimiter_matches[delimiter] then
     39 			-- Handle nested delimiter/matches in strings.
     40 			local s, e = delimiter, delimiter_matches[delimiter]
     41 			patt = lexer.range(s, e, false, true, true)
     42 		else
     43 			patt = lexer.range(delimiter)
     44 		end
     45 		local first_match_pos = lpeg.match(patt, input, index)
     46 		local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
     47 		if not final_match_pos then -- using (), [], {}, or <> notation
     48 			final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
     49 		end
     50 		if final_match_pos and final_match_pos < index then final_match_pos = index end
     51 		return final_match_pos or #input + 1
     52 	end
     53 end)
     54 
     55 local sq_str = lexer.range("'")
     56 local dq_str = lexer.range('"')
     57 local cmd_str = lexer.range('`')
     58 local heredoc = '<<' * P(function(input, index)
     59 	local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
     60 	if s == index and delimiter then
     61 		local end_heredoc = '[\n\r\f]+'
     62 		e = select(2, input:find(end_heredoc .. delimiter, e))
     63 		return e and e + 1 or #input + 1
     64 	end
     65 end)
     66 local lit_str = 'q' * P('q')^-1 * literal_delimited
     67 local lit_array = 'qw' * literal_delimited
     68 local lit_cmd = 'qx' * literal_delimited
     69 local string = lex:tag(lexer.STRING,
     70 	sq_str + dq_str + cmd_str + heredoc + lit_str + lit_array + lit_cmd)
     71 local regex_str = lexer.after_set('-<>+*!~\\=%&|^?:;([{', lexer.range('/', true) * S('imosx')^0)
     72 local lit_regex = 'qr' * literal_delimited * S('imosx')^0
     73 local lit_match = 'm' * literal_delimited * S('cgimosx')^0
     74 local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
     75 local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
     76 local regex = lex:tag(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub + lit_tr)
     77 lex:add_rule('string', string + regex)
     78 
     79 -- Functions.
     80 lex:add_rule('function_builtin',
     81 	lex:tag(lexer.FUNCTION_BUILTIN, lex:word_match(lexer.FUNCTION_BUILTIN)) *
     82 		#(lexer.space^0 * P('(')^-1))
     83 local func = lex:tag(lexer.FUNCTION, lexer.word)
     84 local method = lpeg.B('->') * lex:tag(lexer.FUNCTION_METHOD, lexer.word)
     85 lex:add_rule('function', (method + func) * #(lexer.space^0 * '('))
     86 
     87 -- Constants.
     88 lex:add_rule('constant', lex:tag(lexer.CONSTANT_BUILTIN, lex:word_match(lexer.CONSTANT_BUILTIN)))
     89 
     90 -- Identifiers.
     91 lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, lexer.word))
     92 
     93 -- Comments.
     94 local line_comment = lexer.to_eol('#', true)
     95 local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha), lexer.starts_line('=cut'))
     96 lex:add_rule('comment', lex:tag(lexer.COMMENT, block_comment + line_comment))
     97 
     98 -- Numbers.
     99 lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.number_('_')))
    100 
    101 -- Variables.
    102 local builtin_var_s = '$' *
    103 	(lpeg.R('09') + S('!"$%&\'()+,-./:;<=>?@\\]_`|~') + '^' * S('ACDEFHILMNOPRSTVWX')^-1 + 'ARGV')
    104 local builtin_var_a = '@' * (S('+-_F') + 'ARGV' + 'INC' + 'ISA')
    105 local builtin_var_h = '%' * (S('+-!') + '^' * S('H')^-1 + 'ENV' + 'INC' + 'SIG')
    106 lex:add_rule('variable_builtin',
    107 	lex:tag(lexer.VARIABLE_BUILTIN, builtin_var_s + builtin_var_a + builtin_var_h))
    108 local special_var = '$' *
    109 	('^' * S('ADEFHILMOPSTWX')^-1 + S('\\"[]\'&`+*.,;=%~?@<>(|/!-') + ':' * (lexer.any - ':') +
    110 		(P('$') * -lexer.word) + lexer.digit^1)
    111 local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
    112 lex:add_rule('variable', lex:tag(lexer.VARIABLE, special_var + plain_var))
    113 
    114 -- Operators.
    115 lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('-<>+*!~\\=/%&|^.,?:;()[]{}')))
    116 
    117 -- Fold points.
    118 lex:add_fold_point(lexer.OPERATOR, '[', ']')
    119 lex:add_fold_point(lexer.OPERATOR, '{', '}')
    120 
    121 -- Word lists.
    122 lex:set_word_list(lexer.KEYWORD, {
    123 	'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT', --
    124 	'require', 'use', --
    125 	'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if', 'last', 'local', 'my',
    126 	'next', 'our', 'package', 'return', 'sub', 'unless', 'until', 'while', '__FILE__', '__LINE__',
    127 	'__PACKAGE__', --
    128 	'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
    129 })
    130 
    131 lex:set_word_list(lexer.FUNCTION_BUILTIN, {
    132 	'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir', 'chmod',
    133 	'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir', 'close', 'connect', 'cos', 'crypt',
    134 	'dbmclose', 'dbmopen', 'defined', 'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent',
    135 	'endnetent', 'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit',
    136 	'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
    137 	'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin', 'getnetbyaddr',
    138 	'getnetbyname', 'getnetent', 'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
    139 	'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
    140 	'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime', 'goto', 'grep',
    141 	'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'lcfirst', 'lc', 'length',
    142 	'link', 'listen', 'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
    143 	'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop', 'pos', 'printf', 'print',
    144 	'prototype', 'push', 'quotemeta', 'rand', 'readdir', 'read', 'readlink', 'recv', 'redo', 'ref',
    145 	'rename', 'reset', 'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek',
    146 	'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent', 'setpgrp',
    147 	'setpriority', 'setprotoent', 'setpwent', 'setservent', 'setsockopt', 'shift', 'shmctl', 'shmget',
    148 	'shmread', 'shmwrite', 'shutdown', 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice',
    149 	'split', 'sprintf', 'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
    150 	'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time', 'times', 'truncate',
    151 	'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie', 'utime', 'values',
    152 	'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'
    153 })
    154 
    155 lex:set_word_list(lexer.CONSTANT_BUILTIN, {
    156 	'ARGV', 'ARGVOUT', 'DATA', 'ENV', 'INC', 'SIG', 'STDERR', 'STDIN', 'STDOUT'
    157 })
    158 
    159 lexer.property['scintillua.comment'] = '#'
    160 
    161 return lex