vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

ruby.lua

(5277B)


      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- Ruby LPeg lexer.
      3 
      4 local lexer = lexer
      5 local P, S = lpeg.P, lpeg.S
      6 
      -- Create the lexer object; this chunk's own arguments are forwarded to lexer.new() unchanged.
      local lex = lexer.new(...)

      -- Keywords.
      -- The words themselves come from the word list registered under lexer.KEYWORD.
      local keyword_words = lex:word_match(lexer.KEYWORD)
      lex:add_rule('keyword', lex:tag(lexer.KEYWORD, keyword_words))
     11 
     -- Functions.
     -- A built-in name is tagged only when it is not immediately preceded by '.' and not
     -- immediately followed by '.', ':' or '|'.
     local builtin_words = lex:word_match(lexer.FUNCTION_BUILTIN)
     local builtin_func = lex:tag(lexer.FUNCTION_BUILTIN, builtin_words)
     local not_after_dot = -lpeg.B('.')
     local not_before_punct = -S('.:|')
     lex:add_rule('function', not_after_dot * builtin_func * not_before_punct)
     15 
     -- Identifiers.
     -- word_char: characters allowed after the first one (alphanumerics plus '_', '!' and '?').
     local word_char = lexer.alnum + S('_!?')
     -- A word starts with a letter or underscore and continues with any number of word_chars.
     local word_start = lexer.alpha + '_'
     local word = word_start * word_char^0
     lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, word))
     20 
     -- Comments.
     -- Block comments span from a line-initial '=begin' marker to a line-initial '=end' marker.
     local begin_marker = lexer.starts_line('=begin')
     local end_marker = lexer.starts_line('=end')
     local block_comment = lexer.range(begin_marker, end_marker)
     -- '#' comments run to the end of the line.
     local line_comment = lexer.to_eol('#', true)
     -- The block form is tried first, then the line form.
     lex:add_rule('comment', lex:tag(lexer.COMMENT, block_comment + line_comment))
     25 
     -- Strings.
     -- Opening delimiters of %-literals that close with a different character. Ruby pairs
     -- '(' ')', '[' ']', '{' '}' and '<' '>'; any other delimiter closes with itself.
     local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
     -- Range patterns already built, keyed by opening delimiter. A pattern depends only on its
     -- delimiter, so each one is built once instead of on every match attempt.
     local delimited_ranges = {}
     -- Match-time function that consumes the delimited body of a %-literal, e.g. `(...)` or `|...|`.
     -- @param input The text being matched.
     -- @param index Position in *input* of the candidate opening delimiter.
     -- @return The position just past the literal (or past the end of input if the range pattern
     --   does not match), or nil when the character at *index* cannot be a delimiter, which makes
     --   the enclosing pattern fail.
     local literal_delimited = P(function(input, index)
     	local delimiter = input:sub(index, index)
     	-- Alphanumerics and whitespace cannot delimit a literal.
     	if delimiter:find('[%w\r\n\f\t ]') then return nil end
     	local patt = delimited_ranges[delimiter]
     	if not patt then
     		local closer = delimiter_matches[delimiter]
     		if closer then
     			-- Handle nested delimiter/matches in strings.
     			patt = lexer.range(delimiter, closer, false, true, true)
     		else
     			patt = lexer.range(delimiter)
     		end
     		delimited_ranges[delimiter] = patt
     	end
     	return lpeg.match(patt, input, index) or #input + 1
     end)
     43 
     -- Plain quoted forms: backtick command strings, single-quoted and double-quoted strings.
     local cmd_str = lexer.range('`')
     local sq_str = lexer.range("'")
     local dq_str = lexer.range('"')
     -- Percent literals: '%', an optional type letter, then a delimited body.
     local lit_cmd = '%x' * literal_delimited
     -- Ruby has four array literals sharing one body syntax: %w/%W (strings) and %i/%I (symbols).
     local lit_array = '%' * S('wWiI') * literal_delimited
     -- %q, %Q, or a bare '%' directly followed by the delimiter.
     local lit_str = '%' * S('qQ')^-1 * literal_delimited
     -- Heredocs: `<<ID`, `<<-ID` and `<<~ID`, where ID may be wrapped in matching ", ' or ` quotes
     -- and must be followed by a newline (or ';').
     -- The match-time function is called with the position just after `<<`.
     -- @return The position just past the terminating identifier, the end of input when no
     --   terminator line exists, or nil (pattern fails) when `<<` does not introduce a heredoc.
     local heredoc = '<<' * P(function(input, index)
     	-- The leading '^' anchors the match at *index*, so text further ahead is never scanned
     	-- just to be rejected.
     	local _, intro_end, indented, _, delimiter = input:find(
     		'^([%-~]?)(["\'`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
     	if not delimiter then return nil end
     	-- `<<-` and `<<~` allow the terminator to be indented with spaces or tabs.
     	local terminator = '[\n\r\f]' .. (indented ~= '' and '[ \t]*' or '') .. delimiter
     	-- Start at the last character of the introducer so that newline can begin the terminator.
     	local init = intro_end
     	while true do
     		local s, e = input:find(terminator, init)
     		if not s then return #input + 1 end -- unterminated: consume the rest of the input
     		-- The identifier must end its line; a longer word such as `EOSX` does not close `EOS`.
     		local following = input:sub(e + 1, e + 1)
     		if following == '' or following:find('^[\n\r\f]') then return e + 1 end
     		init = s + 1
     	end
     end)
     -- All string forms, tried in this order. The locals are deliberately not called `string` or
     -- `regex`, so Lua's standard `string` library is not shadowed for the rest of the chunk.
     -- NOTE(review): a single trailing 'f' is accepted after any string form; the reason is not
     -- evident from this file -- confirm before changing.
     local string_forms = sq_str + dq_str + lit_str + heredoc + cmd_str + lit_cmd + lit_array
     local string_patt = lex:tag(lexer.STRING, string_forms * S('f')^-1)
     -- Any run of the flag letters i, o, m and x may follow a regex.
     local regex_flags = S('iomx')^0
     -- TODO: regex_str fails with `obj.method /patt/` syntax.
     local regex_str = lexer.after_set('!%^&*([{-=+|:;,?<>~', lexer.range('/', true) * regex_flags)
     local lit_regex = '%r' * literal_delimited * regex_flags
     local regex_patt = lex:tag(lexer.REGEX, regex_str + lit_regex)
     lex:add_rule('string', string_patt + regex_patt)
     65 
     -- Numbers.
     -- Character literal: '?' plus one non-space character that is not followed by a word character.
     local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char -- TODO: meta, control, etc.
     -- Ordinary numbers (built with '_' passed to number_()) may carry one 'r' or 'i' suffix.
     local numeric_suffix = S('ri')^-1
     local suffixed_number = lexer.number_('_') * numeric_suffix
     lex:add_rule('number', lex:tag(lexer.NUMBER, suffixed_number + numeric_literal))
     69 
     -- Variables.
     -- Punctuation that may follow '$' to form a special global such as `$!`, `$@` or `$&`.
     -- Letters are not listed here because the `word` alternative below already covers them.
     local special_global = S('!@&+`\'=~/\\,.;<>_*"$?:')
     -- Globals: `$name`, a special punctuation global, `$<digit>`, or a `$-x` option variable.
     local global_var = '$' * (word + special_global + lexer.digit + '-' * S('0FadiIKlpvw'))
     local class_var = '@@' * word
     local inst_var = '@' * word
     lex:add_rule('variable', lex:tag(lexer.VARIABLE, global_var + class_var + inst_var))
     76 
     -- Symbols.
     -- Match-time guard run right after a ':' has been matched. It consumes nothing and fails when
     -- the character before that ':' is also ':'.
     local function not_after_colon(input, index)
     	if input:sub(index - 2, index - 2) == ':' then return nil end
     	return true
     end
     -- A symbol body is a run of word characters or a quoted string.
     local symbol_body = word_char^1 + sq_str + dq_str
     local symbol = ':' * P(not_after_colon) * symbol_body
     lex:add_rule('symbol', lex:tag(lexer.STRING .. '.symbol', symbol))
     81 
     -- Operators.
     -- Every operator is tagged one character at a time.
     local operator_chars = S('!%^&*()[]{}-=+/|:;.,?<>~')
     lex:add_rule('operator', lex:tag(lexer.OPERATOR, operator_chars))
     84 
     -- Fold points.
     -- Fold-level callback for keywords that can also appear as trailing modifiers.
     -- @param text The text being folded.
     -- @param pos Position in *text*; everything before it is checked for a line continuation.
     --   Presumably the start of *line* within *text* -- confirm against the fold API.
     -- @param line The line containing the keyword.
     -- @param s Position of the keyword within *line*.
     -- @return 1 when the keyword is the first non-blank text on its line and the text before
     --   *pos* does not end in a backslash-continued newline; otherwise 0.
     local function disambiguate(text, pos, line, s)
     	-- Something other than whitespace precedes the keyword on its line.
     	if not line:sub(1, s - 1):match('^%s*$') then return 0 end
     	-- The preceding text ends with a backslash, optional blanks, and a newline.
     	if text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') then return 0 end
     	return 1
     end
     -- Keywords that always open a block closed by `end`.
     for _, keyword in ipairs{'begin', 'class', 'def', 'do', 'for', 'module', 'case'} do
     	lex:add_fold_point(lexer.KEYWORD, keyword, 'end')
     end
     -- Keywords whose fold level is decided by the callback above.
     for _, keyword in ipairs{'if', 'while', 'unless', 'until'} do
     	lex:add_fold_point(lexer.KEYWORD, keyword, disambiguate)
     end
     -- Bracket pairs.
     for _, pair in ipairs{{'(', ')'}, {'[', ']'}, {'{', '}'}} do
     	lex:add_fold_point(lexer.OPERATOR, pair[1], pair[2])
     end
     -- Block comments.
     lex:add_fold_point(lexer.COMMENT, '=begin', '=end')
    105 
    -- Word lists.
    -- Words tagged as keywords.
    local keywords = {
    	'BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def', 'defined?',
    	'do', 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module',
    	'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry', 'return', 'self', 'super',
    	'then', 'true', 'undef', 'unless', 'until', 'when', 'while', 'yield', '__FILE__', '__LINE__'
    }
    -- Words tagged as built-in functions.
    local builtin_functions = {
    	'at_exit', 'autoload', 'binding', 'caller', 'catch', 'chop', 'chop!', 'chomp', 'chomp!',
    	'eval', 'exec', 'exit', 'exit!', 'extend', 'fail', 'fork', 'format', 'gets',
    	'global_variables', 'gsub', 'gsub!', 'include', 'iterator?', 'lambda', 'load',
    	'local_variables', 'loop', 'module_function', 'open', 'p', 'print', 'printf', 'proc',
    	'putc', 'puts', 'raise', 'rand', 'readline', 'readlines', 'require', 'require_relative',
    	'select', 'sleep', 'split', 'sprintf', 'srand', 'sub', 'sub!', 'syscall', 'system',
    	'test', 'trace_var', 'trap', 'untrace_var'
    }
    lex:set_word_list(lexer.KEYWORD, keywords)
    lex:set_word_list(lexer.FUNCTION_BUILTIN, builtin_functions)
    122 
    -- Lexer properties: the line-comment prefix and the characters treated as part of a word.
    local props = lexer.property
    props['scintillua.comment'] = '#'
    props['scintillua.word.chars'] = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_?!'

    -- Hand the configured lexer back to the caller.
    return lex