vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

yaml.lua

(4277B)


      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- YAML LPeg lexer.
      3 -- It does not keep track of indentation perfectly.
      4 
      5 local lexer = lexer -- local alias of the global `lexer`
      6 local word_match = lexer.word_match
      7 local P, S, B = lpeg.P, lpeg.S, lpeg.B -- LPeg pattern, character-set and look-behind constructors
      8 
      9 local lex = lexer.new(..., {fold_by_indentation = true}) -- all rules below are registered on `lex`
     10 
     11 -- Distinguish between horizontal and vertical space so indenting tabs can be marked as errors.
        -- `tab_indent` tags a tab accepted by lexer.starts_line('\t', true) as lexer.ERROR .. '.indent'.
        -- The replaced 'whitespace' rule tries it first and otherwise tags a run of space/CR/LF
        -- characters, or a run of tabs, as ordinary whitespace.
     12 local tab_indent = lex:tag(lexer.ERROR .. '.indent', lexer.starts_line('\t', true))
     13 lex:modify_rule('whitespace', tab_indent + lex:tag(lexer.WHITESPACE, S(' \r\n')^1 + P('\t')^1))
     14 
     15 -- Document boundaries.
        -- A '---' or '...' accepted by lexer.starts_line() is tagged as an operator.
     16 lex:add_rule('doc_bounds', lex:tag(lexer.OPERATOR, lexer.starts_line(P('---') + '...')))
     17 
     18 -- Keys.
        -- `word` is one or more alphanumerics or '-'. A key is a word followed by any number of
        -- groups made of spaces, tabs or underscores and an optional further word. It is tagged as a
        -- string only when ':' plus a whitespace character follows (look-ahead, not consumed), and
        -- never when the text at this position starts with '- '.
     19 local word = (lexer.alnum + '-')^1
     20 lex:add_rule('key', -P('- ') * lex:tag(lexer.STRING, word * (S(' \t_')^1 * word^-1)^0) *
     21 	#P(':' * lexer.space))
     22 
     23 -- Collections.
        -- Tagged as operators, tried in this order:
        --   * '?' or '-' followed by a space, matched through lexer.after_set() with '?-:\n' as the
        --     set and ' \t' as the third argument;
        --   * ':' followed by a whitespace character;
        --   * any one of '[', ']', '{', '}';
        --   * ',' followed by a space.
        -- The following space/whitespace is only looked at, never consumed.
     24 lex:add_rule('collection', lex:tag(lexer.OPERATOR,
     25 	lexer.after_set('?-:\n', S('?-') * #P(' '), ' \t') + ':' * #P(lexer.space) + S('[]{}') + ',' *
     26 		#P(' ')))
     27 
     28 -- Alias indicators.
        -- Anchors ('&' + word) and aliases ('*' + word): the leading character is tagged as an
        -- operator and the word that follows it as a label.
     29 local anchor = lex:tag(lexer.OPERATOR, '&') * lex:tag(lexer.LABEL, word)
     30 local alias = lex:tag(lexer.OPERATOR, '*') * lex:tag(lexer.LABEL, word)
     31 lex:add_rule('alias', anchor + alias)
     32 
     33 -- Tags.
        -- Three forms are tagged as types, tried in this order:
        --   * explicit: '!!' followed by one of the listed collection or scalar type names;
        --   * verbatim: '!' followed by a '<' ... '>' range;
        --   * short: '!' followed by a word and an optional '!' plus non-whitespace suffix.
     34 local explicit_tag = '!!' * word_match{
     35 	'map', 'omap', 'pairs', 'set', 'seq', -- collection
     36 	'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp', 'value', 'yaml' -- scalar
     37 }
     38 local verbatim_tag = '!' * lexer.range('<', '>', true)
     39 local short_tag = '!' * word * ('!' * (1 - lexer.space)^1)^-1
     40 lex:add_rule('tag', lex:tag(lexer.TYPE, explicit_tag + verbatim_tag + short_tag))
     41 
     42 -- Comments.
        -- Whatever lexer.to_eol('#') matches is tagged as a comment.
     43 lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#')))
     44 
     45 -- Reserved.
        -- '@' and '`' are flagged as errors in two places: directly after ': ' or ', ', or as the
        -- first character of a line (lexer.starts_line()).
        -- The look-behind guards only the first alternative. A position preceded by ': ' or ', ' is
        -- never a line start, so a look-behind wrapped around both alternatives would make the
        -- line-start case unreachable.
     46 lex:add_rule('reserved', lex:tag(lexer.ERROR,
     47 	B(S(':,') * ' ') * S('@`') + lexer.starts_line(S('@`'))))
     48 
     49 -- Constants.
        -- `scalar_end` is a look-ahead reused by the rules below: optional spaces/tabs followed by a
        -- newline, or one of ',', ']', '}', or end of input. It consumes nothing.
        -- 'null', 'true' and 'false' are tagged as built-in constants only when scalar_end follows.
        -- NOTE(review): the `true` passed to word_match presumably makes the match case-insensitive;
        -- confirm against the Scintillua API.
     50 local scalar_end = #(S(' \t')^0 * lexer.newline + S(',]}') + -1)
     51 lex:add_rule('constant',
     52 	lex:tag(lexer.CONSTANT_BUILTIN, word_match('null true false', true)) * scalar_end)
     53 
     54 -- Strings.
        -- Single- and double-quoted ranges are tagged as strings, but only when followed by
        -- scalar_end or by ':' plus a whitespace character (both are look-aheads).
     55 local sq_str = lexer.range("'")
     56 local dq_str = lexer.range('"')
     57 lex:add_rule('string', lex:tag(lexer.STRING, sq_str + dq_str) * (scalar_end + #P(':' * lexer.space)))
     58 
     59 -- Timestamps.
        -- Accepted shape:
        --   date:  four-digit year, '-', one- or two-digit month, '-', one- or two-digit day;
        --   time (optional): one of 't', 'T', space or tab, then H[H]:MM:SS, an optional '.' with
        --     zero or more digits, and an optional zone;
        --   zone:  'Z', or an optional space/tab, a sign, H[H] and an optional ':MM'.
        -- The match is tagged lexer.NUMBER .. '.timestamp' and must be followed by scalar_end.
        -- Only digit counts are checked; the numeric ranges of the fields are not validated.
     60 local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit
     61 local month = lexer.digit * lexer.digit^-1
     62 local day = lexer.digit * lexer.digit^-1
     63 local date = year * '-' * month * '-' * day
     64 local hours = lexer.digit * lexer.digit^-1
     65 local minutes = lexer.digit * lexer.digit
     66 local seconds = lexer.digit * lexer.digit
     67 local fraction = '.' * lexer.digit^0
     68 local time = hours * ':' * minutes * ':' * seconds * fraction^-1
     69 local zone = 'Z' + S(' \t')^-1 * S('-+') * hours * (':' * minutes)^-1
     70 lex:add_rule('timestamp', lex:tag(lexer.NUMBER .. '.timestamp',
     71 	date * (S('tT \t') * time * zone^-1)^-1) * scalar_end)
     72 
     73 -- Numbers.
        -- A number is lexer.number or an optionally signed '.' followed by 'inf' or 'nan'
        -- (word_match called with `true`). It must be followed by scalar_end.
        -- When the character just before the match is alphanumeric, the text is tagged DEFAULT
        -- instead of NUMBER, presumably so that digits at the end of a word are not highlighted.
     74 local special_num = S('+-')^-1 * '.' * word_match('inf nan', true)
     75 local number = lexer.number + special_num
     76 lex:add_rule('number', (B(lexer.alnum) * lex:tag(lexer.DEFAULT, number) +
     77 	lex:tag(lexer.NUMBER, number)) * scalar_end)
     78 
     79 -- Scalars.
        -- Text matched here is tagged DEFAULT. Four alternatives are tried in order: block scalars,
        -- text after '- ', text after ', ', and text after ': '.
        --
        -- Block scalar header: '|' or '>' optionally followed by a sign ('-'/'+') and a digit, in
        -- either order, then a newline.
     80 local block_indicator = S('|>') * (S('-+') * lexer.digit^-1 + lexer.digit * S('-+')^-1)^-1
        -- Match-time function that finds the end of the block. `indent` is looked up in
        -- lexer.indent_amount for the line containing the indicator. The block ends at the newline
        -- before the first later non-blank line whose leading spaces/tabs number `indent` or fewer;
        -- if no such line exists the block runs to the end of the input.
     81 local block = lpeg.Cmt(lpeg.C(block_indicator * lexer.newline), function(input, index, indicator)
     82 	local indent = lexer.indent_amount[lexer.line_from_position(index - #indicator)]
     83 	for s, i, j in input:gmatch('()\n()[ \t]*()[^ \t\r\n]', index) do -- ignore blank lines
     84 		if s >= index then -- compatibility for Lua < 5.4, which doesn't have init for string.gmatch()
     85 			if j - i <= indent then return s end
     86 		end
     87 	end
     88 	return #input + 1
     89 end)
        -- After '- ': the rest of the line. After ', ': everything up to the next ',', ']' or '}'.
     90 local seq = B('- ') * lexer.nonnewline^1
     91 local csv = B(', ') * (lexer.nonnewline - S(',]}'))^1
        -- After ': ': the function scans backwards from the value to the nearest '{' or newline,
        -- giving up at position 1. Only when the scan stopped on '{' is the value treated as flow
        -- content, which also ends at ',' or '}'; otherwise it runs to the end of the line.
        -- Testing for '{' rather than for "not a newline" matters on the first line of the input:
        -- there the scan reaches position 1 without finding either stop character, and that case
        -- must not count as flow content.
     92 local stop_chars, LBRACE = {[string.byte('{')] = true, [string.byte('\n')] = true}, string.byte('{')
     93 local map = B(': ') * lexer.nonnewline * P(function(input, index)
     94 	local pos = index
     95 	while pos > 1 and not stop_chars[input:byte(pos)] do pos = pos - 1 end
     96 	local s = input:find(input:byte(pos) == LBRACE and '[\n,}]' or '\n', index)
     97 	return s or #input + 1
     98 end)
     99 lex:add_rule('scalar', lex:tag(lexer.DEFAULT, block + seq + csv + map))
    100 
    101 -- Directives.
        -- Whatever lexer.to_eol('%') matches, when accepted by lexer.starts_line(), is tagged as a
        -- preprocessor token.
    102 lex:add_rule('directive', lex:tag(lexer.PREPROCESSOR, lexer.starts_line(lexer.to_eol('%'))))
    103 
    104 lexer.property['scintillua.comment'] = '#' -- '#' is the character the comment rule starts on
    105 
    106 return lex -- the chunk's result is the configured lexer object