vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

xml.lua

(2696B)


      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- XML LPeg lexer.
      3 
      4 local lexer = lexer
      5 local P, S = lpeg.P, lpeg.S
      6 
      7 local lex = lexer.new(...)
      8 
      9 -- Comments and CDATA.
     10 lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
     11 lex:add_rule('cdata', lex:tag('cdata', lexer.range('<![CDATA[', ']]>')))
     12 
     13 -- Doctype.
     14 local ws = lex:get_rule('whitespace')
     15 local identifier = (lexer.alpha + S('_-')) * (lexer.alnum + S('_-'))^0
     16 local doctype = lex:tag(lexer.TAG .. '.doctype', '<!DOCTYPE') * ws *
     17 	lex:tag(lexer.TAG .. '.doctype', identifier) * (ws * identifier)^-1 * (1 - P('>'))^0 *
     18 	lex:tag(lexer.TAG .. '.doctype', '>')
     19 lex:add_rule('doctype', doctype)
     20 
     21 -- Processing instructions.
     22 lex:add_rule('proc_insn', lex:tag(lexer.TAG .. '.pi', '<?' * identifier + '?>'))
     23 
     24 -- Tags.
     25 local namespace = lex:tag(lexer.OPERATOR, ':') * lex:tag(lexer.LABEL, identifier)
     26 lex:add_rule('element', lex:tag(lexer.TAG, '<' * P('/')^-1 * identifier) * namespace^-1)
     27 
     28 -- Closing tags.
     29 lex:add_rule('close_tag', lex:tag(lexer.TAG, P('/')^-1 * '>'))
     30 
     31 -- Equals.
     32 -- TODO: performance is terrible on large files.
     33 local in_tag = P(function(input, index)
     34 	local before = input:sub(1, index - 1)
     35 	local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
     36 	if s and e then return s > e end
     37 	if s then return true end
     38 	return input:find('^[^<]->', index) ~= nil
     39 end)
     40 
     41 local equals = lex:tag(lexer.OPERATOR, '=') -- * in_tag
     42 -- lex:add_rule('equal', equals)
     43 
     44 -- Attributes.
     45 local attribute_eq = lex:tag(lexer.ATTRIBUTE, identifier) * namespace^-1 * ws^-1 * equals
     46 lex:add_rule('attribute', attribute_eq)
     47 
     48 -- Strings.
     49 local sq_str = lexer.range("'", false, false)
     50 local dq_str = lexer.range('"', false, false)
     51 lex:add_rule('string', lex:tag(lexer.STRING, lexer.after_set('=', sq_str + dq_str)))
     52 
     53 -- Numbers.
     54 local number = lex:tag(lexer.NUMBER, lexer.dec_num * P('%')^-1)
     55 lex:add_rule('number', lexer.after_set('=', number)) -- *in_tag)
     56 
     57 -- Entities.
     58 local predefined = lex:tag(lexer.CONSTANT_BUILTIN .. '.entity',
     59 	'&' * lexer.word_match('lt gt amp apos quot') * ';')
     60 local general = lex:tag(lexer.CONSTANT .. '.entity', '&' * identifier * ';')
     61 lex:add_rule('entity', predefined + general)
     62 
     63 -- Fold Points.
     64 local function disambiguate_lt(text, pos, line, s) return not line:find('^</', s) and 1 or -1 end
     65 lex:add_fold_point(lexer.TAG, '<', disambiguate_lt)
     66 lex:add_fold_point(lexer.TAG, '/>', -1)
     67 lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
     68 lex:add_fold_point('cdata', '<![CDATA[', ']]>')
     69 
     70 lexer.property['scintillua.comment'] = '<!--|-->'
     71 lexer.property['scintillua.angle.braces'] = '1'
     72 lexer.property['scintillua.word.chars'] =
     73 	'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'
     74 
     75 return lex