vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

sml.lua

(3262B)


      1 -- Copyright 2017-2025 Murray Calavera. See LICENSE.
      2 -- Standard ML LPeg lexer.
      3 
      4 local lexer = require('lexer')
      5 local token, word_match = lexer.token, lexer.word_match
      6 local P, S = lpeg.P, lpeg.S
      7 
      8 local lex = lexer.new('sml')
      9 
     10 -- Whitespace.
     11 local ws = token(lexer.WHITESPACE, lexer.space^1)
     12 lex:add_rule('whitespace', ws)
     13 
     14 -- Structures.
     15 local id = (lexer.alnum + "'" + '_')^0
     16 local aid = lexer.alpha * id
     17 local longid = (aid * '.')^0 * aid
     18 local struct_dec = token(lexer.KEYWORD, 'structure') * ws * token(lexer.CLASS, aid) * ws *
     19 	token(lexer.OPERATOR, '=') * ws
     20 lex:add_rule('struct_new', struct_dec * token(lexer.KEYWORD, 'struct'))
     21 lex:add_rule('struct_alias', struct_dec * token(lexer.CLASS, longid))
     22 lex:add_rule('structure', token(lexer.CLASS, aid * '.'))
     23 
     24 -- Open.
     25 lex:add_rule('open', token(lexer.KEYWORD, word_match('open structure functor')) * ws *
     26 	token(lexer.CLASS, longid))
     27 
     28 -- Keywords.
     29 lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
     30 	'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end', 'exception', 'fn',
     31 	'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'orelse',
     32 	'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while', --
     33 	'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', 'struct', 'structure'
     34 }))
     35 
     36 -- Types.
     37 lex:add_rule('type', token(lexer.TYPE, word_match{
     38 	'int', 'real', 'word', 'bool', 'char', 'string', 'unit', 'array', 'exn', 'list', 'option',
     39 	'order', 'ref', 'substring', 'vector'
     40 }))
     41 
     42 -- Functions.
     43 -- `real`, `vector` and `substring` are a problem.
     44 lex:add_rule('function', token(lexer.FUNCTION, word_match{
     45 	'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName', 'explode', 'floor', 'foldl',
     46 	'foldr', 'getOpt', 'hd', 'ignore', 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord',
     47 	'print', 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc', 'valOf', 'vector',
     48 	'o', 'abs', 'mod', 'div'
     49 }))
     50 
     51 -- Constants.
     52 lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false nil') + lexer.upper * id))
     53 
     54 -- Indentifiers (non-symbolic).
     55 lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * id))
     56 
     57 -- Strings.
     58 lex:add_rule('string', token(lexer.STRING, P('#')^-1 * lexer.range('"', true)))
     59 
     60 -- Comments.
     61 local line_comment = lexer.to_eol('(*)')
     62 local block_comment = lexer.range('(*', '*)', false, false, true)
     63 lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
     64 
     65 -- Numbers.
     66 local function num(digit) return digit * (digit^0 * '_')^0 * digit^1 + digit end
     67 local int = num(lexer.digit)
     68 local frac = '.' * int
     69 local minus = lpeg.P('~')^-1
     70 local exp = lpeg.S('eE') * minus * int
     71 local real = int * frac^-1 * exp + int * frac * exp^-1
     72 local hex = num(lexer.xdigit)
     73 local bin = num(lpeg.S('01'))
     74 -- LuaFormatter off
     75 lex:add_rule('number', token(lexer.NUMBER,
     76   '0w' * int +
     77   (P('0wx') + '0xw') * hex +
     78   (P('0wb') + '0bw') * bin +
     79   minus * '0x' * hex +
     80   minus * '0b' * bin +
     81   minus * real +
     82   minus * int))
     83 -- LuaFormatter on
     84 
     85 -- Type variables.
     86 lex:add_rule('typevar', token(lexer.VARIABLE, "'" * id))
     87 
     88 -- Operators.
     89 lex:add_rule('operator', token(lexer.OPERATOR, S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')))
     90 
     91 lexer.property['scintillua.comment'] = '(*)'
     92 
     93 return lex