vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

commit b37a0d7e6453dbd9f4fdc6b221d77104311c0350
parent 4cdab9e49fef0f176082d1edb4e060b5bd170d34
Author: stutonk <xealblade@gmail.com>
Date:   Sun, 19 Mar 2017 01:13:35 -0400

Fix errors and add ANS Forth 2012 keywords

Added all ANS Forth 2012 keywords as defined at
http://lars.nocrew.org/forth2012/core.html and removed keywords
that were not part of the standard. This necessitated rewriting
most of the Strings rules as well as removing some rules not
consistent with the standard. Only the s\" form should allow
escaping. The list of characters which may appear as part of a
keyword has also been expanded where appropriate.

Because '.' is a keyword as well as the first character in a string
pattern, strings must now be given parsing precedence over keywords
to ensure proper highlighting.

A few errors were also fixed, such as moving the `true` argument (which
should make keywords case-insensitive) inside the word_match function's
closing paren. Parens have been removed from the operator list and
moved to their correct place as the delimiters for block comments.

Diffstat:
M lua/lexers/forth.lua | 48 +++++++++++++++++++++++++++++++-----------------
1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/lua/lexers/forth.lua b/lua/lexers/forth.lua @@ -12,42 +12,56 @@ local ws = token(l.WHITESPACE, l.space^1) -- Comments. local line_comment = S('|\\') * l.nonnewline^0 -local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1 +local block_comment = '(' * (l.any - ')')^0 * P(')')^-1 local comment = token(l.COMMENT, line_comment + block_comment) -- Strings. +local c_str = 'c' * l.delimited_range('"', true, true) local s_str = 's' * l.delimited_range('"', true, true) +local s_bs_str = 's\\' * l.delimited_range('"', true, false) local dot_str = '.' * l.delimited_range('"', true, true) -local f_str = 'f' * l.delimited_range('"', true, true) -local dq_str = l.delimited_range('"', true, true) -local string = token(l.STRING, s_str + dot_str + f_str + dq_str) +local dot_paren_str = '.' * l.delimited_range('()', true, true, false) +local abort_str = 'abort' * l.delimited_range('"', true, true) +local string = token( + l.STRING, + c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str +) -- Numbers. local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1) -- Keywords. local keyword = token(l.KEYWORD, word_match({ - 'swap', 'drop', 'dup', 'nip', 'over', 'rot', '-rot', '2dup', '2drop', '2over', - '2swap', '>r', 'r>', - 'and', 'or', 'xor', '>>', '<<', 'not', 'negate', 'mod', '/mod', '1+', '1-', - 'base', 'hex', 'decimal', 'binary', 'octal', - '@', '!', 'c@', 'c!', '+!', 'cell+', 'cells', 'char+', 'chars', - 'create', 'does>', 'variable', 'variable,', 'literal', 'last', '1,', '2,', - '3,', ',', 'here', 'allot', 'parse', 'find', 'compile', - -- Operators. 
- 'if', '=if', '<if', '>if', '<>if', 'then', 'repeat', 'until', 'forth', 'macro' -}, '2><1-@!+3,=')) + '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>', + '0>', '0=', '1+', '1-', '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup', + '2over', '2r>', '2r@', '2swap', ':noname', '<#', '<>', '>body', '>in', + '>number', '>r', '?do','?dup', '@', 'abort', 'abs', 'accept', 'action-of', + 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl', + 'buffer:', 'c!', 'c,', 'c@', 'case', 'cell+', 'cells', 'char', 'char+', + 'chars', 'compile,', 'constant', 'count', 'cr', 'create', 'decimal', 'defer', + 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else', 'emit', + 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit', + 'false', 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if', + 'immediate', 'invert', 'is', 'j', 'key', 'leave', 'literal', 'loop', + 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move', 'negate', 'nip', 'of', + 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit', 'r>', + 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d', + 'save-input', 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces', + 'state', 'swap', 'to', 'then', 'true', 'tuck', 'type', 'u.', 'u.r', 'u>', + 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value', 'variable', + 'while', 'within', 'word', 'xor', '[\']', '[char]', '[compile]' +}, '><-@!?+,=[].\'', true)) -- Identifiers. -local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$'))^1) +local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$#'))^1) -- Operators. -local operator = token(l.OPERATOR, S(':;<>+*-/()[]')) +local operator = token(l.OPERATOR, S(':;<>+*-/[]#')) M._rules = { {'whitespace', ws}, - {'keyword', keyword}, {'string', string}, + {'keyword', keyword}, {'identifier', identifier}, {'comment', comment}, {'number', number},