vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

cpp.lua

(13194B)


      1 -- Copyright 2006-2025 Mitchell. See LICENSE.
      2 -- C++ LPeg lexer.
      3 
-- Local aliases for the global Scintillua `lexer` module and the LPeg constructors used below.
local lexer = lexer
local P, S, B = lpeg.P, lpeg.S, lpeg.B

-- Create the lexer object; this chunk's varargs are forwarded unchanged to lexer.new().
local lex = lexer.new(...)
      8 
      9 -- Keywords.
     10 lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))
     11 
     12 -- Types.
     13 local basic_type = lex:tag(lexer.TYPE, lex:word_match(lexer.TYPE))
     14 local stl_type = lex:tag(lexer.TYPE .. '.stl', 'std::' * lex:word_match(lexer.TYPE .. '.stl'))
     15 lex:add_rule('type', basic_type + stl_type * -P('::'))
     16 
     17 -- Functions.
     18 local non_member = -(B('.') + B('->') + B('::'))
     19 local builtin_func = lex:tag(lexer.FUNCTION_BUILTIN,
     20 	P('std::')^-1 * lex:word_match(lexer.FUNCTION_BUILTIN))
     21 local stl_func = lex:tag(lexer.FUNCTION_BUILTIN .. '.stl',
     22 	'std::' * lex:word_match(lexer.FUNCTION_BUILTIN .. '.stl'))
     23 local func = lex:tag(lexer.FUNCTION, lexer.word)
     24 local method = (B('.') + B('->')) * lex:tag(lexer.FUNCTION_METHOD, lexer.word)
     25 lex:add_rule('function',
     26 	(non_member * (stl_func + builtin_func) + method + func) * #(lexer.space^0 * '('))
     27 
     28 -- Constants.
     29 local const =
     30 	lex:tag(lexer.CONSTANT_BUILTIN, P('std::')^-1 * lex:word_match(lexer.CONSTANT_BUILTIN))
     31 local stl_const = lex:tag(lexer.CONSTANT_BUILTIN .. '.stl',
     32 	'std::' * lex:word_match(lexer.CONSTANT_BUILTIN .. '.stl'))
     33 lex:add_rule('constants', stl_const + const)
     34 
     35 -- Strings.
     36 local sq_str = lexer.range("'", true)
     37 local dq_str = lexer.range('"', true)
     38 lex:add_rule('string', lex:tag(lexer.STRING, ('u8' + S('LuU'))^-1 * (sq_str + dq_str)))
     39 
     40 -- Identifiers.
     41 lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, lexer.word))
     42 
     43 -- Comments.
     44 local line_comment = lexer.to_eol('//', true)
     45 local block_comment = lexer.range('/*', '*/')
     46 lex:add_rule('comment', lex:tag(lexer.COMMENT, line_comment + block_comment))
     47 
     48 -- Numbers.
     49 lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.number_("'")))
     50 
     51 -- Preprocessor.
     52 local include = lex:tag(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
     53 	(lex:get_rule('whitespace') * lex:tag(lexer.STRING, lexer.range('<', '>', true)))^-1
     54 local preproc = lex:tag(lexer.PREPROCESSOR, '#' * S('\t ')^0 * lex:word_match(lexer.PREPROCESSOR))
     55 lex:add_rule('preprocessor', include + preproc)
     56 
     57 -- Attributes.
     58 lex:add_rule('attribute', lex:tag(lexer.ATTRIBUTE, '[[' * lex:word_match(lexer.ATTRIBUTE) * ']]'))
     59 
     60 -- Operators.
     61 lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')))
     62 
     63 -- Fold points.
     64 lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif')
     65 lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif')
     66 lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif')
     67 lex:add_fold_point(lexer.OPERATOR, '{', '}')
     68 lex:add_fold_point(lexer.COMMENT, '/*', '*/')
     69 
     70 -- Word lists.
     71 lex:set_word_list(lexer.KEYWORD, {
     72 	'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast', 'continue', 'default',
     73 	'delete', 'do', 'dynamic_cast', 'else', 'explicit', 'export', 'extern', 'false', 'for', 'friend',
     74 	'goto', 'if', 'inline', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected',
     75 	'public', 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast', 'switch',
     76 	'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typename', 'using', 'virtual',
     77 	'volatile', 'while',
     78 	-- Operators.
     79 	'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq',
     80 	-- C++11.
     81 	'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept', 'nullptr', 'override',
     82 	'static_assert', 'thread_local', --
     83 	'consteval', 'constinit', 'co_await', 'co_return', 'co_yield', 'requires' -- C++20
     84 })
     85 
     86 lex:set_word_list(lexer.TYPE, {
     87 	'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union',
     88 	'unsigned', 'void', 'wchar_t', --
     89 	'char16_t', 'char32_t', -- C++11
     90 	'char8_t', -- C++20
     91 	-- <cstddef>
     92 	'size_t', 'ptrdiff_t', 'max_align_t', --
     93 	'byte', -- C++17
     94 	-- <cstdint>
     95 	'int8_t', 'int16_t', 'int32_t', 'int64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
     96 	'int_fast64_t', 'int_least8_t', 'int_least16_t', 'int_least32_t', 'int_least64_t', 'intmax_t',
     97 	'intptr_t', 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'uint_fast8_t', 'uint_fast16_t',
     98 	'uint_fast32_t', 'uint_fast64_t', 'uint_least8_t', 'uint_least16_t', 'uint_least32_t',
     99 	'uint_least64_t', 'uintmax_t', 'uintptr_t'
    100 })
    101 
    102 lex:set_word_list(lexer.TYPE .. '.stl', {
    103 	'any', 'bad_any_cast', -- <any> C++17
    104 	'array', -- <array> C++11
    105 	'atomic', -- <atomic> C++11
    106 	'barrier', -- <barrier> C++20
    107 	'bitset', -- <bitset>
    108 	-- <concepts> C++20
    109 	'same_as', 'derived_from', 'convertible_to', 'common_reference_with', 'common_with', 'integral',
    110 	'signed_integral', 'unsigned_integral', 'floating_point', 'assignable_from', 'swappable',
    111 	'swappable_with', 'destructible', 'constructible_from', 'default_initializable',
    112 	'move_constructible', 'copy_constructible', 'equality_comparable', 'equality_comparable_with',
    113 	'movable', 'copyable', 'semiregular', 'regular', 'invocable', 'regular_invocable', 'predicate',
    114 	'relation', 'equivalence_relation', 'strict_weak_order', --
    115 	'complex', -- <complex>
    116 	'deque', -- <deque>
    117 	'exception', 'bad_exception', -- <exception>
    118 	'forward_list', -- <forward_list> C++11
    119 	'function', 'hash', -- <functional> C++11
    120 	-- <future> C++11
    121 	'promise', 'packaged_task', 'future', 'shared_future', 'launch', 'future_status', 'future_error',
    122 	'future_errc', --
    123 	'initializer_list', -- <initializer_list>
    124 	'istream', 'iostream', -- <istream>
    125 	-- <iterator>
    126 	'reverse_iterator', 'back_insert_iterator', 'front_insert_iterator', 'insert_iterator',
    127 	'istream_iterator', 'ostream_iterator', 'istreambuf_iterator', 'ostreambuf_iterator', --
    128 	'move_iterator', -- C++11
    129 	'latch', -- <latch> C++20
    130 	'list', -- <list>
    131 	-- <map>
    132 	'map', 'multimap', --
    133 	'unordered_set', 'unordered_map', 'unordered_multiset', 'unordered_multimap', -- C++11
    134 	'unique_ptr', 'shared_ptr', 'weak_ptr', -- <memory> C++11
    135 	-- <mutex> C++11
    136 	'mutex', 'timed_mutex', 'recursive_mutex', 'recursive_timed_mutex', 'lock_guard', 'unique_lock', --
    137 	'scoped_lock', -- C++17
    138 	'optional', 'bad_optional_access', -- <optional> C++17
    139 	'ostream', -- <ostream>
    140 	'queue', 'priority_queue', -- <queue>
    141 	-- <random> C++11
    142 	'linear_congruential_engine', 'mersenne_twister_engine', 'subtract_with_carry_engine',
    143 	'discard_block_engine', 'independent_bits_engine', 'shuffle_order_engine', 'random_device',
    144 	'uniform_int_distribution', 'uniform_real_distribution', 'bernoulli_distribution',
    145 	'binomial_distribution', 'negative_binomial_distribution', 'geometric_distribution',
    146 	'poisson_distribution', 'exponential_distribution', 'gamma_distribution', 'weibull_distribution',
    147 	'extreme_value_distribution', 'normal_distribution', 'lognormal_distribution',
    148 	'chi_squared_distribution', 'cauchy_distribution', 'fisher_f_distribution',
    149 	'student_t_distribution', 'discrete_distibution', 'piecewise_constant_distribution',
    150 	'piecewise_linear_distribution', 'seed_seq', --
    151 	'ratio', -- <ratio> C++11
    152 	-- <regex> C++11
    153 	'regex', 'csub_match', 'ssub_match', 'cmatch', 'smatch', 'cregex_iterator', 'sregex_iterator',
    154 	'cregex_token_iterator', 'sregex_token_iterator', 'regex_error', 'regex_traits', --
    155 	'counting_semaphore', 'binary_semaphore', -- <semaphore> C++20
    156 	'set', 'multiset', -- <set>
    157 	'span', -- <span> C++20
    158 	'stringbuf', 'istringstream', 'ostringstream', 'stringstream', -- <stringstream>
    159 	'stack', -- <stack>
    160 	-- <stdexcept>
    161 	'logic_error', 'invalid_argument', 'domain_error', 'length_error', 'out_of_range',
    162 	'runtime_error', 'range_error', 'overflow_error', 'underflow_error', --
    163 	'streambuf', -- <streambuf>
    164 	-- <string>
    165 	'string', --
    166 	'u16string', 'u32string', -- C++11
    167 	'u8string', -- C++20
    168 	-- <string_view> C++17
    169 	'string_view', 'u16string_view', 'u32string_view', --
    170 	'u8string_view', -- C++20
    171 	'syncbuf', 'osyncstream', -- <syncstream> C++20
    172 	'thread', -- <thread> C++11
    173 	'tuple', 'tuple_size', 'tuple_element', -- <tuple> C++11
    174 	'pair', -- <utility>
    175 	'variant', 'monostate', 'bad_variant_access', 'variant_size', 'variant_alternative', -- <variant> C++17
    176 	'vector' -- <vector>
    177 })
    178 
    179 lex:set_word_list(lexer.FUNCTION_BUILTIN, {
    180 	'assert', -- <cassert>
    181 	-- <cctype>
    182 	'isalnum', 'isalpha', 'islower', 'isupper', 'isdigit', 'isxdigit', 'iscntrl', 'isgraph',
    183 	'isspace', 'isprint', 'ispunct', 'tolower', 'toupper', --
    184 	'isblank', -- C++11
    185 	'va_start', 'va_arg', 'va_end', -- <cstdarg>
    186 	-- <cmath>
    187 	'abs', 'fmod', 'exp', 'log', 'log10', 'pow', 'sqrt', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan',
    188 	'atan2', 'sinh', 'cosh', 'tanh', 'ceil', 'floor', 'frexp', 'ldexp', 'modf',
    189 	-- C++11.
    190 	'remainder', 'remquo', 'exp2', 'expm1', 'log2', 'log1p', 'cbrt', 'hypot', 'asinh', 'acosh',
    191 	'atanh', 'erf', 'erfc', 'tgamma', 'lgamma', 'trunc', 'round', 'nearbyint', 'rint', 'scalbn',
    192 	'ilogb', 'logb', 'nextafter', 'copysign', 'isfinite', 'isinf', 'isnan', 'isnormal', 'signbit',
    193 	'isgreater', 'isgreaterequal', 'isless', 'islessequal', 'islessgreater', 'isunordered', --
    194 	-- C++17.
    195 	'assoc_laguerre', 'assoc_legendre', 'beta', 'comp_ellint_1', 'comp_ellint_2', 'comp_ellint_3',
    196 	'cyl_bessel_i', 'cyl_bessel_j', 'cyl_bessel_k', 'cyl_neumann', 'ellint_1', 'ellint_2', 'ellint_3',
    197 	'expint', 'lhermite', 'lgendre', 'laguerre', 'riemann_zeta', 'sph_bessel', 'sph_legendre',
    198 	'sph_neumann', --
    199 	'lerp', -- C++20
    200 	-- <cstring>
    201 	'strcpy', 'strncpy', 'strcat', 'strncat', 'strxfrm', 'strlen', 'strcmp', 'strncmp', 'strcoll',
    202 	'strchr', 'strrchr', 'strspn', 'strcspn', 'strpbrk', 'strstr', 'strtok', 'memchr', 'memcmp',
    203 	'memset', 'memcpy', 'memmove', 'strerror'
    204 })
    205 
    206 lex:set_word_list(lexer.FUNCTION_BUILTIN .. '.stl', {
    207 	-- <algorithm>
    208 	'for_each', 'count', 'count_if', 'mismatch', 'find', 'find_if', 'find_end', 'find_first_of',
    209 	'adjacent_find', 'search', 'search_n', 'copy', 'copy_backward', 'fill', 'fill_n', 'transform',
    210 	'generate', 'generate_n', 'remove', 'remove_if', 'remove_copy', 'remove_copy_if', 'replace',
    211 	'replace_if', 'replace_copy', 'replace_copy_if', 'swap', 'swap_ranges', 'iter_swap', 'reverse',
    212 	'reverse_copy', 'rotate', 'rotate_copy', 'unique_copy', 'partition', 'stable_partition', 'sort',
    213 	'partial_sort', 'partial_sort_copy', 'stable_sort', 'nth_element', 'lower_bound', 'upper_bound',
    214 	'binary_search', 'equal_range', 'merge', 'inplace_merge', 'includes', 'set_difference',
    215 	'set_intersection', 'set_symmetric_difference', 'set_union', 'make_heap', 'push_heap', 'pop_heap',
    216 	'sort_heap', 'max', 'max_element', 'min', 'min_element', 'equal', 'lexicographical_compare',
    217 	'next_permutation', 'prev_permutation', --
    218 	-- C++11.
    219 	'all_of', 'any_of', 'none_of', 'find_if_not', 'copy_if', 'copy_n', 'move', 'move_backward',
    220 	'shuffle', 'is_partitioned', 'partition_copy', 'partition_point', 'is_sorted', 'is_sorted_until',
    221 	'is_heap', 'is_heap_until', 'minmax', 'minmax_element', 'is_permutation', --
    222 	'for_each_n', 'random_shuffle', 'sample', 'clamp', -- C++17
    223 	'shift_left', 'shift_right', 'lexicographical_compare_three_way', -- C++20
    224 	'make_any', 'any_cast', -- <any> C++17
    225 	-- <bit> C++20
    226 	'bit_cast', 'byteswap', 'has_single_bit', 'bit_ceil', 'bit_floor', 'bit_width', 'rotl', 'rotr',
    227 	'countl_zero', 'countl_one', 'countl_zero', 'countr_one', 'popcount', --
    228 	'from_chars', 'to_chars', -- <charconv> C++17
    229 	-- <format> C++20
    230 	'format', 'format_to', 'format_to_n', 'formatted_size', 'vformat', 'vformat_to',
    231 	'visit_format_arg', 'make_format_args', --
    232 	'async', 'future_category', -- <future> C++11
    233 	-- <iterator>
    234 	'front_inserter', 'back_inserter', 'inserter', --
    235 	'make_move_iterator', -- C++11
    236 	'make_reverse_iterator', -- C++14
    237 	-- <memory>
    238 	'make_shared', 'allocate_shared', 'static_pointer_cast', 'dynamic_pointer_cast',
    239 	'const_pointer_cast', --
    240 	'make_unique', -- C++14
    241 	'reinterpret_pointer_cast', -- C++17
    242 	'try_lock', 'lock', 'call_once', -- <mutex> C++11
    243 	-- <numeric>
    244 	'accumulate', 'inner_product', 'adjacent_difference', 'partial_sum', --
    245 	'iota', -- C++11
    246 	'reduce', 'transform_reduce', 'inclusive_scan', 'exclusive_scan', 'gcd', 'lcm', -- C++17
    247 	'midpoint', -- C++20
    248 	'make_optional', -- <optional> C++17
    249 	'generate_canonical', -- <random> C++11
    250 	'regex_match', 'regex_search', 'regex_replace', -- <regex> C++11
    251 	'as_bytes', 'as_writable_bytes', -- <span> C++20
    252 	-- <tuple> C++11
    253 	'make_tuple', 'tie', 'forward_as_tuple', 'tuple_cat', --
    254 	'apply', 'make_from_tuple', -- C++17
    255 	-- <utility>
    256 	'swap', 'make_pair', 'get', --
    257 	'forward', 'move', 'move_if_noexcept', 'declval', -- C++11
    258 	'exchange', -- C++14
    259 	'as_const', -- C++17
    260 	-- C++20.
    261 	'cmp_equal', 'cmp_not_equal', 'cmp_less', 'cmp_greater', 'cmp_less_equal', 'cmp_greater_equal',
    262 	'in_range', --
    263 	'visit', 'holds_alternative', 'get_if' -- <variant> C++17
    264 })
    265 
    266 lex:set_word_list(lexer.CONSTANT_BUILTIN .. '.stl', {
    267 	'cin', 'cout', 'cerr', 'clog', -- <iostream>
    268 	'endl', 'ends', 'flush', -- <ostream>
    269 	'nullopt' -- <optional> C++17
    270 })
    271 
    272 lex:set_word_list(lexer.PREPROCESSOR, {
    273 	'define', 'defined', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'import', 'line',
    274 	'pragma', 'undef', 'using', 'warning', --
    275 	'export', 'include', 'module' -- C++20
    276 })
    277 
    278 lex:set_word_list(lexer.ATTRIBUTE, {
    279 	'carries_dependency', 'noreturn', -- C++11
    280 	'deprecated', -- C++14
    281 	'fallthrough', 'maybe_unused', 'nodiscard', -- C++17
    282 	'likely', 'no_unique_address', 'unlikely' -- C++20
    283 })
    284 
-- Record '//' as this language's comment prefix in the 'scintillua.comment' property.
lexer.property['scintillua.comment'] = '//'

-- Hand the configured lexer object back to whoever loaded this chunk.
return lex