vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

text-objects.c

(11573B)


      1 #include <errno.h>
      2 #include <stdlib.h>
      3 #include <string.h>
      4 #include <ctype.h>
      5 #include "text-motions.h"
      6 #include "text-objects.h"
      7 #include "text-util.h"
      8 #include "util.h"
      9 
     10 #define blank(c) ((c) == ' ' || (c) == '\t')
     11 #define space(c) (isspace((unsigned char)c))
     12 #define boundary(c) (isboundary((unsigned char)c))
     13 
     14 Filerange text_object_entire(Text *txt, size_t pos) {
     15 	return text_range_new(0, text_size(txt));
     16 }
     17 
     18 static Filerange text_object_customword(Text *txt, size_t pos, int (*isboundary)(int)) {
     19 	Filerange r;
     20 	char c, prev = '0', next = '0';
     21 	Iterator it = text_iterator_get(txt, pos);
     22 	if (!text_iterator_byte_get(&it, &c))
     23 		return text_range_empty();
     24 	if (pos > 0 && text_iterator_byte_prev(&it, &prev))
     25 		text_iterator_byte_next(&it, NULL);
     26 	text_iterator_byte_next(&it, &next);
     27 	if (space(c)) {
     28 		r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
     29 		r.end = text_customword_start_next(txt, pos, isboundary);
     30 	} else if (boundary(c)) {
     31 		if (boundary(prev) && !space(prev))
     32 			r.start = text_customword_start_prev(txt, pos, isboundary);
     33 		else
     34 			r.start = pos;
     35 
     36 		if (boundary(next) && !space(next))
     37 			r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
     38 		else
     39 			r.end = text_char_next(txt, pos);
     40 	} else {
     41 		if (boundary(prev))
     42 			r.start = pos;
     43 		else
     44 			r.start = text_customword_start_prev(txt, pos, isboundary);
     45 
     46 		if (boundary(next))
     47 			r.end = text_char_next(txt, pos);
     48 		else
     49 			r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
     50 	}
     51 
     52 	return r;
     53 }
     54 
     55 Filerange text_object_word(Text *txt, size_t pos) {
     56 	return text_object_customword(txt, pos, is_word_boundary);
     57 }
     58 
     59 Filerange text_object_longword(Text *txt, size_t pos) {
     60 	return text_object_customword(txt, pos, isspace);
     61 }
     62 
     63 static Filerange text_object_customword_outer(Text *txt, size_t pos, int (*isboundary)(int)) {
     64 	Filerange r;
     65 	char c, prev = '0', next = '0';
     66 	Iterator it = text_iterator_get(txt, pos);
     67 	if (!text_iterator_byte_get(&it, &c))
     68 		return text_range_empty();
     69 	if (pos > 0 && text_iterator_byte_prev(&it, &prev))
     70 		text_iterator_byte_next(&it, NULL);
     71 	text_iterator_byte_next(&it, &next);
     72 	if (space(c)) {
     73 		/* middle of two words, include leading white space */
     74 		r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
     75 		r.end = text_char_next(txt, text_customword_end_next(txt, pos, isboundary));
     76 	} else if (boundary(prev) && boundary(next)) {
     77 		if (boundary(c)) {
     78 			r.start = text_char_next(txt, text_customword_end_prev(txt, pos, isboundary));
     79 			r.end = text_word_start_next(txt, text_customword_end_next(txt, pos, isboundary));
     80 		} else {
     81 			/* on a single character */
     82 			r.start = pos;
     83 			r.end = text_customword_start_next(txt, pos, isboundary);
     84 		}
     85 	} else if (boundary(prev)) {
     86 		/* at start of a word */
     87 		r.start = pos;
     88 		r.end = text_customword_start_next(txt, text_customword_end_next(txt, pos, isboundary), isboundary);
     89 	} else if (boundary(next)) {
     90 		/* at end of a word */
     91 		r.start = text_customword_start_prev(txt, pos, isboundary);
     92 		r.end = text_customword_start_next(txt, pos, isboundary);
     93 	} else {
     94 		/* in the middle of a word */
     95 		r.start = text_customword_start_prev(txt, pos, isboundary);
     96 		r.end = text_customword_start_next(txt, text_customword_end_next(txt, pos, isboundary), isboundary);
     97 	}
     98 
     99 	return r;
    100 }
    101 
    102 Filerange text_object_longword_outer(Text *txt, size_t pos) {
    103 	return text_object_customword_outer(txt, pos, isspace);
    104 }
    105 
    106 Filerange text_object_word_outer(Text *txt, size_t pos) {
    107 	return text_object_customword_outer(txt, pos, is_word_boundary);
    108 }
    109 
    110 Filerange text_object_word_find_next(Text *txt, size_t pos, const char *word) {
    111 	size_t len = strlen(word);
    112 	for (;;) {
    113 		size_t match_pos = text_find_next(txt, pos, word);
    114 		if (match_pos != pos) {
    115 			Filerange match_word = text_object_word(txt, match_pos);
    116 			if (text_range_size(&match_word) == len)
    117 				return match_word;
    118 			pos = match_word.end;
    119 		} else {
    120 			return text_range_empty();
    121 		}
    122 	}
    123 }
    124 
    125 Filerange text_object_word_find_prev(Text *txt, size_t pos, const char *word) {
    126 	size_t len = strlen(word);
    127 	for (;;) {
    128 		size_t match_pos = text_find_prev(txt, pos, word);
    129 		if (match_pos != pos) {
    130 			Filerange match_word = text_object_word(txt, match_pos);
    131 			if (text_range_size(&match_word) == len)
    132 				return match_word;
    133 			pos = match_pos;
    134 		} else {
    135 			return text_range_empty();
    136 		}
    137 	}
    138 }
    139 
    140 Filerange text_object_find_next(Text *txt, size_t pos, const char *search) {
    141 	size_t start = text_find_next(txt, pos, search);
    142 	if (start == pos)
    143 		return text_range_empty();
    144 	return text_range_new(start, start+strlen(search));
    145 }
    146 
    147 Filerange text_object_find_prev(Text *txt, size_t pos, const char *search) {
    148 	size_t start = text_find_prev(txt, pos, search);
    149 	if (start == pos)
    150 		return text_range_empty();
    151 	return text_range_new(start, start+strlen(search));
    152 }
    153 
    154 Filerange text_object_line(Text *txt, size_t pos) {
    155 	Filerange r;
    156 	r.start = text_line_begin(txt, pos);
    157 	r.end = text_line_next(txt, pos);
    158 	return r;
    159 }
    160 
    161 Filerange text_object_line_inner(Text *txt, size_t pos) {
    162 	Filerange r = text_object_line(txt, pos);
    163 	return text_range_inner(txt, &r);
    164 }
    165 
    166 Filerange text_object_sentence(Text *txt, size_t pos) {
    167 	Filerange r;
    168 	r.start = text_sentence_prev(txt, pos);
    169 	r.end = text_sentence_next(txt, pos);
    170 	return r;
    171 }
    172 
    173 static bool text_line_blank(Text *txt, size_t pos) {
    174 	char c;
    175 	bool b = true;
    176 	Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
    177 	while (text_iterator_byte_get(&it, &c) && c != '\n' && (b = blank(c)))
    178 		text_iterator_char_next(&it, NULL);
    179 	return b;
    180 }
    181 
    182 Filerange text_object_paragraph(Text *txt, size_t pos) {
    183 	char c;
    184 	Filerange r;
    185 	if (text_line_blank(txt, pos)) {
    186 		Iterator it = text_iterator_get(txt, pos), rit = it;
    187 		while (text_iterator_byte_get(&rit, &c) && (c == '\n' || blank(c)))
    188 			text_iterator_byte_prev(&rit, NULL);
    189 		if (c == '\n' || blank(c))
    190 			r.start = rit.pos;
    191 		else
    192 			r.start = text_line_next(txt, rit.pos);
    193 		while (text_iterator_byte_get(&it, &c) && (c == '\n' || blank(c)))
    194 			text_iterator_byte_next(&it, NULL);
    195 		if (it.pos == text_size(txt))
    196 			r.end = rit.pos;
    197 		else
    198 			r.end = text_line_begin(txt, it.pos);
    199 	} else {
    200 		r.start = text_line_blank_prev(txt, pos);
    201 		if (r.start > 0 || (text_byte_get(txt, r.start, &c) && c == '\n'))
    202 			r.start = text_line_next(txt, r.start);
    203 		r.end = text_line_blank_next(txt, pos);
    204 	}
    205 	return r;
    206 }
    207 
    208 Filerange text_object_paragraph_outer(Text *txt, size_t pos) {
    209 	Filerange p1 = text_object_paragraph(txt, pos);
    210 	Filerange p2 = text_object_paragraph(txt, p1.end);
    211 	return text_range_union(&p1, &p2);
    212 }
    213 
    214 static Filerange text_object_bracket(Text *txt, size_t pos, char type) {
    215 	char c, open, close;
    216 	int opened = 1, closed = 1;
    217 	Filerange r = text_range_empty();
    218 
    219 	switch (type) {
    220 	case '(':  case ')': open = '(';  close = ')';  break;
    221 	case '{':  case '}': open = '{';  close = '}';  break;
    222 	case '[':  case ']': open = '[';  close = ']';  break;
    223 	case '<':  case '>': open = '<';  close = '>';  break;
    224 	case '"':            open = '"';  close = '"';  break;
    225 	case '`':            open = '`';  close = '`';  break;
    226 	case '\'':           open = '\''; close = '\''; break;
    227 	default: return r;
    228 	}
    229 
    230 	Iterator it = text_iterator_get(txt, pos);
    231 
    232 	if (open == close && text_iterator_byte_get(&it, &c) && (c == '"' || c == '`' || c == '\'')) {
    233 		size_t match = text_bracket_match(txt, pos, NULL);
    234 		r.start = MIN(pos, match) + 1;
    235 		r.end = MAX(pos, match);
    236 		return r;
    237 	}
    238 
    239 	while (text_iterator_byte_get(&it, &c)) {
    240 		if (c == open && --opened == 0) {
    241 			r.start = it.pos + 1;
    242 			break;
    243 		} else if (c == close && it.pos != pos) {
    244 			opened++;
    245 		}
    246 		text_iterator_byte_prev(&it, NULL);
    247 	}
    248 
    249 	it = text_iterator_get(txt, pos);
    250 	while (text_iterator_byte_get(&it, &c)) {
    251 		if (c == close && --closed == 0) {
    252 			r.end = it.pos;
    253 			break;
    254 		} else if (c == open && it.pos != pos) {
    255 			closed++;
    256 		}
    257 		text_iterator_byte_next(&it, NULL);
    258 	}
    259 
    260 	if (!text_range_valid(&r))
    261 		return text_range_empty();
    262 	return r;
    263 }
    264 
    265 Filerange text_object_square_bracket(Text *txt, size_t pos) {
    266 	return text_object_bracket(txt, pos, ']');
    267 }
    268 
    269 Filerange text_object_curly_bracket(Text *txt, size_t pos) {
    270 	return text_object_bracket(txt, pos, '}');
    271 }
    272 
    273 Filerange text_object_angle_bracket(Text *txt, size_t pos) {
    274 	return text_object_bracket(txt, pos, '>');
    275 }
    276 
    277 Filerange text_object_parenthesis(Text *txt, size_t pos) {
    278 	return text_object_bracket(txt, pos, ')');
    279 }
    280 
    281 Filerange text_object_quote(Text *txt, size_t pos) {
    282 	return text_object_bracket(txt, pos, '"');
    283 }
    284 
    285 Filerange text_object_single_quote(Text *txt, size_t pos) {
    286 	return text_object_bracket(txt, pos, '\'');
    287 }
    288 
    289 Filerange text_object_backtick(Text *txt, size_t pos) {
    290 	return text_object_bracket(txt, pos, '`');
    291 }
    292 
    293 Filerange text_object_search_forward(Text *txt, size_t pos, Regex *regex) {
    294 	size_t start = pos;
    295 	size_t end = text_size(txt);
    296 	RegexMatch match[1];
    297 	bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
    298 	if (found)
    299 		return text_range_new(match[0].start, match[0].end);
    300 	return text_range_empty();
    301 }
    302 
    303 Filerange text_object_search_backward(Text *txt, size_t pos, Regex *regex) {
    304 	size_t start = 0;
    305 	size_t end = pos;
    306 	RegexMatch match[1];
    307 	bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0);
    308 	if (found)
    309 		return text_range_new(match[0].start, match[0].end);
    310 	return text_range_empty();
    311 }
    312 
    313 Filerange text_object_indentation(Text *txt, size_t pos) {
    314 	char c;
    315 	size_t bol = text_line_begin(txt, pos);
    316 	size_t sol = text_line_start(txt, bol);
    317 	size_t start = bol;
    318 	size_t end = text_line_next(txt, bol);
    319 	size_t line_indent = sol - bol;
    320 	bool line_empty = text_byte_get(txt, bol, &c) && c == '\n';
    321 
    322 	char *buf = text_bytes_alloc0(txt, bol, line_indent);
    323 	char *tmp = malloc(line_indent);
    324 
    325 	if (!buf || !tmp) {
    326 		free(buf);
    327 		free(tmp);
    328 		return text_range_empty();
    329 	}
    330 
    331 	while ((bol = text_line_begin(txt, text_line_prev(txt, start))) != start) {
    332 		sol = text_line_start(txt, bol);
    333 		size_t indent = sol - bol;
    334 		if (indent < line_indent)
    335 			break;
    336 		bool empty = text_byte_get(txt, bol, &c) && c == '\n';
    337 		if (line_empty && !empty)
    338 			break;
    339 		if (line_indent == 0 && empty)
    340 			break;
    341 		text_bytes_get(txt, bol, line_indent, tmp);
    342 		if (memcmp(buf, tmp, line_indent))
    343 			break;
    344 		start = bol;
    345 	}
    346 
    347 	do {
    348 		bol = end;
    349 		sol = text_line_start(txt, bol);
    350 		size_t indent = sol - bol;
    351 		if (indent < line_indent)
    352 			break;
    353 		bool empty = text_byte_get(txt, bol, &c) && c == '\n';
    354 		if (line_empty && !empty)
    355 			break;
    356 		if (line_indent == 0 && empty)
    357 			break;
    358 		text_bytes_get(txt, bol, line_indent, tmp);
    359 		if (memcmp(buf, tmp, line_indent))
    360 			break;
    361 		end = text_line_next(txt, bol);
    362 	} while (bol != end);
    363 
    364 	free(buf);
    365 	free(tmp);
    366 	return text_range_new(start, end);
    367 }
    368 
    369 Filerange text_range_linewise(Text *txt, Filerange *rin) {
    370 	Filerange rout = *rin;
    371 	rout.start = text_line_begin(txt, rin->start);
    372 	if (rin->end != text_line_begin(txt, rin->end))
    373 		rout.end = text_line_next(txt, rin->end);
    374 	return rout;
    375 }
    376 
    377 bool text_range_is_linewise(Text *txt, Filerange *r) {
    378 	return text_range_size(r) > 0 &&
    379 	       r->start == text_line_begin(txt, r->start) &&
    380 	       r->end == text_line_begin(txt, r->end);
    381 }
    382 
    383 Filerange text_range_inner(Text *txt, Filerange *rin) {
    384 	char c;
    385 	Filerange r = *rin;
    386 	Iterator it = text_iterator_get(txt, rin->start);
    387 	while (text_iterator_byte_get(&it, &c) && space(c))
    388 		text_iterator_byte_next(&it, NULL);
    389 	r.start = it.pos;
    390 	it = text_iterator_get(txt, rin->end);
    391 	do r.end = it.pos; while (text_iterator_byte_prev(&it, &c) && space(c));
    392 	return r;
    393 }