vis
a vi-like editor based on Plan 9's structural regular expressions
git clone https://9o.is/git/vis.git
text-motions.c
(18137B)
1 #include <ctype.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <wchar.h>
5 #include <errno.h>
6 #include <limits.h>
7 #include "text-motions.h"
8 #include "text-util.h"
9 #include "util.h"
10 #include "text-objects.h"
11
/* Character classification helpers. Every macro argument is fully
 * parenthesized so that arbitrary expressions can be passed safely. */
#define blank(c) ((c) == ' ' || (c) == '\t')
/* The cast to unsigned char keeps negative char values out of <ctype.h>. */
#define space(c) (isspace((unsigned char)(c)))
/* Expands to a call of whatever `isboundary` names at the point of use
 * (in this file: the function pointer parameter of the customword motions). */
#define boundary(c) (isboundary((unsigned char)(c)))
15
// TODO: specify this per file type?
/* Default word boundary predicate: returns 1 for every ASCII character that
 * is not a digit, a letter or an underscore, and 0 for everything else
 * (including all non-ASCII values). */
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	if (c == '_')
		return 0;
	if ('0' <= c && c <= '9')
		return 0;
	if ('a' <= c && c <= 'z')
		return 0;
	if ('A' <= c && c <= 'Z')
		return 0;
	return 1;
}
22
23 size_t text_begin(Text *txt, size_t pos) {
24 return 0;
25 }
26
27 size_t text_end(Text *txt, size_t pos) {
28 return text_size(txt);
29 }
30
31 size_t text_char_next(Text *txt, size_t pos) {
32 Iterator it = text_iterator_get(txt, pos);
33 text_iterator_char_next(&it, NULL);
34 return it.pos;
35 }
36
37 size_t text_char_prev(Text *txt, size_t pos) {
38 Iterator it = text_iterator_get(txt, pos);
39 text_iterator_char_prev(&it, NULL);
40 return it.pos;
41 }
42
43 size_t text_codepoint_next(Text *txt, size_t pos) {
44 Iterator it = text_iterator_get(txt, pos);
45 text_iterator_codepoint_next(&it, NULL);
46 return it.pos;
47 }
48
49 size_t text_codepoint_prev(Text *txt, size_t pos) {
50 Iterator it = text_iterator_get(txt, pos);
51 text_iterator_codepoint_prev(&it, NULL);
52 return it.pos;
53 }
54
55 static size_t find_next(Text *txt, size_t pos, const char *s, bool line) {
56 if (!s)
57 return pos;
58 size_t len = strlen(s), matched = 0;
59 Iterator it = text_iterator_get(txt, pos), sit;
60 for (char c; matched < len && text_iterator_byte_get(&it, &c); ) {
61 if (c == s[matched]) {
62 if (matched == 0)
63 sit = it;
64 matched++;
65 } else if (matched > 0) {
66 it = sit;
67 matched = 0;
68 }
69 text_iterator_byte_next(&it, NULL);
70 if (line && c == '\n')
71 break;
72 }
73 return matched == len ? it.pos - len : pos;
74 }
75
76 size_t text_find_next(Text *txt, size_t pos, const char *s) {
77 return find_next(txt, pos, s, false);
78 }
79
80 size_t text_line_find_next(Text *txt, size_t pos, const char *s) {
81 return find_next(txt, pos, s, true);
82 }
83
84 static size_t find_prev(Text *txt, size_t pos, const char *s, bool line) {
85 if (!s)
86 return pos;
87 size_t len = strlen(s), matched = len - 1;
88 Iterator it = text_iterator_get(txt, pos), sit;
89 if (len == 0)
90 return pos;
91 for (char c; text_iterator_byte_prev(&it, &c); ) {
92 if (c == s[matched]) {
93 if (matched == 0)
94 return it.pos;
95 if (matched == len - 1)
96 sit = it;
97 matched--;
98 } else if (matched < len - 1) {
99 it = sit;
100 matched = len - 1;
101 }
102 if (line && c == '\n')
103 break;
104 }
105 return pos;
106 }
107
108 size_t text_find_prev(Text *txt, size_t pos, const char *s) {
109 return find_prev(txt, pos, s, false);
110 }
111
112 size_t text_line_find_prev(Text *txt, size_t pos, const char *s) {
113 return find_prev(txt, pos, s, true);
114 }
115
116 size_t text_line_prev(Text *txt, size_t pos) {
117 Iterator it = text_iterator_get(txt, pos);
118 text_iterator_byte_find_prev(&it, '\n');
119 return it.pos;
120 }
121
122 size_t text_line_begin(Text *txt, size_t pos) {
123 Iterator it = text_iterator_get(txt, pos);
124 return text_iterator_byte_find_prev(&it, '\n') ? it.pos+1 : it.pos;
125 }
126
127 size_t text_line_start(Text *txt, size_t pos) {
128 char c;
129 Iterator it = text_iterator_get(txt, text_line_begin(txt, pos));
130 while (text_iterator_byte_get(&it, &c) && blank(c))
131 text_iterator_byte_next(&it, NULL);
132 return it.pos;
133 }
134
135 size_t text_line_finish(Text *txt, size_t pos) {
136 char c;
137 size_t end = text_line_end(txt, pos);
138 Iterator it = text_iterator_get(txt, end);
139 if (!text_iterator_byte_prev(&it, &c) || c == '\n')
140 return end;
141 while (blank(c) && text_iterator_byte_prev(&it, &c));
142 return it.pos + (c == '\n');
143 }
144
145 size_t text_line_end(Text *txt, size_t pos) {
146 Iterator it = text_iterator_get(txt, pos);
147 text_iterator_byte_find_next(&it, '\n');
148 return it.pos;
149 }
150
151 size_t text_line_next(Text *txt, size_t pos) {
152 Iterator it = text_iterator_get(txt, pos);
153 if (text_iterator_byte_find_next(&it, '\n'))
154 text_iterator_byte_next(&it, NULL);
155 return it.pos;
156 }
157
158 size_t text_line_offset(Text *txt, size_t pos, size_t off) {
159 char c;
160 size_t bol = text_line_begin(txt, pos);
161 Iterator it = text_iterator_get(txt, bol);
162 while (off-- > 0 && text_iterator_byte_get(&it, &c) && c != '\n')
163 text_iterator_byte_next(&it, NULL);
164 return it.pos;
165 }
166
167 size_t text_line_char_set(Text *txt, size_t pos, int count) {
168 char c;
169 size_t bol = text_line_begin(txt, pos);
170 Iterator it = text_iterator_get(txt, bol);
171 if (text_iterator_byte_get(&it, &c) && c != '\n')
172 while (count-- > 0 && text_iterator_char_next(&it, &c) && c != '\n');
173 return it.pos;
174 }
175
176 int text_line_char_get(Text *txt, size_t pos) {
177 char c;
178 int count = 0;
179 size_t bol = text_line_begin(txt, pos);
180 Iterator it = text_iterator_get(txt, bol);
181 if (text_iterator_byte_get(&it, &c) && c != '\n') {
182 while (it.pos < pos && c != '\n' && text_iterator_char_next(&it, &c))
183 count++;
184 }
185 return count;
186 }
187
188 int text_line_width_get(Text *txt, size_t pos) {
189 int width = 0;
190 mbstate_t ps = { 0 };
191 size_t bol = text_line_begin(txt, pos);
192 Iterator it = text_iterator_get(txt, bol);
193
194 while (it.pos < pos) {
195 char buf[MB_LEN_MAX];
196 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
197 if (len == 0 || buf[0] == '\n')
198 break;
199 wchar_t wc;
200 size_t wclen = mbrtowc(&wc, buf, len, &ps);
201 if (wclen == (size_t)-1 && errno == EILSEQ) {
202 ps = (mbstate_t){0};
203 /* assume a replacement symbol will be displayed */
204 width++;
205 } else if (wclen == (size_t)-2) {
206 /* do nothing, advance to next character */
207 } else if (wclen == 0) {
208 /* assume NUL byte will be displayed as ^@ */
209 width += 2;
210 } else if (buf[0] == '\t') {
211 width++;
212 } else {
213 int w = wcwidth(wc);
214 if (w == -1)
215 w = 2; /* assume non-printable will be displayed as ^{char} */
216 width += w;
217 }
218
219 if (!text_iterator_codepoint_next(&it, NULL))
220 break;
221 }
222
223 return width;
224 }
225
226 size_t text_line_width_set(Text *txt, size_t pos, int width) {
227 int cur_width = 0;
228 mbstate_t ps = { 0 };
229 size_t bol = text_line_begin(txt, pos);
230 Iterator it = text_iterator_get(txt, bol);
231
232 for (;;) {
233 char buf[MB_LEN_MAX];
234 size_t len = text_bytes_get(txt, it.pos, sizeof buf, buf);
235 if (len == 0 || buf[0] == '\n')
236 break;
237 wchar_t wc;
238 size_t wclen = mbrtowc(&wc, buf, len, &ps);
239 if (wclen == (size_t)-1 && errno == EILSEQ) {
240 ps = (mbstate_t){0};
241 /* assume a replacement symbol will be displayed */
242 cur_width++;
243 } else if (wclen == (size_t)-2) {
244 /* do nothing, advance to next character */
245 } else if (wclen == 0) {
246 /* assume NUL byte will be displayed as ^@ */
247 cur_width += 2;
248 } else if (buf[0] == '\t') {
249 cur_width++;
250 } else {
251 int w = wcwidth(wc);
252 if (w == -1)
253 w = 2; /* assume non-printable will be displayed as ^{char} */
254 cur_width += w;
255 }
256
257 if (cur_width >= width || !text_iterator_codepoint_next(&it, NULL))
258 break;
259 }
260
261 return it.pos;
262 }
263
264 size_t text_line_char_next(Text *txt, size_t pos) {
265 char c;
266 Iterator it = text_iterator_get(txt, pos);
267 if (!text_iterator_byte_get(&it, &c) || c == '\n')
268 return pos;
269 text_iterator_char_next(&it, NULL);
270 return it.pos;
271 }
272
273 size_t text_line_char_prev(Text *txt, size_t pos) {
274 char c;
275 Iterator it = text_iterator_get(txt, pos);
276 if (!text_iterator_char_prev(&it, &c) || c == '\n')
277 return pos;
278 return it.pos;
279 }
280
281 size_t text_line_up(Text *txt, size_t pos) {
282 int width = text_line_width_get(txt, pos);
283 size_t prev = text_line_prev(txt, pos);
284 return text_line_width_set(txt, prev, width);
285 }
286
287 size_t text_line_down(Text *txt, size_t pos) {
288 int width = text_line_width_get(txt, pos);
289 size_t next = text_line_next(txt, pos);
290 if (next == text_size(txt))
291 return pos;
292 return text_line_width_set(txt, next, width);
293 }
294
295 size_t text_range_line_first(Text *txt, Filerange *r) {
296 if (!text_range_valid(r))
297 return EPOS;
298 return r->start;
299 }
300
301 size_t text_range_line_last(Text *txt, Filerange *r) {
302 if (!text_range_valid(r))
303 return EPOS;
304 size_t pos = text_line_begin(txt, r->end);
305 if (pos == r->end) {
306 /* range ends at a begin of a line, skip last line ending */
307 pos = text_line_prev(txt, pos);
308 pos = text_line_begin(txt, pos);
309 }
310 return r->start <= pos ? pos : r->start;
311 }
312
313 size_t text_range_line_next(Text *txt, Filerange *r, size_t pos) {
314 if (!text_range_contains(r, pos))
315 return EPOS;
316 size_t newpos = text_line_next(txt, pos);
317 return newpos != pos && newpos < r->end ? newpos : EPOS;
318 }
319
320 size_t text_range_line_prev(Text *txt, Filerange *r, size_t pos) {
321 if (!text_range_contains(r, pos))
322 return EPOS;
323 size_t newpos = text_line_begin(txt, text_line_prev(txt, pos));
324 return newpos != pos && r->start <= newpos ? newpos : EPOS;
325 }
326
327 size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundary)(int)) {
328 char c;
329 Iterator it = text_iterator_get(txt, pos);
330 if (!text_iterator_byte_get(&it, &c))
331 return pos;
332 if (boundary(c))
333 while (boundary(c) && !space(c) && text_iterator_char_next(&it, &c));
334 else
335 while (!boundary(c) && text_iterator_char_next(&it, &c));
336 while (space(c) && text_iterator_char_next(&it, &c));
337 return it.pos;
338 }
339
340 size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
341 char c;
342 Iterator it = text_iterator_get(txt, pos);
343 while (text_iterator_char_prev(&it, &c) && space(c));
344 if (boundary(c))
345 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && boundary(c) && !space(c));
346 else
347 do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !boundary(c));
348 return pos;
349 }
350
351 size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundary)(int)) {
352 char c;
353 Iterator it = text_iterator_get(txt, pos);
354 while (text_iterator_char_next(&it, &c) && space(c));
355 if (boundary(c))
356 do pos = it.pos; while (text_iterator_char_next(&it, &c) && boundary(c) && !space(c));
357 else
358 do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundary(c));
359 return pos;
360 }
361
362 size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundary)(int)) {
363 char c;
364 Iterator it = text_iterator_get(txt, pos);
365 if (!text_iterator_byte_get(&it, &c))
366 return pos;
367 if (boundary(c))
368 while (boundary(c) && !space(c) && text_iterator_char_prev(&it, &c));
369 else
370 while (!boundary(c) && text_iterator_char_prev(&it, &c));
371 while (space(c) && text_iterator_char_prev(&it, &c));
372 return it.pos;
373 }
374
375 size_t text_longword_end_next(Text *txt, size_t pos) {
376 return text_customword_end_next(txt, pos, isspace);
377 }
378
379 size_t text_longword_end_prev(Text *txt, size_t pos) {
380 return text_customword_end_prev(txt, pos, isspace);
381 }
382
383 size_t text_longword_start_next(Text *txt, size_t pos) {
384 return text_customword_start_next(txt, pos, isspace);
385 }
386
387 size_t text_longword_start_prev(Text *txt, size_t pos) {
388 return text_customword_start_prev(txt, pos, isspace);
389 }
390
391 size_t text_word_end_next(Text *txt, size_t pos) {
392 return text_customword_end_next(txt, pos, is_word_boundary);
393 }
394
395 size_t text_word_end_prev(Text *txt, size_t pos) {
396 return text_customword_end_prev(txt, pos, is_word_boundary);
397 }
398
399 size_t text_word_start_next(Text *txt, size_t pos) {
400 return text_customword_start_next(txt, pos, is_word_boundary);
401 }
402
403 size_t text_word_start_prev(Text *txt, size_t pos) {
404 return text_customword_start_prev(txt, pos, is_word_boundary);
405 }
406
407 size_t text_sentence_next(Text *txt, size_t pos) {
408 char c, prev = 'X';
409 Iterator it = text_iterator_get(txt, pos), rev = it;
410
411 if (!text_iterator_byte_get(&it, &c))
412 return pos;
413
414 while (text_iterator_byte_get(&rev, &prev) && space(prev))
415 text_iterator_byte_prev(&rev, NULL);
416 prev = rev.pos == 0 ? '.' : prev; /* simulate punctuation at BOF */
417
418 do {
419 if ((prev == '.' || prev == '?' || prev == '!') && space(c)) {
420 do text_iterator_byte_next(&it, NULL);
421 while (text_iterator_byte_get(&it, &c) && space(c));
422 return it.pos;
423 }
424 prev = c;
425 } while (text_iterator_byte_next(&it, &c));
426 return it.pos;
427 }
428
429 size_t text_sentence_prev(Text *txt, size_t pos) {
430 char c, prev = 'X';
431 bool content = false;
432 Iterator it = text_iterator_get(txt, pos);
433
434 while (it.pos != 0 && text_iterator_byte_prev(&it, &c)) {
435 if (content && space(prev) && (c == '.' || c == '?' || c == '!')) {
436 do text_iterator_byte_next(&it, NULL);
437 while (text_iterator_byte_get(&it, &c) && space(c));
438 return it.pos;
439 }
440 content |= !space(c);
441 prev = c;
442 } /* The loop only ends on hitting BOF or error */
443 if (content) /* starting pos was after first sentence in file => find that sentences start */
444 while (text_iterator_byte_get(&it, &c) && space(c))
445 text_iterator_byte_next(&it, NULL);
446 return it.pos;
447 }
448
449 size_t text_paragraph_next(Text *txt, size_t pos) {
450 char c;
451 Iterator it = text_iterator_get(txt, pos);
452
453 while (text_iterator_byte_get(&it, &c) && (c == '\n' || blank(c)))
454 text_iterator_char_next(&it, NULL);
455 return text_line_blank_next(txt, it.pos);
456 }
457
458 size_t text_paragraph_prev(Text *txt, size_t pos) {
459 char c;
460 Iterator it = text_iterator_get(txt, pos);
461
462 while (text_iterator_byte_get(&it, &c) && (c == '\n' || blank(c)))
463 text_iterator_char_prev(&it, NULL);
464 return text_line_blank_prev(txt, it.pos);
465 }
466
467 size_t text_line_empty_next(Text *txt, size_t pos) {
468 char c;
469 Iterator it = text_iterator_get(txt, pos);
470 while (text_iterator_byte_find_next(&it, '\n')) {
471 if (text_iterator_byte_next(&it, &c) && c == '\n')
472 return it.pos;
473 }
474 return it.pos;
475 }
476
477 size_t text_line_empty_prev(Text *txt, size_t pos) {
478 char c;
479 Iterator it = text_iterator_get(txt, pos);
480 while (text_iterator_byte_find_prev(&it, '\n')) {
481 if (text_iterator_byte_prev(&it, &c) && c == '\n')
482 return it.pos + 1;
483 }
484 return it.pos;
485 }
486
487 size_t text_line_blank_next(Text *txt, size_t pos) {
488 char c;
489 Iterator it = text_iterator_get(txt, pos);
490 while (text_iterator_byte_find_next(&it, '\n')) {
491 size_t n = it.pos;
492 while (text_iterator_byte_next(&it, &c) && blank(c));
493 if (c == '\n')
494 return n + 1;
495 }
496 return it.pos;
497 }
498
499 size_t text_line_blank_prev(Text *txt, size_t pos) {
500 char c;
501 Iterator it = text_iterator_get(txt, pos);
502 while (text_iterator_byte_find_prev(&it, '\n')) {
503 while (text_iterator_byte_prev(&it, &c) && blank(c));
504 if (c == '\n')
505 return it.pos + 1;
506 }
507 return it.pos;
508 }
509
510 size_t text_block_start(Text *txt, size_t pos) {
511 Filerange r = text_object_curly_bracket(txt, pos-1);
512 return text_range_valid(&r) ? r.start-1 : pos;
513 }
514
515 size_t text_block_end(Text *txt, size_t pos) {
516 Filerange r = text_object_curly_bracket(txt, pos+1);
517 return text_range_valid(&r) ? r.end : pos;
518 }
519
520 size_t text_parenthesis_start(Text *txt, size_t pos) {
521 Filerange r = text_object_parenthesis(txt, pos-1);
522 return text_range_valid(&r) ? r.start-1 : pos;
523 }
524
525 size_t text_parenthesis_end(Text *txt, size_t pos) {
526 Filerange r = text_object_parenthesis(txt, pos+1);
527 return text_range_valid(&r) ? r.end : pos;
528 }
529
530 size_t text_bracket_match(Text *txt, size_t pos, const Filerange *limits) {
531 return text_bracket_match_symbol(txt, pos, NULL, limits);
532 }
533
534 static size_t match_symbol(Text *txt, size_t pos, char search, int direction, const Filerange *limits) {
535 char c, current;
536 int count = 1;
537 bool instring = false;
538 Iterator it = text_iterator_get(txt, pos);
539 if (!text_iterator_byte_get(&it, ¤t))
540 return pos;
541 if (direction >= 0) { /* forward search */
542 while (text_iterator_byte_next(&it, &c)) {
543 if (limits && it.pos >= limits->end)
544 break;
545 if (c != current && c == '"')
546 instring = !instring;
547 if (!instring) {
548 if (c == search && --count == 0)
549 return it.pos;
550 else if (c == current)
551 count++;
552 }
553 }
554 } else { /* backwards */
555 while (text_iterator_byte_prev(&it, &c)) {
556 if (limits && it.pos < limits->start)
557 break;
558 if (c != current && c == '"')
559 instring = !instring;
560 if (!instring) {
561 if (c == search && --count == 0)
562 return it.pos;
563 else if (c == current)
564 count++;
565 }
566 }
567 }
568
569 return pos; /* no match found */
570 }
571
572 size_t text_bracket_match_symbol(Text *txt, size_t pos, const char *symbols, const Filerange *limits) {
573 int direction;
574 char search, current, c;
575 Iterator it = text_iterator_get(txt, pos);
576 if (!text_iterator_byte_get(&it, ¤t))
577 return pos;
578 if (symbols && !memchr(symbols, current, strlen(symbols)))
579 return pos;
580 switch (current) {
581 case '(': search = ')'; direction = 1; break;
582 case ')': search = '('; direction = -1; break;
583 case '{': search = '}'; direction = 1; break;
584 case '}': search = '{'; direction = -1; break;
585 case '[': search = ']'; direction = 1; break;
586 case ']': search = '['; direction = -1; break;
587 case '<': search = '>'; direction = 1; break;
588 case '>': search = '<'; direction = -1; break;
589 case '"':
590 case '`':
591 case '\'':
592 {
593 /* prefer matches on the same line */
594 size_t fw = match_symbol(txt, pos, current, +1, limits);
595 size_t bw = match_symbol(txt, pos, current, -1, limits);
596 if (fw == pos)
597 return bw;
598 if (bw == pos)
599 return fw;
600 size_t line = text_lineno_by_pos(txt, pos);
601 size_t line_fw = text_lineno_by_pos(txt, fw);
602 size_t line_bw = text_lineno_by_pos(txt, bw);
603 if (line != line_fw)
604 return bw;
605 if (line != line_bw)
606 return fw;
607 direction = +1;
608 if (text_iterator_byte_next(&it, &c)) {
609 /* if a single or double quote is followed by
610 * a special character, search backwards */
611 char special[] = " \t\n)}]>.,:;";
612 if (memchr(special, c, sizeof(special)))
613 direction = -1;
614 }
615 return direction >= 0 ? fw : bw;
616 }
617 default:
618 return pos;
619 }
620
621 return match_symbol(txt, pos, search, direction, limits);
622 }
623
624 size_t text_search_forward(Text *txt, size_t pos, Regex *regex) {
625 size_t start = pos + 1;
626 size_t end = text_size(txt);
627 RegexMatch match[1];
628 char c;
629 int flags = text_byte_get(txt, pos, &c) && c == '\n' ? 0 : REG_NOTBOL;
630 bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, flags);
631
632 if (!found) {
633 start = 0;
634 found = !text_search_range_forward(txt, start, end - start, regex, 1, match, 0);
635 }
636
637 return found ? match[0].start : pos;
638 }
639
640 size_t text_search_backward(Text *txt, size_t pos, Regex *regex) {
641 size_t start = 0;
642 size_t end = pos;
643 RegexMatch match[1];
644 bool found = !text_search_range_backward(txt, start, end, regex, 1, match, REG_NOTEOL);
645
646 if (!found) {
647 end = text_size(txt);
648 found = !text_search_range_backward(txt, start, end - start, regex, 1, match, 0);
649 }
650
651 return found ? match[0].start : pos;
652 }