vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

text-regex-tre.c

(3833B)


      1 #include <stdlib.h>
      2 #include <string.h>
      3 #include <wchar.h>
      4 #include <errno.h>
      5 
      6 #include "text-regex.h"
      7 #include "text-motions.h"
      8 
      9 struct Regex {
     10 	regex_t regex;
     11 	tre_str_source str_source;
     12 	Text *text;
     13 	Iterator it;
     14 	size_t end;
     15 };
     16 
     17 size_t text_regex_nsub(Regex *r) {
     18        if (!r)
     19                return 0;
     20        return r->regex.re_nsub;
     21 }
     22 
     23 static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) {
     24 	Regex *r = context;
     25 	Iterator *it = &r->it;
     26 	if (TRE_WCHAR) {
     27 		mbstate_t ps = { 0 };
     28 		bool eof = false;
     29 		size_t start = it->pos;
     30 		for (;;) {
     31 			if (it->pos >= r->end) {
     32 				eof = true;
     33 				break;
     34 			}
     35 			size_t rem = r->end - it->pos;
     36 			size_t plen = it->end - it->text;
     37 			size_t len = rem < plen ? rem : plen;
     38 			size_t wclen = mbrtowc(c, it->text, len, &ps);
     39 			if (wclen == (size_t)-1 && errno == EILSEQ) {
     40 				ps = (mbstate_t){0};
     41 				*c = L'\0';
     42 				text_iterator_codepoint_next(it, NULL);
     43 				break;
     44 			} else if (wclen == (size_t)-2) {
     45 				if (!text_iterator_next(it)) {
     46 					eof = true;
     47 					break;
     48 				}
     49 			} else if (wclen == 0) {
     50 				text_iterator_byte_next(it, NULL);
     51 				break;
     52 			} else {
     53 				if (wclen < plen) {
     54 					it->text += wclen;
     55 					it->pos += wclen;
     56 				} else {
     57 					text_iterator_next(it);
     58 				}
     59 				break;
     60 			}
     61 		}
     62 
     63 		if (eof) {
     64 			*c = L'\0';
     65 			*pos_add = 1;
     66 			return 1;
     67 		} else {
     68 			*pos_add = it->pos - start;
     69 			return 0;
     70 		}
     71 	} else {
     72 		*pos_add = 1;
     73 		if (it->pos < r->end && text_iterator_byte_get(it, (char*)c)) {
     74 			text_iterator_byte_next(it, NULL);
     75 			return 0;
     76 		} else {
     77 			*c = '\0';
     78 			return 1;
     79 		}
     80 	}
     81 }
     82 
     83 static void str_rewind(size_t pos, void *context) {
     84 	Regex *r = context;
     85 	r->it = text_iterator_get(r->text, pos);
     86 }
     87 
     88 static int str_compare(size_t pos1, size_t pos2, size_t len, void *context) {
     89 	Regex *r = context;
     90 	int ret = 1;
     91 	void *buf1 = malloc(len), *buf2 = malloc(len);
     92 	if (!buf1 || !buf2)
     93 		goto err;
     94 	text_bytes_get(r->text, pos1, len, buf1);
     95 	text_bytes_get(r->text, pos2, len, buf2);
     96 	ret = memcmp(buf1, buf2, len);
     97 err:
     98 	free(buf1);
     99 	free(buf2);
    100 	return ret;
    101 }
    102 
    103 Regex *text_regex_new(void) {
    104 	Regex *r = calloc(1, sizeof(*r));
    105 	if (!r)
    106 		return NULL;
    107 	r->str_source = (tre_str_source) {
    108 		.get_next_char = str_next_char,
    109 		.rewind = str_rewind,
    110 		.compare = str_compare,
    111 		.context = r,
    112 	};
    113 	return r;
    114 }
    115 
    116 void text_regex_free(Regex *r) {
    117 	if (!r)
    118 		return;
    119 	tre_regfree(&r->regex);
    120 	free(r);
    121 }
    122 
    123 int text_regex_compile(Regex *regex, const char *string, int cflags) {
    124 	int r = tre_regcomp(&regex->regex, string, cflags);
    125 	if (r)
    126 		tre_regcomp(&regex->regex, "\0\0", 0);
    127 	return r;
    128 }
    129 
    130 int text_regex_match(Regex *r, const char *data, int eflags) {
    131 	return tre_regexec(&r->regex, data, 0, NULL, eflags);
    132 }
    133 
    134 int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
    135 	r->text = txt;
    136 	r->it = text_iterator_get(txt, pos);
    137 	r->end = pos+len;
    138 
    139 	regmatch_t match[MAX_REGEX_SUB];
    140 	int ret = tre_reguexec(&r->regex, &r->str_source, nmatch, match, eflags);
    141 	if (!ret) {
    142 		for (size_t i = 0; i < nmatch; i++) {
    143 			pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
    144 			pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
    145 		}
    146 	}
    147 	return ret;
    148 }
    149 
    150 int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
    151 	int ret = REG_NOMATCH;
    152 	size_t end = pos + len;
    153 
    154 	while (pos < end && !text_search_range_forward(txt, pos, len, r, nmatch, pmatch, eflags)) {
    155 		ret = 0;
    156 		// FIXME: assumes nmatch >= 1
    157 		size_t next = pmatch[0].end;
    158 		if (next == pos) {
    159 			next = text_line_next(txt, pos);
    160 			if (next == pos)
    161 				break;
    162 		}
    163 		pos = next;
    164 		len = end - pos;
    165 
    166 		char c;
    167 		if (text_byte_get(txt, pos-1, &c) && c == '\n')
    168 			eflags &= ~REG_NOTBOL;
    169 		else
    170 			eflags |= REG_NOTBOL;
    171 	}
    172 
    173 	return ret;
    174 }