vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

commit 9995abd1c4e3531610380b34b40e28331cc5a664
parent dff037345c1d31dff9533b03a2aa9ae035c2f374
Author: Marc André Tanner <mat@brain-dump.org>
Date:   Sun,  7 Aug 2016 22:27:23 +0200

text-regex: improve searching in binary data

The regex(3) API we currently use matches on NUL-terminated strings.
Therefore it does not work for binary data. This commit adds loops
to manually skip over NUL bytes. While it does not work for patterns
which would match strings containing NUL bytes, it should improve the
most basic cases.

Binary file handling will need further improvements in the future.

Fixes #359.

Diffstat:
M text-regex.c | 63 +++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/text-regex.c b/text-regex.c @@ -38,13 +38,25 @@ int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_ char *buf = text_bytes_alloc0(txt, pos, len); if (!buf) return REG_NOMATCH; + char *cur = buf, *end = buf + len; + int ret = REG_NOMATCH; regmatch_t match[nmatch]; - int ret = regexec(&r->regex, buf, nmatch, match, eflags); - if (!ret) { - for (size_t i = 0; i < nmatch; i++) { - pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so; - pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo; + for (size_t junk = len; len > 0; len -= junk, pos += junk) { + ret = regexec(&r->regex, cur, nmatch, match, eflags); + if (!ret) { + for (size_t i = 0; i < nmatch; i++) { + pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so; + pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo; + } + break; } + char *next = memchr(cur, 0, len); + if (!next) + break; + while (!*next && next != end) + next++; + junk = next - cur; + cur = next; } free(buf); return ret; @@ -54,25 +66,36 @@ int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size char *buf = text_bytes_alloc0(txt, pos, len); if (!buf) return REG_NOMATCH; - regmatch_t match[nmatch]; - char *cur = buf; + char *cur = buf, *end = buf + len; int ret = REG_NOMATCH; - while (!regexec(&r->regex, cur, nmatch, match, eflags)) { - ret = 0; - for (size_t i = 0; i < nmatch; i++) { - pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + (size_t)(cur - buf) + match[i].rm_so; - pmatch[i].end = match[i].rm_eo == -1 ? 
EPOS : pos + (size_t)(cur - buf) + match[i].rm_eo; - } - if (match[0].rm_so == 0 && match[0].rm_eo == 0) { - /* empty match at the beginning of cur, advance to next line */ - if ((cur = strchr(cur, '\n'))) - cur++; - else - break; + regmatch_t match[nmatch]; + for (size_t junk = len; len > 0; len -= junk, pos += junk) { + char *next; + if (!regexec(&r->regex, cur, nmatch, match, eflags)) { + ret = 0; + for (size_t i = 0; i < nmatch; i++) { + pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so; + pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo; + } + if (match[0].rm_so == 0 && match[0].rm_eo == 0) { + /* empty match at the beginning of cur, advance to next line */ + next = strchr(cur, '\n'); + if (!next) + break; + next++; + } else { + next = cur + match[0].rm_eo; + } } else { - cur += match[0].rm_eo; + next = memchr(cur, 0, len); + if (!next) + break; + while (!*next && next != end) + next++; } + junk = next - cur; + cur = next; } free(buf); return ret;