vis
a vi-like editor based on Plan 9's structural regular expressions
git clone https://9o.is/git/vis.git
commit cdde2c6bb31e7ef2e389dd7efdb8018e5c9462c3 parent 400ee3c42e8924f78075491cd3d692f3e9f01e62 Author: Marc André Tanner <mat@brain-dump.org> Date: Mon, 12 Dec 2016 12:02:20 +0100 text-regex: add regex backend based on libtre While memory consumption should be improved, backward searches will still be slow, because they are implemented in terms of repeated forward searches. It needs to be investigated whether the underlying automaton can have its transitions reversed and essentially run backwards, as is the case in sam. Diffstat:
| M | Makefile | | | 12 | ++++++++---- |
| M | README.md | | | 1 | + |
| M | configure | | | 52 | ++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | text-regex-tre.c | | | 112 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | text-regex.h | | | 5 | +++++ |
| M | vis-cmds.c | | | 1 | + |
6 files changed, 179 insertions(+), 4 deletions(-)
diff --git a/Makefile b/Makefile @@ -1,9 +1,11 @@ -include config.mk +REGEX_SRC ?= text-regex.c + SRC = array.c buffer.c libutf.c main.c map.c register.c ring-buffer.c \ - sam.c text.c text-motions.c text-objects.c text-regex.c text-util.c \ + sam.c text.c text-motions.c text-objects.c text-util.c \ ui-curses.c view.c vis.c vis-lua.c vis-modes.c vis-motions.c \ - vis-operators.c vis-prompt.c vis-text-objects.c + vis-operators.c vis-prompt.c vis-text-objects.c $(REGEX_SRC) EXECUTABLES = vis vis-clipboard vis-complete vis-menu vis-open @@ -19,6 +21,7 @@ VERSION = $(shell git describe --always --dirty 2>/dev/null || echo "0.2-git") CONFIG_LUA ?= 1 CONFIG_LPEG ?= 0 +CONFIG_TRE ?= 0 CONFIG_ACL ?= 0 CONFIG_SELINUX ?= 0 @@ -27,16 +30,17 @@ CFLAGS_STD += -DVERSION=\"${VERSION}\" LDFLAGS_STD ?= -lc CFLAGS_VIS = $(CFLAGS_AUTO) $(CFLAGS_TERMKEY) $(CFLAGS_CURSES) $(CFLAGS_ACL) \ - $(CFLAGS_SELINUX) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD) + $(CFLAGS_SELINUX) $(CFLAGS_TRE) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD) CFLAGS_VIS += -DVIS_PATH=\"${SHAREPREFIX}/vis\" CFLAGS_VIS += -DCONFIG_LUA=${CONFIG_LUA} CFLAGS_VIS += -DCONFIG_LPEG=${CONFIG_LPEG} +CFLAGS_VIS += -DCONFIG_TRE=${CONFIG_TRE} CFLAGS_VIS += -DCONFIG_SELINUX=${CONFIG_SELINUX} CFLAGS_VIS += -DCONFIG_ACL=${CONFIG_ACL} LDFLAGS_VIS = $(LDFLAGS_AUTO) $(LDFLAGS_TERMKEY) $(LDFLAGS_CURSES) $(LDFLAGS_ACL) \ - $(LDFLAGS_SELINUX) $(LDFLAGS_LUA) $(LDFLAGS_LPEG) $(LDFLAGS_STD) + $(LDFLAGS_SELINUX) $(LDFLAGS_TRE) $(LDFLAGS_LUA) $(LDFLAGS_LPEG) $(LDFLAGS_STD) STRIP?=strip diff --git a/README.md b/README.md @@ -60,6 +60,7 @@ compatible environment as well as: * [Lua](http://www.lua.org/) >= 5.2 (optional) * [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/) >= 0.12 (optional runtime dependency required for syntax highlighting) + * [TRE](http://laurikari.net/tre/) (optional for more memory efficient regex search) Assuming these dependencies are met, execute: diff --git a/configure b/configure @@ -25,6 +25,7 @@ Fine tuning of the 
installation directories: Optional features: --enable-lua build with Lua support [auto] --enable-lpeg build with support for statically linking to LPeg [auto] + --enable-tre build with TRE regex support [auto] --enable-selinux build with SELinux support [auto] --enable-acl build with POSIX ACL support [auto] @@ -115,6 +116,7 @@ MANDIR='$(PREFIX)/share/man' lua=auto lpeg=auto +tre=auto selinux=auto acl=auto @@ -133,6 +135,8 @@ case "$arg" in --disable-lua|--enable-lua=no) lua=no ;; --enable-lpeg|--enable-lpeg=yes) lpeg=yes ;; --disable-lpeg|--enable-lpeg=no) lpeg=no ;; +--enable-tre|--enable-tre=yes) tre=yes ;; +--disable-tre|--enable-tre=no) tre=no ;; --enable-selinux|--enable-selinux=yes) selinux=yes ;; --disable-selinux|--enable-selinux=no) selinux=no ;; --enable-acl|--enable-acl=yes) acl=yes ;; @@ -353,6 +357,50 @@ else fail "$0: cannot find libtermkey" fi +CONFIG_TRE=0 +REGEX_SRC=text-regex.c + +if test "$tre" != "no" ; then + + printf "checking for libtre... " + +cat > "$tmpc" <<EOF +#include <tre/tre.h> + +int main() { + regex_t preg; + tre_str_source *source = NULL; + regmatch_t pmatch[1]; + tre_regcomp(&preg, "\0", REG_EXTENDED); + tre_reguexec(&preg, source, 1, pmatch, 0); + tre_regfree(&preg); + return 0; +} +EOF + + if test "$have_pkgconfig" = "yes" ; then + CFLAGS_TRE=$(pkg-config --cflags tre 2>/dev/null) + LDFLAGS_TRE=$(pkg-config --libs tre 2>/dev/null) + fi + + if test -z "$LDFLAGS_TRE"; then + CFLAGS_TRE="" + LDFLAGS_TRE="-ltre" + fi + + if $CC $CFLAGS $CFLAGS_TRE "$tmpc" \ + $LDFLAGS $LDFLAGS_TRE -o "$tmpo" >/dev/null 2>&1; then + CONFIG_TRE=1 + REGEX_SRC=text-regex-tre.c + printf "%s\n" "yes" + else + printf "%s\n" "no" + CFLAGS_TRE="" + LDFLAGS_TRE="" + test "$tre" = "yes" && fail "$0: cannot find libtre" + fi +fi + CONFIG_LUA=0 # enabling builtin lpeg requires lua support @@ -537,6 +585,10 @@ CFLAGS_CURSES = $CFLAGS_CURSES LDFLAGS_CURSES = $LDFLAGS_CURSES CFLAGS_TERMKEY = $CFLAGS_TERMKEY LDFLAGS_TERMKEY = $LDFLAGS_TERMKEY +REGEX_SRC = 
$REGEX_SRC +CONFIG_TRE = $CONFIG_TRE +CFLAGS_TRE = $CFLAGS_TRE +LDFLAGS_TRE = $LDFLAGS_TRE CONFIG_LUA = $CONFIG_LUA CFLAGS_LUA = $CFLAGS_LUA LDFLAGS_LUA = $LDFLAGS_LUA diff --git a/text-regex-tre.c b/text-regex-tre.c @@ -0,0 +1,112 @@ +#include <stdlib.h> +#include <string.h> + +#include "text-regex.h" +#include "text-motions.h" + +struct Regex { + regex_t regex; + tre_str_source str_source; + Text *text; + Iterator it; + size_t end; +}; + +size_t text_regex_nsub(Regex *r) { + if (!r) + return 0; + return r->regex.re_nsub; +} + +static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) { + Regex *r = context; + text_iterator_byte_get(&r->it, (char*)c); + return r->it.pos < r->end && text_iterator_byte_next(&r->it, NULL) ? 0 : 1; +} + +static void str_rewind(size_t pos, void *context) { + Regex *r = context; + r->it = text_iterator_get(r->text, pos); +} + +static int str_compare(size_t pos1, size_t pos2, size_t len, void *context) { + Regex *r = context; + int ret = 1; + void *buf1 = malloc(len), *buf2 = malloc(len); + if (!buf1 || !buf2) + goto err; + text_bytes_get(r->text, pos1, len, buf1); + text_bytes_get(r->text, pos2, len, buf2); + ret = memcmp(buf1, buf2, len); +err: + free(buf1); + free(buf2); + return ret; +} + +Regex *text_regex_new(void) { + Regex *r = calloc(1, sizeof(*r)); + if (!r) + return NULL; + r->str_source = (tre_str_source) { + .get_next_char = str_next_char, + .rewind = str_rewind, + .compare = str_compare, + .context = r, + }; + return r; +} + +void text_regex_free(Regex *r) { + if (!r) + return; + tre_regfree(&r->regex); + free(r); +} + +int text_regex_compile(Regex *regex, const char *string, int cflags) { + int r = tre_regcomp(&regex->regex, string, cflags); + if (r) + tre_regcomp(&regex->regex, "\0\0", 0); + return r; +} + +int text_regex_match(Regex *r, const char *data, int eflags) { + return tre_regexec(&r->regex, data, 0, NULL, eflags); +} + +int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, 
size_t nmatch, RegexMatch pmatch[], int eflags) { + r->text = txt; + r->it = text_iterator_get(txt, pos); + r->end = pos+len; + + regmatch_t match[nmatch]; + int ret = tre_reguexec(&r->regex, &r->str_source, nmatch, match, eflags); + if (!ret) { + for (size_t i = 0; i < nmatch; i++) { + pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so; + pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo; + } + } + return ret; +} + +int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) { + int ret = REG_NOMATCH; + size_t end = pos + len; + + while (pos < end && !text_search_range_forward(txt, pos, len, r, nmatch, pmatch, eflags)) { + ret = 0; + // FIXME: assumes nmatch >= 1 + size_t next = pmatch[0].end; + if (next == pos) { + next = text_line_next(txt, pos); + if (next == pos) + break; + } + pos = next; + len = end - pos; + } + + return ret; +} diff --git a/text-regex.h b/text-regex.h @@ -1,7 +1,12 @@ #ifndef TEXT_REGEX_H #define TEXT_REGEX_H +/* make the REG_* constants available */ +#if CONFIG_TRE +#include <tre/tre.h> +#else #include <regex.h> +#endif #include "text.h" typedef struct Regex Regex; diff --git a/vis-cmds.c b/vis-cmds.c @@ -741,6 +741,7 @@ static bool cmd_help(Vis *vis, Win *win, Command *cmd, const char *argv[], Curso } configs[] = { { "Lua support: ", CONFIG_LUA }, { "Lua LPeg statically built-in: ", CONFIG_LPEG }, + { "TRE based regex support: ", CONFIG_TRE }, { "POSIX ACL support: ", CONFIG_ACL }, { "SELinux support: ", CONFIG_SELINUX }, };