vis

a vi-like editor based on Plan 9's structural regular expressions

commit 78ad678468e9489071c21934e20c704bc7802d6d
parent ee26b9f10998fab879351678df76d26de63f8523
Author: Marc André Tanner <mat@brain-dump.org>
Date:   Sat, 27 Sep 2014 15:15:20 +0200

Implement proper distinction between word and WORD

Diffstat:
M README  | 4 +---
M config.def.h  | 8 ++++----
M text-motions.c  | 63 +++++++++++++++++++++++++++++++++++++--------------------------

3 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/README b/README
@@ -346,9 +346,7 @@ and their current support in vis.
    /{text}  (to next match of text in forward direction)
    ?{text}  (to next match of text in backward direction)
 
-  There is currently no distinction between what vim calls a WORD and
-  a word, only the former is implemented. Though infrastructure for
-  the latter also exists.
+  An empty line is currently neither a word nor a WORD.
 
   The semantics of a paragraph and a sentence is also not always 100%
   the same as in vim.
diff --git a/config.def.h b/config.def.h
@@ -120,13 +120,13 @@ static KeyBinding vis_movements[] = {
 	{ { NONE('g'), NONE('_')    }, movement,     { .i = MOVE_LINE_FINISH         } },
 	{ { NONE('$')               }, movement,     { .i = MOVE_LINE_LASTCHAR       } },
 	{ { NONE('%')               }, movement,     { .i = MOVE_BRACKET_MATCH       } },
-	{ { NONE('b')               }, movement,     { .i = MOVE_LONGWORD_START_PREV } },
+	{ { NONE('b')               }, movement,     { .i = MOVE_WORD_START_PREV     } },
 	{ { NONE('B')               }, movement,     { .i = MOVE_LONGWORD_START_PREV } },
-	{ { NONE('w')               }, movement,     { .i = MOVE_LONGWORD_START_NEXT } },
+	{ { NONE('w')               }, movement,     { .i = MOVE_WORD_START_NEXT     } },
 	{ { NONE('W')               }, movement,     { .i = MOVE_LONGWORD_START_NEXT } },
-	{ { NONE('g'), NONE('e')    }, movement,     { .i = MOVE_LONGWORD_END_PREV   } },
+	{ { NONE('g'), NONE('e')    }, movement,     { .i = MOVE_WORD_END_PREV       } },
 	{ { NONE('g'), NONE('E')    }, movement,     { .i = MOVE_LONGWORD_END_PREV   } },
-	{ { NONE('e')               }, movement,     { .i = MOVE_LONGWORD_END_NEXT   } },
+	{ { NONE('e')               }, movement,     { .i = MOVE_WORD_END_NEXT       } },
 	{ { NONE('E')               }, movement,     { .i = MOVE_LONGWORD_END_NEXT   } },
 	{ { NONE('{')               }, movement,     { .i = MOVE_PARAGRAPH_PREV      } },
 	{ { NONE('}')               }, movement,     { .i = MOVE_PARAGRAPH_NEXT      } },
diff --git a/text-motions.c b/text-motions.c
@@ -20,7 +20,7 @@
 
 // TODO: specify this per file type?
 static int is_word_boundry(int c) {
-	return !(('0' <= c && c <= '9') ||
+	return ISASCII(c) && !(('0' <= c && c <= '9') ||
 	         ('a' <= c && c <= 'z') ||
 	         ('A' <= c && c <= 'Z'));
 }
@@ -158,73 +158,84 @@ size_t text_line_next(Text *txt, size_t pos) {
 	return it.pos;
 }
 
-size_t text_word_boundry_start_next(Text *txt, size_t pos, int (*isboundry)(int)) {
+static size_t text_customword_start_next(Text *txt, size_t pos, int (*isboundry)(int)) {
 	char c;
 	Iterator it = text_iterator_get(txt, pos);
-	text_iterator_byte_next(&it, NULL);
-	while (text_iterator_byte_get(&it, &c) && !isboundry(c))
-		text_iterator_byte_next(&it, NULL);
-	while (text_iterator_byte_get(&it, &c) && isboundry(c))
-		text_iterator_byte_next(&it, NULL);
+	if (!text_iterator_byte_get(&it, &c))
+		return pos;
+	if (isboundry(c))
+		while (isboundry(c) && !isspace(c) && text_iterator_char_next(&it, &c));
+	else
+		while (!isboundry(c) && text_iterator_char_next(&it, &c));
+	while (isspace(c) && text_iterator_char_next(&it, &c));
 	return it.pos;
 }
 
-size_t text_word_boundry_start_prev(Text *txt, size_t pos, int (*isboundry)(int)) {
+static size_t text_customword_start_prev(Text *txt, size_t pos, int (*isboundry)(int)) {
 	char c;
 	Iterator it = text_iterator_get(txt, pos);
-	while (text_iterator_byte_prev(&it, &c) && isboundry(c));
-	do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !isboundry(c));
+	while (text_iterator_char_prev(&it, &c) && isspace(c));
+	if (isboundry(c))
+		do pos = it.pos; while (text_iterator_char_prev(&it, &c) && isboundry(c) && !isspace(c));
+	else
+		do pos = it.pos; while (text_iterator_char_prev(&it, &c) && !isboundry(c));
 	return pos;
 }
 
-size_t text_word_boundry_end_next(Text *txt, size_t pos, int (*isboundry)(int)) {
+static size_t text_customword_end_next(Text *txt, size_t pos, int (*isboundry)(int)) {
 	char c;
 	Iterator it = text_iterator_get(txt, pos);
-	while (text_iterator_char_next(&it, &c) && isboundry(c));
-	do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundry(c));
+	while (text_iterator_char_next(&it, &c) && isspace(c));
+	if (isboundry(c))
+		do pos = it.pos; while (text_iterator_char_next(&it, &c) && isboundry(c) && !isspace(c));
+	else
+		do pos = it.pos; while (text_iterator_char_next(&it, &c) && !isboundry(c));
 	return pos;
 }
 
-size_t text_word_boundry_end_prev(Text *txt, size_t pos, int (*isboundry)(int)) {
+static size_t text_customword_end_prev(Text *txt, size_t pos, int (*isboundry)(int)) {
 	char c;
 	Iterator it = text_iterator_get(txt, pos);
-	while (text_iterator_byte_get(&it, &c) && !isboundry(c))
-		text_iterator_byte_prev(&it, NULL);
-	while (text_iterator_char_prev(&it, &c) && isboundry(c));
+	if (!text_iterator_byte_get(&it, &c))
+		return pos;
+	if (isboundry(c))
+		while (isboundry(c) && !isspace(c) && text_iterator_char_prev(&it, &c));
+	else
+		while (!isboundry(c) && text_iterator_char_prev(&it, &c));
+	while (isspace(c) && text_iterator_char_prev(&it, &c));
 	return it.pos;
 }
 
 size_t text_longword_end_next(Text *txt, size_t pos) {
-	return text_word_boundry_end_next(txt, pos, isspace);
+	return text_customword_end_next(txt, pos, isspace);
 }
 
 size_t text_longword_end_prev(Text *txt, size_t pos) {
-	return text_word_boundry_end_prev(txt, pos, isspace);
+	return text_customword_end_prev(txt, pos, isspace);
 }
 
 size_t text_longword_start_next(Text *txt, size_t pos) {
-	return text_word_boundry_start_next(txt, pos, isspace);
+	return text_customword_start_next(txt, pos, isspace);
 }
 
 size_t text_longword_start_prev(Text *txt, size_t pos) {
-	return text_word_boundry_start_prev(txt, pos, isspace);
+	return text_customword_start_prev(txt, pos, isspace);
 }
 
-// TODO: this actually doesn't work that way -> rewrite
 size_t text_word_end_next(Text *txt, size_t pos) {
-	return text_word_boundry_end_next(txt, pos, is_word_boundry);
+	return text_customword_end_next(txt, pos, is_word_boundry);
 }
 
 size_t text_word_end_prev(Text *txt, size_t pos) {
-	return text_word_boundry_end_prev(txt, pos, is_word_boundry);
+	return text_customword_end_prev(txt, pos, is_word_boundry);
 }
 
 size_t text_word_start_next(Text *txt, size_t pos) {
-	return text_word_boundry_start_next(txt, pos, is_word_boundry);
+	return text_customword_start_next(txt, pos, is_word_boundry);
 }
 
 size_t text_word_start_prev(Text *txt, size_t pos) {
-	return text_word_boundry_start_prev(txt, pos, is_word_boundry);
+	return text_customword_start_prev(txt, pos, is_word_boundry);
 }
 
 static size_t text_paragraph_sentence_next(Text *txt, size_t pos, bool sentence) {

M README  | 4 +---
M config.def.h  | 8 ++++----
M text-motions.c  | 63 +++++++++++++++++++++++++++++++++++++--------------------------