vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

text.c

(28194B)


      1 #include <unistd.h>
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 #include <time.h>
      6 #include <fcntl.h>
      7 #include <errno.h>
      8 #include <wchar.h>
      9 #include <stdint.h>
     10 #include <libgen.h>
     11 #include <limits.h>
     12 #include <sys/types.h>
     13 #include <sys/stat.h>
     14 #include <sys/mman.h>
     15 
     16 #include "text.h"
     17 #include "text-util.h"
     18 #include "text-motions.h"
     19 #include "util.h"
     20 #include "array.h"
     21 #include "text-internal.h"
     22 
     23 /* A piece holds a reference (but doesn't itself store) a certain amount of data.
     24  * All active pieces chained together form the whole content of the document.
     25  * At the beginning there exists only one piece, spanning the whole document.
     26  * Upon insertion/deletion new pieces will be created to represent the changes.
     27  * Generally pieces are never destroyed, but kept around to perform undo/redo
     28  * operations.
     29  */
     30 struct Piece {
     31 	Text *text;             /* text to which this piece belongs */
     32 	Piece *prev, *next;     /* pointers to the logical predecessor/successor */
     33 	Piece *global_prev;     /* double linked list in order of allocation, */
     34 	Piece *global_next;     /* used to free individual pieces */
     35 	const char *data;       /* pointer into a Block holding the data */
     36 	size_t len;             /* the length in number of bytes of the data */
     37 };
     38 
     39 /* used to transform a global position (byte offset starting from the beginning
     40  * of the text) into an offset relative to a piece.
     41  */
     42 typedef struct {
     43 	Piece *piece;           /* piece holding the location */
     44 	size_t off;             /* offset into the piece in bytes */
     45 } Location;
     46 
     47 /* A Span holds a certain range of pieces. Changes to the document are always
     48  * performed by swapping out an existing span with a new one.
     49  */
     50 typedef struct {
     51 	Piece *start, *end;     /* start/end of the span */
     52 	size_t len;             /* the sum of the lengths of the pieces which form this span */
     53 } Span;
     54 
     55 /* A Change keeps all needed information to redo/undo an insertion/deletion. */
     56 typedef struct Change Change;
     57 struct Change {
     58 	Span old;               /* all pieces which are being modified/swapped out by the change */
     59 	Span new;               /* all pieces which are introduced/swapped in by the change */
     60 	size_t pos;             /* absolute position at which the change occurred */
     61 	Change *next;           /* next change which is part of the same revision */
     62 	Change *prev;           /* previous change which is part of the same revision */
     63 };
     64 
     65 /* A Revision is a list of Changes which are used to undo/redo all modifications
     66  * since the last snapshot operation. Revisions are stored in a directed graph structure.
     67  */
     68 typedef struct Revision Revision;
     69 struct Revision {
     70 	Change *change;         /* the most recent change */
     71 	Revision *next;         /* the next (child) revision in the undo tree */
     72 	Revision *prev;         /* the previous (parent) revision in the undo tree */
     73 	Revision *earlier;      /* the previous Revision, chronologically */
     74 	Revision *later;        /* the next Revision, chronologically */
     75 	time_t time;            /* when the first change of this revision was performed */
     76 	size_t seq;             /* a unique, strictly increasing identifier */
     77 };
     78 
     79 typedef struct {
     80 	size_t pos;             /* position in bytes from start of file */
     81 	size_t lineno;          /* line number in file i.e. number of '\n' in [0, pos) */
     82 } LineCache;
     83 
     84 /* The main struct holding all information of a given file */
     85 struct Text {
     86 	Array blocks;           /* blocks which hold text content */
     87 	Piece *pieces;          /* all pieces which have been allocated, used to free them */
     88 	Piece *cache;           /* most recently modified piece */
     89 	Piece begin, end;       /* sentinel nodes which always exists but don't hold any data */
     90 	Revision *history;        /* undo tree */
     91 	Revision *current_revision; /* revision holding all file changes until a snapshot is performed */
     92 	Revision *last_revision;    /* the last revision added to the tree, chronologically */
     93 	Revision *saved_revision;   /* the last revision at the time of the save operation */
     94 	size_t size;            /* current file content size in bytes */
     95 	struct stat info;       /* stat as probed at load time */
     96 	LineCache lines;        /* mapping between absolute pos in bytes and logical line breaks */
     97 };
     98 
     99 /* block management */
    100 static const char *block_store(Text*, const char *data, size_t len);
    101 /* cache layer */
    102 static void cache_piece(Text *txt, Piece *p);
    103 static bool cache_contains(Text *txt, Piece *p);
    104 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len);
    105 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len);
    106 /* piece management */
    107 static Piece *piece_alloc(Text *txt);
    108 static void piece_free(Piece *p);
    109 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len);
    110 static Location piece_get_intern(Text *txt, size_t pos);
    111 static Location piece_get_extern(const Text *txt, size_t pos);
    112 /* span management */
    113 static void span_init(Span *span, Piece *start, Piece *end);
    114 static void span_swap(Text *txt, Span *old, Span *new);
    115 /* change management */
    116 static Change *change_alloc(Text *txt, size_t pos);
    117 static void change_free(Change *c);
    118 /* revision management */
    119 static Revision *revision_alloc(Text *txt);
    120 static void revision_free(Revision *rev);
    121 /* logical line counting cache */
    122 static void lineno_cache_invalidate(LineCache *cache);
    123 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped);
    124 static size_t lines_count(Text *txt, size_t pos, size_t len);
    125 
    126 /* stores the given data in a block, allocates a new one if necessary. Returns
    127  * a pointer to the storage location or NULL if allocation failed. */
    128 static const char *block_store(Text *txt, const char *data, size_t len) {
    129 	Block *blk = array_get_ptr(&txt->blocks, txt->blocks.len - 1);
    130 	if (!blk || !block_capacity(blk, len)) {
    131 		blk = block_alloc(len);
    132 		if (!blk)
    133 			return NULL;
    134 		if (!array_add_ptr(&txt->blocks, blk)) {
    135 			block_free(blk);
    136 			return NULL;
    137 		}
    138 	}
    139 	return block_append(blk, data, len);
    140 }
    141 
    142 /* cache the given piece if it is the most recently changed one */
    143 static void cache_piece(Text *txt, Piece *p) {
    144 	Block *blk = array_get_ptr(&txt->blocks, txt->blocks.len - 1);
    145 	if (!blk || p->data < blk->data || p->data + p->len != blk->data + blk->len)
    146 		return;
    147 	txt->cache = p;
    148 }
    149 
    150 /* check whether the given piece was the most recently modified one */
    151 static bool cache_contains(Text *txt, Piece *p) {
    152 	Block *blk = array_get_ptr(&txt->blocks, txt->blocks.len - 1);
    153 	Revision *rev = txt->current_revision;
    154 	if (!blk || !txt->cache || txt->cache != p || !rev || !rev->change)
    155 		return false;
    156 
    157 	Piece *start = rev->change->new.start;
    158 	Piece *end = rev->change->new.end;
    159 	bool found = false;
    160 	for (Piece *cur = start; !found; cur = cur->next) {
    161 		if (cur == p)
    162 			found = true;
    163 		if (cur == end)
    164 			break;
    165 	}
    166 
    167 	return found && p->data + p->len == blk->data + blk->len;
    168 }
    169 
    170 /* try to insert a chunk of data at a given piece offset. The insertion is only
    171  * performed if the piece is the most recently changed one. The length of the
    172  * piece, the span containing it and the whole text is adjusted accordingly */
    173 static bool cache_insert(Text *txt, Piece *p, size_t off, const char *data, size_t len) {
    174 	if (!cache_contains(txt, p))
    175 		return false;
    176 	Block *blk = array_get_ptr(&txt->blocks, txt->blocks.len - 1);
    177 	size_t bufpos = p->data + off - blk->data;
    178 	if (!block_insert(blk, bufpos, data, len))
    179 		return false;
    180 	p->len += len;
    181 	txt->current_revision->change->new.len += len;
    182 	txt->size += len;
    183 	return true;
    184 }
    185 
    186 /* try to delete a chunk of data at a given piece offset. The deletion is only
    187  * performed if the piece is the most recently changed one and the whole
    188  * affected range lies within it. The length of the piece, the span containing it
    189  * and the whole text is adjusted accordingly */
    190 static bool cache_delete(Text *txt, Piece *p, size_t off, size_t len) {
    191 	if (!cache_contains(txt, p))
    192 		return false;
    193 	Block *blk = array_get_ptr(&txt->blocks, txt->blocks.len - 1);
    194 	size_t end;
    195 	size_t bufpos = p->data + off - blk->data;
    196 	if (!addu(off, len, &end) || end > p->len || !block_delete(blk, bufpos, len))
    197 		return false;
    198 	p->len -= len;
    199 	txt->current_revision->change->new.len -= len;
    200 	txt->size -= len;
    201 	return true;
    202 }
    203 
    204 /* initialize a span and calculate its length */
    205 static void span_init(Span *span, Piece *start, Piece *end) {
    206 	size_t len = 0;
    207 	span->start = start;
    208 	span->end = end;
    209 	for (Piece *p = start; p; p = p->next) {
    210 		len += p->len;
    211 		if (p == end)
    212 			break;
    213 	}
    214 	span->len = len;
    215 }
    216 
    217 /* swap out an old span and replace it with a new one.
    218  *
    219  *  - if old is an empty span do not remove anything, just insert the new one
    220  *  - if new is an empty span do not insert anything, just remove the old one
    221  *
    222  * adjusts the document size accordingly.
    223  */
    224 static void span_swap(Text *txt, Span *old, Span *new) {
    225 	if (old->len == 0 && new->len == 0) {
    226 		return;
    227 	} else if (old->len == 0) {
    228 		/* insert new span */
    229 		new->start->prev->next = new->start;
    230 		new->end->next->prev = new->end;
    231 	} else if (new->len == 0) {
    232 		/* delete old span */
    233 		old->start->prev->next = old->end->next;
    234 		old->end->next->prev = old->start->prev;
    235 	} else {
    236 		/* replace old with new */
    237 		old->start->prev->next = new->start;
    238 		old->end->next->prev = new->end;
    239 	}
    240 	txt->size -= old->len;
    241 	txt->size += new->len;
    242 }
    243 
    244 /* Allocate a new revision and place it in the revision graph.
    245  * All further changes will be associated with this revision. */
    246 static Revision *revision_alloc(Text *txt) {
    247 	Revision *rev = calloc(1, sizeof *rev);
    248 	if (!rev)
    249 		return NULL;
    250 	rev->time = time(NULL);
    251 	txt->current_revision = rev;
    252 
    253 	/* set sequence number */
    254 	if (!txt->last_revision)
    255 		rev->seq = 0;
    256 	else
    257 		rev->seq = txt->last_revision->seq + 1;
    258 
    259 	/* set earlier, later pointers */
    260 	if (txt->last_revision)
    261 		txt->last_revision->later = rev;
    262 	rev->earlier = txt->last_revision;
    263 
    264 	if (!txt->history) {
    265 		txt->history = rev;
    266 		return rev;
    267 	}
    268 
    269 	/* set prev, next pointers */
    270 	rev->prev = txt->history;
    271 	txt->history->next = rev;
    272 	txt->history = rev;
    273 	return rev;
    274 }
    275 
    276 static void revision_free(Revision *rev) {
    277 	if (!rev)
    278 		return;
    279 	for (Change *next, *c = rev->change; c; c = next) {
    280 		next = c->next;
    281 		change_free(c);
    282 	}
    283 	free(rev);
    284 }
    285 
    286 static Piece *piece_alloc(Text *txt) {
    287 	Piece *p = calloc(1, sizeof *p);
    288 	if (!p)
    289 		return NULL;
    290 	p->text = txt;
    291 	p->global_next = txt->pieces;
    292 	if (txt->pieces)
    293 		txt->pieces->global_prev = p;
    294 	txt->pieces = p;
    295 	return p;
    296 }
    297 
    298 static void piece_free(Piece *p) {
    299 	if (!p)
    300 		return;
    301 	if (p->global_prev)
    302 		p->global_prev->global_next = p->global_next;
    303 	if (p->global_next)
    304 		p->global_next->global_prev = p->global_prev;
    305 	if (p->text->pieces == p)
    306 		p->text->pieces = p->global_next;
    307 	if (p->text->cache == p)
    308 		p->text->cache = NULL;
    309 	free(p);
    310 }
    311 
    312 static void piece_init(Piece *p, Piece *prev, Piece *next, const char *data, size_t len) {
    313 	p->prev = prev;
    314 	p->next = next;
    315 	p->data = data;
    316 	p->len = len;
    317 }
    318 
    319 /* returns the piece holding the text at byte offset pos. If pos happens to
    320  * be at a piece boundary i.e. the first byte of a piece then the previous piece
    321  * to the left is returned with an offset of piece->len. This is convenient for
    322  * modifications to the piece chain where both pieces (the returned one and the
    323  * one following it) are needed, but unsuitable as a public interface.
    324  *
    325  * in particular if pos is zero, the begin sentinel piece is returned.
    326  */
    327 static Location piece_get_intern(Text *txt, size_t pos) {
    328 	size_t cur = 0;
    329 	for (Piece *p = &txt->begin; p->next; p = p->next) {
    330 		if (cur <= pos && pos <= cur + p->len)
    331 			return (Location){ .piece = p, .off = pos - cur };
    332 		cur += p->len;
    333 	}
    334 
    335 	return (Location){ 0 };
    336 }
    337 
    338 /* similar to piece_get_intern but usable as a public API. Returns the piece
    339  * holding the text at byte offset pos. Never returns a sentinel piece.
    340  * it pos is the end of file (== text_size()) and the file is not empty then
    341  * the last piece holding data is returned.
    342  */
    343 static Location piece_get_extern(const Text *txt, size_t pos) {
    344 	size_t cur = 0;
    345 	Piece *p;
    346 
    347 	for (p = txt->begin.next; p->next; p = p->next) {
    348 		if (cur <= pos && pos < cur + p->len)
    349 			return (Location){ .piece = p, .off = pos - cur };
    350 		cur += p->len;
    351 	}
    352 
    353 	if (cur == pos)
    354 		return (Location){ .piece = p->prev, .off = p->prev->len };
    355 
    356 	return (Location){ 0 };
    357 }
    358 
    359 /* allocate a new change, associate it with current revision or a newly
    360  * allocated one if none exists. */
    361 static Change *change_alloc(Text *txt, size_t pos) {
    362 	Revision *rev = txt->current_revision;
    363 	if (!rev) {
    364 		rev = revision_alloc(txt);
    365 		if (!rev)
    366 			return NULL;
    367 	}
    368 	Change *c = calloc(1, sizeof *c);
    369 	if (!c)
    370 		return NULL;
    371 	c->pos = pos;
    372 	c->next = rev->change;
    373 	if (rev->change)
    374 		rev->change->prev = c;
    375 	rev->change = c;
    376 	return c;
    377 }
    378 
    379 static void change_free(Change *c) {
    380 	if (!c)
    381 		return;
    382 	/* only free the new part of the span, the old one is still in use */
    383 	if (c->new.start != c->new.end)
    384 		piece_free(c->new.end);
    385 	piece_free(c->new.start);
    386 	free(c);
    387 }
    388 
    389 /* When inserting new data there are 2 cases to consider.
    390  *
    391  *  - in the first the insertion point falls into the middle of an existing
    392  *    piece which is replaced by three new pieces:
    393  *
    394  *      /-+ --> +---------------+ --> +-\
    395  *      | |     | existing text |     | |
    396  *      \-+ <-- +---------------+ <-- +-/
    397  *                         ^
    398  *                         Insertion point for "demo "
    399  *
    400  *      /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
    401  *      | |     | existing|     |demo |     |text |     | |
    402  *      \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
    403  *
    404  *  - the second case deals with an insertion point at a piece boundary:
    405  *
    406  *      /-+ --> +---------------+ --> +-\
    407  *      | |     | existing text |     | |
    408  *      \-+ <-- +---------------+ <-- +-/
    409  *            ^
    410  *            Insertion point for "short"
    411  *
    412  *      /-+ --> +-----+ --> +---------------+ --> +-\
    413  *      | |     |short|     | existing text |     | |
    414  *      \-+ <-- +-----+ <-- +---------------+ <-- +-/
    415  */
    416 bool text_insert(Text *txt, size_t pos, const char *data, size_t len) {
    417 	if (len == 0)
    418 		return true;
    419 	if (pos > txt->size)
    420 		return false;
    421 	if (pos < txt->lines.pos)
    422 		lineno_cache_invalidate(&txt->lines);
    423 
    424 	Location loc = piece_get_intern(txt, pos);
    425 	Piece *p = loc.piece;
    426 	if (!p)
    427 		return false;
    428 	size_t off = loc.off;
    429 	if (cache_insert(txt, p, off, data, len))
    430 		return true;
    431 
    432 	Change *c = change_alloc(txt, pos);
    433 	if (!c)
    434 		return false;
    435 
    436 	if (!(data = block_store(txt, data, len)))
    437 		return false;
    438 
    439 	Piece *new = NULL;
    440 
    441 	if (off == p->len) {
    442 		/* insert between two existing pieces, hence there is nothing to
    443 		 * remove, just add a new piece holding the extra text */
    444 		if (!(new = piece_alloc(txt)))
    445 			return false;
    446 		piece_init(new, p, p->next, data, len);
    447 		span_init(&c->new, new, new);
    448 		span_init(&c->old, NULL, NULL);
    449 	} else {
    450 		/* insert into middle of an existing piece, therefore split the old
    451 		 * piece. That is we have 3 new pieces one containing the content
    452 		 * before the insertion point then one holding the newly inserted
    453 		 * text and one holding the content after the insertion point.
    454 		 */
    455 		Piece *before = piece_alloc(txt);
    456 		new = piece_alloc(txt);
    457 		Piece *after = piece_alloc(txt);
    458 		if (!before || !new || !after)
    459 			return false;
    460 		piece_init(before, p->prev, new, p->data, off);
    461 		piece_init(new, before, after, data, len);
    462 		piece_init(after, new, p->next, p->data + off, p->len - off);
    463 
    464 		span_init(&c->new, before, after);
    465 		span_init(&c->old, p, p);
    466 	}
    467 
    468 	cache_piece(txt, new);
    469 	span_swap(txt, &c->old, &c->new);
    470 	return true;
    471 }
    472 
    473 static size_t revision_undo(Text *txt, Revision *rev) {
    474 	size_t pos = EPOS;
    475 	for (Change *c = rev->change; c; c = c->next) {
    476 		span_swap(txt, &c->new, &c->old);
    477 		pos = c->pos;
    478 	}
    479 	return pos;
    480 }
    481 
    482 static size_t revision_redo(Text *txt, Revision *rev) {
    483 	size_t pos = EPOS;
    484 	Change *c = rev->change;
    485 	while (c->next)
    486 		c = c->next;
    487 	for ( ; c; c = c->prev) {
    488 		span_swap(txt, &c->old, &c->new);
    489 		pos = c->pos;
    490 		if (c->new.len > c->old.len)
    491 			pos += c->new.len - c->old.len;
    492 	}
    493 	return pos;
    494 }
    495 
    496 size_t text_undo(Text *txt) {
    497 	size_t pos = EPOS;
    498 	/* taking rev snapshot makes sure that txt->current_revision is reset */
    499 	text_snapshot(txt);
    500 	Revision *rev = txt->history->prev;
    501 	if (!rev)
    502 		return pos;
    503 	pos = revision_undo(txt, txt->history);
    504 	txt->history = rev;
    505 	lineno_cache_invalidate(&txt->lines);
    506 	return pos;
    507 }
    508 
    509 size_t text_redo(Text *txt) {
    510 	size_t pos = EPOS;
    511 	/* taking a snapshot makes sure that txt->current_revision is reset */
    512 	text_snapshot(txt);
    513 	Revision *rev = txt->history->next;
    514 	if (!rev)
    515 		return pos;
    516 	pos = revision_redo(txt, rev);
    517 	txt->history = rev;
    518 	lineno_cache_invalidate(&txt->lines);
    519 	return pos;
    520 }
    521 
    522 static bool history_change_branch(Revision *rev) {
    523 	bool changed = false;
    524 	while (rev->prev) {
    525 		if (rev->prev->next != rev) {
    526 			rev->prev->next = rev;
    527 			changed = true;
    528 		}
    529 		rev = rev->prev;
    530 	}
    531 	return changed;
    532 }
    533 
    534 static size_t history_traverse_to(Text *txt, Revision *rev) {
    535 	size_t pos = EPOS;
    536 	if (!rev)
    537 		return pos;
    538 	bool changed = history_change_branch(rev);
    539 	if (!changed) {
    540 		if (rev->seq == txt->history->seq) {
    541 			return txt->lines.pos;
    542 		} else if (rev->seq > txt->history->seq) {
    543 			while (txt->history != rev)
    544 				pos = text_redo(txt);
    545 			return pos;
    546 		} else if (rev->seq < txt->history->seq) {
    547 			while (txt->history != rev)
    548 				pos = text_undo(txt);
    549 			return pos;
    550 		}
    551 	} else {
    552 		while (txt->history->prev && txt->history->prev->next == txt->history)
    553 			text_undo(txt);
    554 		pos = text_undo(txt);
    555 		while (txt->history != rev)
    556 			pos = text_redo(txt);
    557 		return pos;
    558 	}
    559 	return pos;
    560 }
    561 
    562 size_t text_earlier(Text *txt) {
    563 	return history_traverse_to(txt, txt->history->earlier);
    564 }
    565 
    566 size_t text_later(Text *txt) {
    567 	return history_traverse_to(txt, txt->history->later);
    568 }
    569 
    570 size_t text_restore(Text *txt, time_t time) {
    571 	Revision *rev = txt->history;
    572 	while (time < rev->time && rev->earlier)
    573 		rev = rev->earlier;
    574 	while (time > rev->time && rev->later)
    575 		rev = rev->later;
    576 	time_t diff = labs(rev->time - time);
    577 	if (rev->earlier && rev->earlier != txt->history && labs(rev->earlier->time - time) < diff)
    578 		rev = rev->earlier;
    579 	if (rev->later && rev->later != txt->history && labs(rev->later->time - time) < diff)
    580 		rev = rev->later;
    581 	return history_traverse_to(txt, rev);
    582 }
    583 
    584 time_t text_state(const Text *txt) {
    585 	return txt->history->time;
    586 }
    587 
    588 Text *text_loadat_method(int dirfd, const char *filename, enum TextLoadMethod method) {
    589 	Text *txt = calloc(1, sizeof *txt);
    590 	if (!txt)
    591 		return NULL;
    592 	Piece *p = piece_alloc(txt);
    593 	if (!p)
    594 		goto out;
    595 	Block *block = NULL;
    596 	array_init(&txt->blocks);
    597 	lineno_cache_invalidate(&txt->lines);
    598 	if (filename) {
    599 		errno = 0;
    600 		block = block_load(dirfd, filename, method, &txt->info);
    601 		if (!block && errno)
    602 			goto out;
    603 		if (block && !array_add_ptr(&txt->blocks, block)) {
    604 			block_free(block);
    605 			goto out;
    606 		}
    607 	}
    608 
    609 	if (!block)
    610 		piece_init(p, &txt->begin, &txt->end, "\0", 0);
    611 	else
    612 		piece_init(p, &txt->begin, &txt->end, block->data, block->len);
    613 
    614 	piece_init(&txt->begin, NULL, p, NULL, 0);
    615 	piece_init(&txt->end, p, NULL, NULL, 0);
    616 	txt->size = p->len;
    617 	/* write an empty revision */
    618 	change_alloc(txt, EPOS);
    619 	text_snapshot(txt);
    620 	txt->saved_revision = txt->history;
    621 
    622 	return txt;
    623 out:
    624 	text_free(txt);
    625 	return NULL;
    626 }
    627 
    628 struct stat text_stat(const Text *txt) {
    629 	return txt->info;
    630 }
    631 
    632 void text_saved(Text *txt, struct stat *meta) {
    633 	if (meta)
    634 		txt->info = *meta;
    635 	txt->saved_revision = txt->history;
    636 	text_snapshot(txt);
    637 }
    638 
    639 Block *text_block_mmaped(Text *txt) {
    640 	Block *block = array_get_ptr(&txt->blocks, 0);
    641 	if (block && block->type == BLOCK_TYPE_MMAP_ORIG && block->size)
    642 		return block;
    643 	return NULL;
    644 }
    645 
    646 /* A delete operation can either start/stop midway through a piece or at
    647  * a boundary. In the former case a new piece is created to represent the
    648  * remaining text before/after the modification point.
    649  *
    650  *      /-+ --> +---------+ --> +-----+ --> +-----+ --> +-\
    651  *      | |     | existing|     |demo |     |text |     | |
    652  *      \-+ <-- +---------+ <-- +-----+ <-- +-----+ <-- +-/
    653  *                   ^                         ^
    654  *                   |------ delete range -----|
    655  *
    656  *      /-+ --> +----+ --> +--+ --> +-\
    657  *      | |     | exi|     |t |     | |
    658  *      \-+ <-- +----+ <-- +--+ <-- +-/
    659  */
    660 bool text_delete(Text *txt, size_t pos, size_t len) {
    661 	if (len == 0)
    662 		return true;
    663 	size_t pos_end;
    664 	if (!addu(pos, len, &pos_end) || pos_end > txt->size)
    665 		return false;
    666 	if (pos < txt->lines.pos)
    667 		lineno_cache_invalidate(&txt->lines);
    668 
    669 	Location loc = piece_get_intern(txt, pos);
    670 	Piece *p = loc.piece;
    671 	if (!p)
    672 		return false;
    673 	size_t off = loc.off;
    674 	if (cache_delete(txt, p, off, len))
    675 		return true;
    676 	Change *c = change_alloc(txt, pos);
    677 	if (!c)
    678 		return false;
    679 
    680 	bool midway_start = false, midway_end = false; /* split pieces? */
    681 	Piece *before, *after; /* unmodified pieces before/after deletion point */
    682 	Piece *start, *end;    /* span which is removed */
    683 	size_t cur;            /* how much has already been deleted */
    684 
    685 	if (off == p->len) {
    686 		/* deletion starts at a piece boundary */
    687 		cur = 0;
    688 		before = p;
    689 		start = p->next;
    690 	} else {
    691 		/* deletion starts midway through a piece */
    692 		midway_start = true;
    693 		cur = p->len - off;
    694 		start = p;
    695 		before = piece_alloc(txt);
    696 		if (!before)
    697 			return false;
    698 	}
    699 
    700 	/* skip all pieces which fall into deletion range */
    701 	while (cur < len) {
    702 		p = p->next;
    703 		cur += p->len;
    704 	}
    705 
    706 	if (cur == len) {
    707 		/* deletion stops at a piece boundary */
    708 		end = p;
    709 		after = p->next;
    710 	} else {
    711 		/* cur > len: deletion stops midway through a piece */
    712 		midway_end = true;
    713 		end = p;
    714 		after = piece_alloc(txt);
    715 		if (!after)
    716 			return false;
    717 		piece_init(after, before, p->next, p->data + p->len - (cur - len), cur - len);
    718 	}
    719 
    720 	if (midway_start) {
    721 		/* we finally know which piece follows our newly allocated before piece */
    722 		piece_init(before, start->prev, after, start->data, off);
    723 	}
    724 
    725 	Piece *new_start = NULL, *new_end = NULL;
    726 	if (midway_start) {
    727 		new_start = before;
    728 		if (!midway_end)
    729 			new_end = before;
    730 	}
    731 	if (midway_end) {
    732 		if (!midway_start)
    733 			new_start = after;
    734 		new_end = after;
    735 	}
    736 
    737 	span_init(&c->new, new_start, new_end);
    738 	span_init(&c->old, start, end);
    739 	span_swap(txt, &c->old, &c->new);
    740 	return true;
    741 }
    742 
    743 bool text_delete_range(Text *txt, const Filerange *r) {
    744 	if (!text_range_valid(r))
    745 		return false;
    746 	return text_delete(txt, r->start, text_range_size(r));
    747 }
    748 
    749 /* preserve the current text content such that it can be restored by
    750  * means of undo/redo operations */
    751 bool text_snapshot(Text *txt) {
    752 	if (txt->current_revision)
    753 		txt->last_revision = txt->current_revision;
    754 	txt->current_revision = NULL;
    755 	txt->cache = NULL;
    756 	return true;
    757 }
    758 
    759 
    760 void text_free(Text *txt) {
    761 	if (!txt)
    762 		return;
    763 
    764 	// free history
    765 	Revision *hist = txt->history;
    766 	while (hist && hist->prev)
    767 		hist = hist->prev;
    768 	while (hist) {
    769 		Revision *later = hist->later;
    770 		revision_free(hist);
    771 		hist = later;
    772 	}
    773 
    774 	for (Piece *next, *p = txt->pieces; p; p = next) {
    775 		next = p->global_next;
    776 		piece_free(p);
    777 	}
    778 
    779 	for (size_t i = 0, len = txt->blocks.len; i < len; i++)
    780 		block_free(array_get_ptr(&txt->blocks, i));
    781 	array_release(&txt->blocks);
    782 
    783 	free(txt);
    784 }
    785 
    786 bool text_modified(const Text *txt) {
    787 	return txt->saved_revision != txt->history;
    788 }
    789 
    790 bool text_mmaped(const Text *txt, const char *ptr) {
    791 	uintptr_t addr = (uintptr_t)ptr;
    792 	for (size_t i = 0, len = txt->blocks.len; i < len; i++) {
    793 		Block *blk = array_get_ptr(&txt->blocks, i);
    794 		if ((blk->type == BLOCK_TYPE_MMAP_ORIG || blk->type == BLOCK_TYPE_MMAP) &&
    795 		    (uintptr_t)(blk->data) <= addr && addr < (uintptr_t)(blk->data + blk->size))
    796 			return true;
    797 	}
    798 	return false;
    799 }
    800 
    801 static bool iterator_init(Iterator *it, size_t pos, Piece *p, size_t off) {
    802 	*it = (Iterator){
    803 		.pos = pos,
    804 		.piece = p,
    805 		.start = p ? p->data : NULL,
    806 		.end = p && p->data ? p->data + p->len : NULL,
    807 		.text = p && p->data ? p->data + off : NULL,
    808 	};
    809 	return text_iterator_valid(it);
    810 }
    811 
    812 bool text_iterator_init(const Text *txt, Iterator *it, size_t pos) {
    813 	Location loc = piece_get_extern(txt, pos);
    814 	return iterator_init(it, pos, loc.piece, loc.off);
    815 }
    816 
    817 Iterator text_iterator_get(const Text *txt, size_t pos) {
    818 	Iterator it;
    819 	text_iterator_init(txt, &it, pos);
    820 	return it;
    821 }
    822 
    823 bool text_iterator_next(Iterator *it) {
    824 	size_t rem = it->end - it->text;
    825 	return iterator_init(it, it->pos+rem, it->piece ? it->piece->next : NULL, 0);
    826 }
    827 
    828 bool text_iterator_prev(Iterator *it) {
    829 	size_t off = it->text - it->start;
    830 	size_t len = it->piece && it->piece->prev ? it->piece->prev->len : 0;
    831 	return iterator_init(it, it->pos-off, it->piece ? it->piece->prev : NULL, len);
    832 }
    833 
    834 const Text *text_iterator_text(const Iterator *it) {
    835 	return it->piece ? it->piece->text : NULL;
    836 }
    837 
    838 bool text_iterator_valid(const Iterator *it) {
    839 	/* filter out sentinel nodes */
    840 	return it->piece && it->piece->text;
    841 }
    842 
    843 bool text_iterator_has_next(const Iterator *it) {
    844 	return it->piece && it->piece->next;
    845 }
    846 
    847 bool text_iterator_has_prev(const Iterator *it) {
    848 	return it->piece && it->piece->prev;
    849 }
    850 
    851 size_t text_size(const Text *txt) {
    852 	return txt->size;
    853 }
    854 
    855 /* count the number of new lines '\n' in range [pos, pos+len) */
    856 static size_t lines_count(Text *txt, size_t pos, size_t len) {
    857 	size_t lines = 0;
    858 	for (Iterator it = text_iterator_get(txt, pos);
    859 	     text_iterator_valid(&it);
    860 	     text_iterator_next(&it)) {
    861 		const char *start = it.text;
    862 		while (len > 0 && start < it.end) {
    863 			size_t n = MIN(len, (size_t)(it.end - start));
    864 			const char *end = memchr(start, '\n', n);
    865 			if (!end) {
    866 				len -= n;
    867 				break;
    868 			}
    869 			lines++;
    870 			len -= end - start + 1;
    871 			start = end + 1;
    872 		}
    873 
    874 		if (len == 0)
    875 			break;
    876 	}
    877 	return lines;
    878 }
    879 
    880 /* skip n lines forward and return position afterwards */
    881 static size_t lines_skip_forward(Text *txt, size_t pos, size_t lines, size_t *lines_skipped) {
    882 	size_t lines_old = lines;
    883 	for (Iterator it = text_iterator_get(txt, pos);
    884 	     text_iterator_valid(&it);
    885 	     text_iterator_next(&it)) {
    886 		const char *start = it.text;
    887 		while (lines > 0 && start < it.end) {
    888 			size_t n = it.end - start;
    889 			const char *end = memchr(start, '\n', n);
    890 			if (!end) {
    891 				pos += n;
    892 				break;
    893 			}
    894 			pos += end - start + 1;
    895 			start = end + 1;
    896 			lines--;
    897 		}
    898 
    899 		if (lines == 0)
    900 			break;
    901 	}
    902 	if (lines_skipped)
    903 		*lines_skipped = lines_old - lines;
    904 	return pos;
    905 }
    906 
    907 static void lineno_cache_invalidate(LineCache *cache) {
    908 	cache->pos = 0;
    909 	cache->lineno = 1;
    910 }
    911 
    912 size_t text_pos_by_lineno(Text *txt, size_t lineno) {
    913 	size_t lines_skipped;
    914 	LineCache *cache = &txt->lines;
    915 	if (lineno <= 1)
    916 		return 0;
    917 	if (lineno > cache->lineno) {
    918 		cache->pos = lines_skip_forward(txt, cache->pos, lineno - cache->lineno, &lines_skipped);
    919 		cache->lineno += lines_skipped;
    920 	} else if (lineno < cache->lineno) {
    921 	#if 0
    922 		// TODO does it make sense to scan memory backwards here?
    923 		size_t diff = cache->lineno - lineno;
    924 		if (diff < lineno) {
    925 			lines_skip_backward(txt, cache->pos, diff);
    926 		} else
    927 	#endif
    928 		cache->pos = lines_skip_forward(txt, 0, lineno - 1, &lines_skipped);
    929 		cache->lineno = lines_skipped + 1;
    930 	}
    931 	return cache->lineno == lineno ? cache->pos : EPOS;
    932 }
    933 
    934 size_t text_lineno_by_pos(Text *txt, size_t pos) {
    935 	LineCache *cache = &txt->lines;
    936 	if (pos > txt->size)
    937 		pos = txt->size;
    938 	if (pos < cache->pos) {
    939 		size_t diff = cache->pos - pos;
    940 		if (diff < pos)
    941 			cache->lineno -= lines_count(txt, pos, diff);
    942 		else
    943 			cache->lineno = lines_count(txt, 0, pos) + 1;
    944 	} else if (pos > cache->pos) {
    945 		cache->lineno += lines_count(txt, cache->pos, pos - cache->pos);
    946 	}
    947 	cache->pos = text_line_begin(txt, pos);
    948 	return cache->lineno;
    949 }
    950 
    951 Mark text_mark_set(Text *txt, size_t pos) {
    952 	if (pos == txt->size)
    953 		return (Mark)&txt->end;
    954 	Location loc = piece_get_extern(txt, pos);
    955 	if (!loc.piece)
    956 		return EMARK;
    957 	return (Mark)(loc.piece->data + loc.off);
    958 }
    959 
    960 size_t text_mark_get(const Text *txt, Mark mark) {
    961 	size_t cur = 0;
    962 
    963 	if (mark == EMARK)
    964 		return EPOS;
    965 	if (mark == (Mark)&txt->end)
    966 		return txt->size;
    967 
    968 	for (Piece *p = txt->begin.next; p->next; p = p->next) {
    969 		Mark start = (Mark)(p->data);
    970 		Mark end = start + p->len;
    971 		if (start <= mark && mark < end)
    972 			return cur + (mark - start);
    973 		cur += p->len;
    974 	}
    975 
    976 	return EPOS;
    977 }