vis

a vi-like editor based on Plan 9's structural regular expressions

git clone https://9o.is/git/vis.git

text-io.c

(14420B)


      1 #include <fcntl.h>
      2 #include <unistd.h>
      3 #include <stdlib.h>
      4 #include <libgen.h>
      5 #include <stdio.h>
      6 #include <errno.h>
      7 #include <string.h>
      8 #include <limits.h>
      9 #include <sys/mman.h>
     10 #if CONFIG_ACL
     11 #include <sys/acl.h>
     12 #endif
     13 #if CONFIG_SELINUX
     14 #include <selinux/selinux.h>
     15 #endif
     16 
     17 #include "text.h"
     18 #include "text-internal.h"
     19 #include "text-util.h"
     20 #include "util.h"
     21 
/* State of one save operation. It is allocated by text_save_begin, used by
 * text_save_write_range to find the destination descriptor and released by
 * text_save_commit or text_save_cancel. */
struct TextSave {                  /* used to hold context between text_save_{begin,commit} calls */
	Text *txt;                 /* text to operate on */
	char *filename;            /* heap copy of the filename given to text_save_begin */
	char *tmpname;             /* temporary name used for atomic rename(2), NULL if none is pending */
	int fd;                    /* file descriptor text_save_write_range writes to, -1 if not open */
	int dirfd;                 /* directory file descriptor, relative to which we save */
	enum TextSaveMethod type;  /* method used to save file */
};
     30 
     31 /* Allocate blocks holding the actual file content in chunks of size: */
     32 #ifndef BLOCK_SIZE
     33 #define BLOCK_SIZE (1 << 20)
     34 #endif
     35 /* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
     36  * directly. Hence the former can be truncated, while doing so on the latter
     37  * results in havoc. */
     38 #define BLOCK_MMAP_SIZE (1 << 26)
     39 
     40 /* allocate a new block of MAX(size, BLOCK_SIZE) bytes */
     41 Block *block_alloc(size_t size) {
     42 	Block *blk = calloc(1, sizeof *blk);
     43 	if (!blk)
     44 		return NULL;
     45 	if (BLOCK_SIZE > size)
     46 		size = BLOCK_SIZE;
     47 	if (!(blk->data = malloc(size))) {
     48 		free(blk);
     49 		return NULL;
     50 	}
     51 	blk->type = BLOCK_TYPE_MALLOC;
     52 	blk->size = size;
     53 	return blk;
     54 }
     55 
     56 Block *block_read(size_t size, int fd) {
     57 	Block *blk = block_alloc(size);
     58 	if (!blk)
     59 		return NULL;
     60 	char *data = blk->data;
     61 	size_t rem = size;
     62 	while (rem > 0) {
     63 		ssize_t len = read(fd, data, rem);
     64 		if (len == -1) {
     65 			block_free(blk);
     66 			return NULL;
     67 		} else if (len == 0) {
     68 			break;
     69 		} else {
     70 			data += len;
     71 			rem -= len;
     72 		}
     73 	}
     74 	blk->len = size - rem;
     75 	return blk;
     76 }
     77 
     78 Block *block_mmap(size_t size, int fd, off_t offset) {
     79 	Block *blk = calloc(1, sizeof *blk);
     80 	if (!blk)
     81 		return NULL;
     82 	if (size) {
     83 		blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
     84 		if (blk->data == MAP_FAILED) {
     85 			free(blk);
     86 			return NULL;
     87 		}
     88 	}
     89 	blk->type = BLOCK_TYPE_MMAP_ORIG;
     90 	blk->size = size;
     91 	blk->len = size;
     92 	return blk;
     93 }
     94 
     95 Block *block_load(int dirfd, const char *filename, enum TextLoadMethod method, struct stat *info) {
     96 	Block *block = NULL;
     97 	int fd = openat(dirfd, filename, O_RDONLY);
     98 	if (fd == -1)
     99 		goto out;
    100 	if (fstat(fd, info) == -1)
    101 		goto out;
    102 	if (!S_ISREG(info->st_mode)) {
    103 		errno = S_ISDIR(info->st_mode) ? EISDIR : ENOTSUP;
    104 		goto out;
    105 	}
    106 
    107 	// XXX: use lseek(fd, 0, SEEK_END); instead?
    108 	size_t size = info->st_size;
    109 	if (size == 0)
    110 		goto out;
    111 	if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE))
    112 		block = block_read(size, fd);
    113 	else
    114 		block = block_mmap(size, fd, 0);
    115 out:
    116 	if (fd != -1)
    117 		close(fd);
    118 	return block;
    119 }
    120 
    121 void block_free(Block *blk) {
    122 	if (!blk)
    123 		return;
    124 	if (blk->type == BLOCK_TYPE_MALLOC)
    125 		free(blk->data);
    126 	else if ((blk->type == BLOCK_TYPE_MMAP_ORIG || blk->type == BLOCK_TYPE_MMAP) && blk->data)
    127 		munmap(blk->data, blk->size);
    128 	free(blk);
    129 }
    130 
    131 /* check whether block has enough free space to store len bytes */
    132 bool block_capacity(Block *blk, size_t len) {
    133 	return blk->size - blk->len >= len;
    134 }
    135 
    136 /* append data to block, assumes there is enough space available */
    137 const char *block_append(Block *blk, const char *data, size_t len) {
    138 	char *dest = memcpy(blk->data + blk->len, data, len);
    139 	blk->len += len;
    140 	return dest;
    141 }
    142 
    143 /* insert data into block at an arbitrary position, this should only be used with
    144  * data of the most recently created piece. */
    145 bool block_insert(Block *blk, size_t pos, const char *data, size_t len) {
    146 	if (pos > blk->len || !block_capacity(blk, len))
    147 		return false;
    148 	if (blk->len == pos)
    149 		return block_append(blk, data, len);
    150 	char *insert = blk->data + pos;
    151 	memmove(insert + len, insert, blk->len - pos);
    152 	memcpy(insert, data, len);
    153 	blk->len += len;
    154 	return true;
    155 }
    156 
    157 /* delete data from a block at an arbitrary position, this should only be used with
    158  * data of the most recently created piece. */
    159 bool block_delete(Block *blk, size_t pos, size_t len) {
    160 	size_t end;
    161 	if (!addu(pos, len, &end) || end > blk->len)
    162 		return false;
    163 	if (blk->len == pos) {
    164 		blk->len -= len;
    165 		return true;
    166 	}
    167 	char *delete = blk->data + pos;
    168 	memmove(delete, delete + len, blk->len - pos - len);
    169 	blk->len -= len;
    170 	return true;
    171 }
    172 
    173 Text *text_load(const char *filename) {
    174 	return text_load_method(filename, TEXT_LOAD_AUTO);
    175 }
    176 
    177 Text *text_loadat(int dirfd, const char *filename) {
    178 	return text_loadat_method(dirfd, filename, TEXT_LOAD_AUTO);
    179 }
    180 
    181 Text *text_load_method(const char *filename, enum TextLoadMethod method) {
    182 	return text_loadat_method(AT_FDCWD, filename, method);
    183 }
    184 
    185 ssize_t write_all(int fd, const char *buf, size_t count) {
    186 	size_t rem = count;
    187 	while (rem > 0) {
    188 		ssize_t written = write(fd, buf, rem > INT_MAX ? INT_MAX : rem);
    189 		if (written < 0) {
    190 			if (errno == EAGAIN || errno == EINTR)
    191 				continue;
    192 			return -1;
    193 		} else if (written == 0) {
    194 			break;
    195 		}
    196 		rem -= written;
    197 		buf += written;
    198 	}
    199 	return count - rem;
    200 }
    201 
    202 static bool preserve_acl(int src, int dest) {
    203 #if CONFIG_ACL
    204 	acl_t acl = acl_get_fd(src);
    205 	if (!acl)
    206 		return errno == ENOTSUP ? true : false;
    207 	if (acl_set_fd(dest, acl) == -1) {
    208 		acl_free(acl);
    209 		return false;
    210 	}
    211 	acl_free(acl);
    212 #endif /* CONFIG_ACL */
    213 	return true;
    214 }
    215 
    216 static bool preserve_selinux_context(int src, int dest) {
    217 #if CONFIG_SELINUX
    218 	char *context = NULL;
    219 	if (!is_selinux_enabled())
    220 		return true;
    221 	if (fgetfilecon(src, &context) == -1)
    222 		return errno == ENOTSUP ? true : false;
    223 	if (fsetfilecon(dest, context) == -1) {
    224 		freecon(context);
    225 		return false;
    226 	}
    227 	freecon(context);
    228 #endif /* CONFIG_SELINUX */
    229 	return true;
    230 }
    231 
    232 static int mkstempat(int dirfd, char *template) {
    233 	if (dirfd == AT_FDCWD)
    234 		return mkstemp(template);
    235 	// FIXME: not thread safe
    236 	int fd = -1;
    237 	int cwd = open(".", O_RDONLY|O_DIRECTORY);
    238 	if (cwd == -1)
    239 		goto err;
    240 	if (fchdir(dirfd) == -1)
    241 		goto err;
    242 	fd = mkstemp(template);
    243 err:
    244 	if (cwd != -1) {
    245 		int ret = fchdir(cwd);
    246 		close(cwd);
    247 		if (ret != 0)
    248 		  return -1;
    249 	}
    250 	return fd;
    251 }
    252 
    253 /* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a
    254  * randomly generated, unique suffix) and try to preserve all important
    255  * meta data. After the file content has been written to this temporary
    256  * file, text_save_commit_atomic will atomically move it to  its final
    257  * (possibly already existing) destination using rename(2).
    258  *
    259  * This approach does not work if:
    260  *
    261  *   - the file is a symbolic link
    262  *   - the file is a hard link
    263  *   - file ownership can not be preserved
    264  *   - file group can not be preserved
    265  *   - directory permissions do not allow creation of a new file
    266  *   - POSIX ACL can not be preserved (if enabled)
    267  *   - SELinux security context can not be preserved (if enabled)
    268  */
    269 static bool text_save_begin_atomic(TextSave *ctx) {
    270 	int oldfd, saved_errno;
    271 	if ((oldfd = openat(ctx->dirfd, ctx->filename, O_RDONLY)) == -1 && errno != ENOENT)
    272 		goto err;
    273 	struct stat oldmeta = { 0 };
    274 	if (oldfd != -1 && fstatat(ctx->dirfd, ctx->filename, &oldmeta, AT_SYMLINK_NOFOLLOW) == -1)
    275 		goto err;
    276 	if (oldfd != -1) {
    277 		if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */
    278 			goto err;
    279 		if (oldmeta.st_nlink > 1) /* hard link */
    280 			goto err;
    281 	}
    282 
    283 	char suffix[] = ".vis.XXXXXX";
    284 	size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix);
    285 	char *dir = strdup(ctx->filename);
    286 	char *base = strdup(ctx->filename);
    287 
    288 	if (!(ctx->tmpname = malloc(len)) || !dir || !base) {
    289 		free(dir);
    290 		free(base);
    291 		goto err;
    292 	}
    293 
    294 	snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix);
    295 	free(dir);
    296 	free(base);
    297 
    298 	if ((ctx->fd = mkstempat(ctx->dirfd, ctx->tmpname)) == -1)
    299 		goto err;
    300 
    301 	if (oldfd == -1) {
    302 		mode_t mask = umask(0);
    303 		umask(mask);
    304 		if (fchmod(ctx->fd, 0666 & ~mask) == -1)
    305 			goto err;
    306 	} else {
    307 		if (fchmod(ctx->fd, oldmeta.st_mode) == -1)
    308 			goto err;
    309 		if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd))
    310 			goto err;
    311 		/* change owner if necessary */
    312 		if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1)
    313 			goto err;
    314 		/* change group if necessary, in case of failure some editors reset
    315 		 * the group permissions to the same as for others */
    316 		if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1)
    317 			goto err;
    318 		close(oldfd);
    319 	}
    320 
    321 	ctx->type = TEXT_SAVE_ATOMIC;
    322 	return true;
    323 err:
    324 	saved_errno = errno;
    325 	if (oldfd != -1)
    326 		close(oldfd);
    327 	if (ctx->fd != -1)
    328 		close(ctx->fd);
    329 	ctx->fd = -1;
    330 	free(ctx->tmpname);
    331 	ctx->tmpname = NULL;
    332 	errno = saved_errno;
    333 	return false;
    334 }
    335 
    336 static bool text_save_commit_atomic(TextSave *ctx) {
    337 	if (fsync(ctx->fd) == -1)
    338 		return false;
    339 
    340 	struct stat meta = { 0 };
    341 	if (fstat(ctx->fd, &meta) == -1)
    342 		return false;
    343 
    344 	bool close_failed = (close(ctx->fd) == -1);
    345 	ctx->fd = -1;
    346 	if (close_failed)
    347 		return false;
    348 
    349 	if (renameat(ctx->dirfd, ctx->tmpname, ctx->dirfd, ctx->filename) == -1)
    350 		return false;
    351 
    352 	free(ctx->tmpname);
    353 	ctx->tmpname = NULL;
    354 
    355 	int dir = openat(ctx->dirfd, dirname(ctx->filename), O_DIRECTORY|O_RDONLY);
    356 	if (dir == -1)
    357 		return false;
    358 
    359 	if (fsync(dir) == -1 && errno != EINVAL) {
    360 		close(dir);
    361 		return false;
    362 	}
    363 
    364 	if (close(dir) == -1)
    365 		return false;
    366 
    367 	text_saved(ctx->txt, &meta);
    368 	return true;
    369 }
    370 
    371 static bool text_save_begin_inplace(TextSave *ctx) {
    372 	Text *txt = ctx->txt;
    373 	struct stat now = { 0 };
    374 	int newfd = -1, saved_errno;
    375 	if ((ctx->fd = openat(ctx->dirfd, ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1)
    376 		goto err;
    377 	if (fstat(ctx->fd, &now) == -1)
    378 		goto err;
    379 	struct stat loaded = text_stat(txt);
    380 	Block *block = text_block_mmaped(txt);
    381 	if (block && now.st_dev == loaded.st_dev && now.st_ino == loaded.st_ino) {
    382 		/* The file we are going to overwrite is currently mmap-ed from
    383 		 * text_load, therefore we copy the mmap-ed block to a temporary
    384 		 * file and remap it at the same position such that all pointers
    385 		 * from the various pieces are still valid.
    386 		 */
    387 		size_t size = block->size;
    388 		char tmpname[32] = "/tmp/vis-XXXXXX";
    389 		newfd = mkstemp(tmpname);
    390 		if (newfd == -1)
    391 			goto err;
    392 		if (unlink(tmpname) == -1)
    393 			goto err;
    394 		ssize_t written = write_all(newfd, block->data, size);
    395 		if (written == -1 || (size_t)written != size)
    396 			goto err;
    397 		void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0);
    398 		if (data == MAP_FAILED)
    399 			goto err;
    400 		bool close_failed = (close(newfd) == -1);
    401 		newfd = -1;
    402 		if (close_failed)
    403 			goto err;
    404 		block->type = BLOCK_TYPE_MMAP;
    405 	}
    406 	/* overwrite the existing file content, if something goes wrong
    407 	 * here we are screwed, TODO: make a backup before? */
    408 	if (ftruncate(ctx->fd, 0) == -1)
    409 		goto err;
    410 	ctx->type = TEXT_SAVE_INPLACE;
    411 	return true;
    412 err:
    413 	saved_errno = errno;
    414 	if (newfd != -1)
    415 		close(newfd);
    416 	if (ctx->fd != -1)
    417 		close(ctx->fd);
    418 	ctx->fd = -1;
    419 	errno = saved_errno;
    420 	return false;
    421 }
    422 
    423 static bool text_save_commit_inplace(TextSave *ctx) {
    424 	if (fsync(ctx->fd) == -1)
    425 		return false;
    426 	struct stat meta = { 0 };
    427 	if (fstat(ctx->fd, &meta) == -1)
    428 		return false;
    429 	if (close(ctx->fd) == -1)
    430 		return false;
    431 	text_saved(ctx->txt, &meta);
    432 	return true;
    433 }
    434 
    435 TextSave *text_save_begin(Text *txt, int dirfd, const char *filename, enum TextSaveMethod type) {
    436 	if (!filename)
    437 		return NULL;
    438 	TextSave *ctx = calloc(1, sizeof *ctx);
    439 	if (!ctx)
    440 		return NULL;
    441 	ctx->txt = txt;
    442 	ctx->fd = -1;
    443 	ctx->dirfd = dirfd;
    444 	if (!(ctx->filename = strdup(filename)))
    445 		goto err;
    446 	errno = 0;
    447 	if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx))
    448 		return ctx;
    449 	if (errno == ENOSPC)
    450 		goto err;
    451 	if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx))
    452 		return ctx;
    453 err:
    454 	text_save_cancel(ctx);
    455 	return NULL;
    456 }
    457 
    458 bool text_save_commit(TextSave *ctx) {
    459 	if (!ctx)
    460 		return true;
    461 	bool ret;
    462 	switch (ctx->type) {
    463 	case TEXT_SAVE_ATOMIC:
    464 		ret = text_save_commit_atomic(ctx);
    465 		break;
    466 	case TEXT_SAVE_INPLACE:
    467 		ret = text_save_commit_inplace(ctx);
    468 		break;
    469 	default:
    470 		ret = false;
    471 		break;
    472 	}
    473 
    474 	text_save_cancel(ctx);
    475 	return ret;
    476 }
    477 
    478 void text_save_cancel(TextSave *ctx) {
    479 	if (!ctx)
    480 		return;
    481 	int saved_errno = errno;
    482 	if (ctx->fd != -1)
    483 		close(ctx->fd);
    484 	if (ctx->tmpname && ctx->tmpname[0])
    485 		unlinkat(ctx->dirfd, ctx->tmpname, 0);
    486 	free(ctx->tmpname);
    487 	free(ctx->filename);
    488 	free(ctx);
    489 	errno = saved_errno;
    490 }
    491 
    492 /* First try to save the file atomically using rename(2) if this does not
    493  * work overwrite the file in place. However if something goes wrong during
    494  * this overwrite the original file is permanently damaged.
    495  */
    496 bool text_save(Text *txt, const char *filename) {
    497 	return text_saveat(txt, AT_FDCWD, filename);
    498 }
    499 
    500 bool text_saveat(Text *txt, int dirfd, const char *filename) {
    501 	return text_saveat_method(txt, dirfd, filename, TEXT_SAVE_AUTO);
    502 }
    503 
    504 bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) {
    505 	return text_saveat_method(txt, AT_FDCWD, filename, method);
    506 }
    507 
    508 bool text_saveat_method(Text *txt, int dirfd, const char *filename, enum TextSaveMethod method) {
    509 	if (!filename) {
    510 		text_saved(txt, NULL);
    511 		return true;
    512 	}
    513 	TextSave *ctx = text_save_begin(txt, dirfd, filename, method);
    514 	if (!ctx)
    515 		return false;
    516 	Filerange range = (Filerange){ .start = 0, .end = text_size(txt) };
    517 	ssize_t written = text_save_write_range(ctx, &range);
    518 	if (written == -1 || (size_t)written != text_range_size(&range)) {
    519 		text_save_cancel(ctx);
    520 		return false;
    521 	}
    522 	return text_save_commit(ctx);
    523 }
    524 
    525 ssize_t text_save_write_range(TextSave *ctx, const Filerange *range) {
    526 	return text_write_range(ctx->txt, range, ctx->fd);
    527 }
    528 
    529 ssize_t text_write(const Text *txt, int fd) {
    530 	Filerange r = (Filerange){ .start = 0, .end = text_size(txt) };
    531 	return text_write_range(txt, &r, fd);
    532 }
    533 
    534 ssize_t text_write_range(const Text *txt, const Filerange *range, int fd) {
    535 	size_t size = text_range_size(range), rem = size;
    536 	for (Iterator it = text_iterator_get(txt, range->start);
    537 	     rem > 0 && text_iterator_valid(&it);
    538 	     text_iterator_next(&it)) {
    539 		size_t prem = it.end - it.text;
    540 		if (prem > rem)
    541 			prem = rem;
    542 		ssize_t written = write_all(fd, it.text, prem);
    543 		if (written == -1)
    544 			return -1;
    545 		rem -= written;
    546 		if ((size_t)written != prem)
    547 			break;
    548 	}
    549 	return size - rem;
    550 }