git-query

git data extraction tool using c and libgit2

git clone https://9o.is/git/git-query.git

commit 051b9f1965f8d669b01a1f38a33f73e3a93ae651
parent 270f227e07f57d54d1b0d471893c8011e12aa6ff
Author: Jul <jul@9o.is>
Date:   Mon,  2 Mar 2026 15:49:30 +0800

save git-query

Diffstat:
M Makefile    |   6 +++++-
A git-query.c | 775 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 780 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile @@ -29,7 +29,8 @@ COMPATSRC = \ strlcpy.c BIN = \ stagit\ - stagit-index + stagit-index\ + git-query MAN1 = \ stagit.1\ stagit-index.1 @@ -73,6 +74,9 @@ stagit: stagit.o ${COMPATOBJ} stagit-index: stagit-index.o ${COMPATOBJ} ${CC} -o $@ stagit-index.o ${COMPATOBJ} ${STAGIT_LDFLAGS} +git-query: git-query.o ${COMPATOBJ} + ${CC} -o $@ git-query.o ${COMPATOBJ} ${STAGIT_LDFLAGS} + clean: rm -f ${BIN} ${OBJ} ${NAME}-${VERSION}.tar.gz diff --git a/git-query.c b/git-query.c @@ -0,0 +1,775 @@ +#define _POSIX_C_SOURCE 200809L +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include <git2.h> + +#include "compat.h" + +#define LEN(s) (sizeof(s)/sizeof(*s)) + +struct deltainfo { + git_patch *patch; + + size_t addcount; + size_t delcount; +}; + +struct commitinfo { + const git_oid *id; + + char oid[GIT_OID_HEXSZ + 1]; + char parentoid[GIT_OID_HEXSZ + 1]; + + const git_signature *author; + const char *summary; + const char *msg; + + git_diff *diff; + git_commit *commit; + git_commit *parent; + git_tree *commit_tree; + git_tree *parent_tree; + + size_t addcount; + size_t delcount; + size_t filecount; + + struct deltainfo **deltas; + size_t ndeltas; +}; + +static git_repository *repo; +static const char *repodir; + +void +checkfileerror(FILE *fp, const char *name, int mode) +{ + if (mode == 'r' && ferror(fp)) + errx(1, "read error: %s", name); + else if (mode == 'w' && (fflush(fp) || ferror(fp))) + errx(1, "write error: %s", name); +} + +void +joinpath(char *buf, size_t bufsiz, const char *path, const char *path2) +{ + int r; + + r = snprintf(buf, bufsiz, "%s%s%s", + path, path[0] && path[strlen(path) - 1] != '/' ? 
"/" : "", path2); + if (r < 0 || (size_t)r >= bufsiz) + errx(1, "path truncated: '%s%s%s'", + path, path[0] && path[strlen(path) - 1] != '/' ? "/" : "", path2); +} + +void +xmlencode(FILE *fp, const char *s, size_t len) +{ + size_t i; + + for (i = 0; *s && i < len; s++, i++) { + switch(*s) { + case '<': fputs("&lt;", fp); break; + case '>': fputs("&gt;", fp); break; + case '\'': fputs("&#39;", fp); break; + case '&': fputs("&amp;", fp); break; + case '"': fputs("&quot;", fp); break; + default: putc(*s, fp); + } + } +} + +void +xmlencodeline(FILE *fp, const char *s, size_t len) +{ + size_t i; + + for (i = 0; *s && i < len; s++, i++) { + switch(*s) { + case '<': fputs("&lt;", fp); break; + case '>': fputs("&gt;", fp); break; + case '\'': fputs("&#39;", fp); break; + case '&': fputs("&amp;", fp); break; + case '"': fputs("&quot;", fp); break; + case '\r': break; + case '\n': break; + default: putc(*s, fp); + } + } +} + +void +printtimez(FILE *fp, const git_time *intime) +{ + struct tm *intm; + time_t t; + char out[32]; + + t = (time_t)intime->time; + if (!(intm = gmtime(&t))) + return; + strftime(out, sizeof(out), "%Y-%m-%dT%H:%M:%SZ", intm); + fputs(out, fp); +} + +void +deltainfo_free(struct deltainfo *di) +{ + if (!di) + return; + git_patch_free(di->patch); + memset(di, 0, sizeof(*di)); + free(di); +} + +void +commitinfo_free(struct commitinfo *ci) +{ + size_t i; + + if (!ci) + return; + if (ci->deltas) + for (i = 0; i < ci->ndeltas; i++) + deltainfo_free(ci->deltas[i]); + + free(ci->deltas); + git_diff_free(ci->diff); + git_tree_free(ci->commit_tree); + git_tree_free(ci->parent_tree); + git_commit_free(ci->commit); + git_commit_free(ci->parent); + memset(ci, 0, sizeof(*ci)); + free(ci); +} + +struct commitinfo * +commitinfo_getbyoid(const git_oid *id) +{ + struct commitinfo *ci; + + if (!(ci = calloc(1, sizeof(struct commitinfo)))) + err(1, "calloc"); + + if (git_commit_lookup(&(ci->commit), repo, id)) + goto err; + ci->id = id; + + git_oid_tostr(ci->oid, 
sizeof(ci->oid), git_commit_id(ci->commit)); + git_oid_tostr(ci->parentoid, sizeof(ci->parentoid), git_commit_parent_id(ci->commit, 0)); + + ci->author = git_commit_author(ci->commit); + ci->summary = git_commit_summary(ci->commit); + ci->msg = git_commit_message(ci->commit); + + return ci; + +err: + commitinfo_free(ci); + return NULL; +} + +int +commitinfo_getdiff(struct commitinfo *ci) +{ + struct deltainfo *di; + git_diff_options opts; + git_diff_find_options fopts; + const git_diff_delta *delta; + const git_diff_hunk *hunk; + const git_diff_line *line; + git_patch *patch = NULL; + size_t ndeltas, nhunks, nhunklines; + size_t i, j, k; + + if (git_tree_lookup(&(ci->commit_tree), repo, git_commit_tree_id(ci->commit))) + goto err; + if (!git_commit_parent(&(ci->parent), ci->commit, 0)) { + if (git_tree_lookup(&(ci->parent_tree), repo, git_commit_tree_id(ci->parent))) { + ci->parent = NULL; + ci->parent_tree = NULL; + } + } + + git_diff_init_options(&opts, GIT_DIFF_OPTIONS_VERSION); + opts.flags |= GIT_DIFF_DISABLE_PATHSPEC_MATCH | + GIT_DIFF_IGNORE_SUBMODULES | + GIT_DIFF_INCLUDE_TYPECHANGE; + if (git_diff_tree_to_tree(&(ci->diff), repo, ci->parent_tree, ci->commit_tree, &opts)) + goto err; + + if (git_diff_find_init_options(&fopts, GIT_DIFF_FIND_OPTIONS_VERSION)) + goto err; + fopts.flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES | + GIT_DIFF_FIND_EXACT_MATCH_ONLY; + if (git_diff_find_similar(ci->diff, &fopts)) + goto err; + + ndeltas = git_diff_num_deltas(ci->diff); + if (ndeltas && !(ci->deltas = calloc(ndeltas, sizeof(struct deltainfo *)))) + err(1, "calloc"); + + for (i = 0; i < ndeltas; i++) { + if (git_patch_from_diff(&patch, ci->diff, i)) + goto err; + + if (!(di = calloc(1, sizeof(struct deltainfo)))) + err(1, "calloc"); + di->patch = patch; + ci->deltas[i] = di; + + delta = git_patch_get_delta(patch); + + if (delta->flags & GIT_DIFF_FLAG_BINARY) + continue; + + nhunks = git_patch_num_hunks(patch); + for (j = 0; j < nhunks; j++) { + if 
(git_patch_get_hunk(&hunk, &nhunklines, patch, j)) + break; + for (k = 0; ; k++) { + if (git_patch_get_line_in_hunk(&line, patch, j, k)) + break; + if (line->old_lineno == -1) { + di->addcount++; + ci->addcount++; + } else if (line->new_lineno == -1) { + di->delcount++; + ci->delcount++; + } + } + } + } + ci->ndeltas = i; + ci->filecount = i; + + return 0; + +err: + git_diff_free(ci->diff); + ci->diff = NULL; + git_tree_free(ci->commit_tree); + ci->commit_tree = NULL; + git_tree_free(ci->parent_tree); + ci->parent_tree = NULL; + git_commit_free(ci->parent); + ci->parent = NULL; + + if (ci->deltas) + for (i = 0; i < ci->ndeltas; i++) + deltainfo_free(ci->deltas[i]); + free(ci->deltas); + ci->deltas = NULL; + ci->ndeltas = 0; + ci->addcount = 0; + ci->delcount = 0; + ci->filecount = 0; + + return -1; +} + +void +print_info(FILE *fp) +{ + char path[PATH_MAX], repodirabs[PATH_MAX + 1], *p; + char description[255] = ""; + char owner[255] = ""; + char cloneurl[1024] = ""; + FILE *fpread; + + if (!realpath(repodir, repodirabs)) + err(1, "realpath"); + + if ((p = strrchr(repodirabs, '/'))) + p++; + else + p = ""; + + fputs("<repository>\n", fp); + fputs("<name>", fp); + xmlencode(fp, p, strlen(p)); + fputs("</name>\n", fp); + + joinpath(path, sizeof(path), repodir, "description"); + if (!(fpread = fopen(path, "r"))) { + joinpath(path, sizeof(path), repodir, ".git/description"); + fpread = fopen(path, "r"); + } + if (fpread) { + if (!fgets(description, sizeof(description), fpread)) + description[0] = '\0'; + fclose(fpread); + } + fputs("<description>", fp); + xmlencode(fp, description, strlen(description)); + fputs("</description>\n", fp); + + joinpath(path, sizeof(path), repodir, "owner"); + if (!(fpread = fopen(path, "r"))) { + joinpath(path, sizeof(path), repodir, ".git/owner"); + fpread = fopen(path, "r"); + } + if (fpread) { + if (!fgets(owner, sizeof(owner), fpread)) + owner[0] = '\0'; + fclose(fpread); + owner[strcspn(owner, "\n")] = '\0'; + } + fputs("<owner>", fp); 
+ xmlencode(fp, owner, strlen(owner)); + fputs("</owner>\n", fp); + + joinpath(path, sizeof(path), repodir, "url"); + if (!(fpread = fopen(path, "r"))) { + joinpath(path, sizeof(path), repodir, ".git/url"); + fpread = fopen(path, "r"); + } + if (fpread) { + if (!fgets(cloneurl, sizeof(cloneurl), fpread)) + cloneurl[0] = '\0'; + fclose(fpread); + cloneurl[strcspn(cloneurl, "\n")] = '\0'; + } + fputs("<cloneurl>", fp); + xmlencode(fp, cloneurl, strlen(cloneurl)); + fputs("</cloneurl>\n", fp); + + fputs("</repository>\n", fp); +} + +void +print_log(FILE *fp, size_t nlogcommits) +{ + git_object *obj = NULL; + const git_oid *head = NULL; + struct commitinfo *ci; + git_revwalk *w = NULL; + git_oid id; + size_t count = 0; + + if (git_revparse_single(&obj, repo, "HEAD")) + errx(1, "cannot resolve HEAD"); + head = git_object_id(obj); + + fprintf(fp, "<log count=\"%zu\">\n", nlogcommits); + + git_revwalk_new(&w, repo); + git_revwalk_push(w, head); + + while (!git_revwalk_next(&id, w)) { + if (nlogcommits && count >= nlogcommits) + break; + + if (!(ci = commitinfo_getbyoid(&id))) + break; + if (commitinfo_getdiff(ci) == -1) + goto next; + + fprintf(fp, "<commit oid=\"%s\"", ci->oid); + if (ci->parentoid[0]) + fprintf(fp, " parent=\"%s\"", ci->parentoid); + fputs(">\n", fp); + + if (ci->author) { + fputs("<author name=\"", fp); + xmlencode(fp, ci->author->name, strlen(ci->author->name)); + fputs("\" email=\"", fp); + xmlencode(fp, ci->author->email, strlen(ci->author->email)); + fputs("\" when=\"", fp); + printtimez(fp, &(ci->author->when)); + fputs("\"/>\n", fp); + } + + fputs("<summary>", fp); + xmlencode(fp, ci->summary, strlen(ci->summary)); + fputs("</summary>\n", fp); + + fputs("<message>", fp); + xmlencode(fp, ci->msg, strlen(ci->msg)); + fputs("</message>\n", fp); + + fprintf(fp, "<stats files=\"%zu\" add=\"%zu\" del=\"%zu\"/>\n", + ci->filecount, ci->addcount, ci->delcount); + + fputs("</commit>\n", fp); + count++; + +next: + commitinfo_free(ci); + } + + 
git_revwalk_free(w); + git_object_free(obj); + + fputs("</log>\n", fp); +} + +void +print_commit(FILE *fp, const char *oidstr) +{ + struct commitinfo *ci; + git_oid id; + git_object *obj = NULL; + const git_diff_delta *delta; + const git_diff_hunk *hunk; + const git_diff_line *line; + git_patch *patch; + size_t nhunks, nhunklines; + size_t i, j, k; + int c; + + if (!oidstr) { + if (git_revparse_single(&obj, repo, "HEAD")) + errx(1, "cannot resolve HEAD"); + git_oid_cpy(&id, git_object_id(obj)); + git_object_free(obj); + } else { + if (git_oid_fromstrn(&id, oidstr, strlen(oidstr))) + errx(1, "invalid oid: %s", oidstr); + } + + if (!(ci = commitinfo_getbyoid(&id))) + errx(1, "commit not found: %s", oidstr); + + if (commitinfo_getdiff(ci) == -1) + errx(1, "failed to get diff for commit: %s", oidstr); + + fprintf(fp, "<commit oid=\"%s\"", ci->oid); + if (ci->parentoid[0]) + fprintf(fp, " parent=\"%s\"", ci->parentoid); + fputs(">\n", fp); + + if (ci->author) { + fputs("<author name=\"", fp); + xmlencode(fp, ci->author->name, strlen(ci->author->name)); + fputs("\" email=\"", fp); + xmlencode(fp, ci->author->email, strlen(ci->author->email)); + fputs("\" when=\"", fp); + printtimez(fp, &(ci->author->when)); + fputs("\"/>\n", fp); + } + + fputs("<message>", fp); + xmlencode(fp, ci->msg, strlen(ci->msg)); + fputs("</message>\n", fp); + + fputs("<diff>\n", fp); + for (i = 0; i < ci->ndeltas; i++) { + patch = ci->deltas[i]->patch; + delta = git_patch_get_delta(patch); + + switch (delta->status) { + case GIT_DELTA_ADDED: c = 'A'; break; + case GIT_DELTA_COPIED: c = 'C'; break; + case GIT_DELTA_DELETED: c = 'D'; break; + case GIT_DELTA_MODIFIED: c = 'M'; break; + case GIT_DELTA_RENAMED: c = 'R'; break; + case GIT_DELTA_TYPECHANGE: c = 'T'; break; + default: c = ' '; break; + } + + fprintf(fp, "<file path=\""); + xmlencode(fp, delta->new_file.path, strlen(delta->new_file.path)); + fprintf(fp, "\" status=\"%c\" add=\"%zu\" del=\"%zu\">\n", + c, ci->deltas[i]->addcount, 
ci->deltas[i]->delcount); + + if (delta->flags & GIT_DIFF_FLAG_BINARY) { + fputs("<binary/>\n", fp); + fputs("</file>\n", fp); + continue; + } + + nhunks = git_patch_num_hunks(patch); + for (j = 0; j < nhunks; j++) { + if (git_patch_get_hunk(&hunk, &nhunklines, patch, j)) + break; + + fputs("<hunk>", fp); + xmlencode(fp, hunk->header, hunk->header_len); + fputs("</hunk>\n", fp); + + for (k = 0; ; k++) { + if (git_patch_get_line_in_hunk(&line, patch, j, k)) + break; + + if (line->old_lineno == -1) + fputs("<line type=\"added\">", fp); + else if (line->new_lineno == -1) + fputs("<line type=\"removed\">", fp); + else + fputs("<line type=\"context\">", fp); + + xmlencodeline(fp, line->content, line->content_len); + fputs("</line>\n", fp); + } + } + fputs("</file>\n", fp); + } + fputs("</diff>\n", fp); + fputs("</commit>\n", fp); + + commitinfo_free(ci); +} + +void +print_tree(FILE *fp, const char *oidstr) +{ + git_tree *tree = NULL; + git_commit *commit = NULL; + git_oid id, tree_id; + const git_tree_entry *entry = NULL; + const char *entryname; + size_t count, i; + char oid[GIT_OID_HEXSZ + 1]; + + if (oidstr) { + if (git_oid_fromstrn(&id, oidstr, strlen(oidstr))) + errx(1, "invalid oid: %s", oidstr); + if (git_commit_lookup(&commit, repo, &id)) + errx(1, "commit not found: %s", oidstr); + git_commit_tree(&tree, commit); + } else { + git_object *obj = NULL; + if (git_revparse_single(&obj, repo, "HEAD")) + errx(1, "cannot resolve HEAD"); + git_commit_lookup(&commit, repo, git_object_id(obj)); + git_commit_tree(&tree, commit); + git_object_free(obj); + } + + git_oid_tostr(oid, sizeof(oid), git_tree_id(tree)); + fprintf(fp, "<tree oid=\"%s\">\n", oid); + + count = git_tree_entrycount(tree); + for (i = 0; i < count; i++) { + if (!(entry = git_tree_entry_byindex(tree, i)) || + !(entryname = git_tree_entry_name(entry))) + continue; + + fprintf(fp, "<entry path=\"", fp); + xmlencode(fp, entryname, strlen(entryname)); + fputs("\" mode=\"", fp); + + switch 
(git_tree_entry_type(entry)) { + case GIT_OBJ_BLOB: { + git_object *blobobj = NULL; + if (!git_tree_entry_to_object(&blobobj, repo, entry)) { + git_blob *b = (git_blob *)blobobj; + fprintf(fp, "%o\" type=\"blob\" oid=\"", git_tree_entry_filemode(entry)); + git_oid_tostr(oid, sizeof(oid), git_tree_entry_id(entry)); + fprintf(fp, "%s\" size=\"%zu\"/>\n", oid, git_blob_rawsize(b)); + git_object_free(blobobj); + } + break; + } + case GIT_OBJ_TREE: + fprintf(fp, "%o", git_tree_entry_filemode(entry)); + fputs("\" type=\"tree\" oid=\"", fp); + git_oid_tostr(oid, sizeof(oid), git_tree_entry_id(entry)); + fputs("\"/>\n", fp); + break; + case GIT_OBJ_COMMIT: + fputs("m---------\" type=\"commit\" oid=\"", fp); + git_oid_tostr(oid, sizeof(oid), git_tree_entry_id(entry)); + fputs("\"/>\n", fp); + break; + default: + break; + } + } + + fputs("</tree>\n", fp); + + git_tree_free(tree); + git_commit_free(commit); +} + +void +print_blob(FILE *fp, const char *path, const char *oidstr) +{ + git_object *obj = NULL; + git_blob *blob = NULL; + git_oid id; + char oid[GIT_OID_HEXSZ + 1]; + + if (oidstr) { + if (git_oid_fromstrn(&id, oidstr, strlen(oidstr))) + errx(1, "invalid oid: %s", oidstr); + } else { + char refspec[PATH_MAX]; + snprintf(refspec, sizeof(refspec), "HEAD:%s", path); + if (git_revparse_single(&obj, repo, refspec)) + errx(1, "path not found: %s", path); + git_oid_cpy(&id, git_object_id(obj)); + git_object_free(obj); + } + + if (git_blob_lookup(&blob, repo, &id)) + errx(1, "blob not found: %s", oidstr ? 
oidstr : path); + + git_oid_tostr(oid, sizeof(oid), &id); + + fprintf(fp, "<blob oid=\"%s\" size=\"%zu\">\n", oid, git_blob_rawsize(blob)); + + if (git_blob_is_binary(blob)) { + fputs("<binary/></blob>\n", fp); + } else { + fputs("<content>\n", fp); + xmlencodeline(fp, git_blob_rawcontent(blob), git_blob_rawsize(blob)); + fputs("</content></blob>\n", fp); + } + + git_blob_free(blob); +} + +void +print_refs(FILE *fp) +{ + git_reference_iterator *it = NULL; + git_reference *ref = NULL; + git_object *obj = NULL; + const git_oid *id; + char oid[GIT_OID_HEXSZ + 1]; + int is_branch; + + fputs("<refs>\n", fp); + + if (git_reference_iterator_new(&it, repo)) + errx(1, "failed to iterate references"); + + while (!git_reference_next(&ref, it)) { + if (!git_reference_is_branch(ref) && !git_reference_is_tag(ref)) { + git_reference_free(ref); + continue; + } + + is_branch = git_reference_is_branch(ref); + + switch (git_reference_type(ref)) { + case GIT_REF_SYMBOLIC: + git_reference_free(ref); + ref = NULL; + if (git_reference_resolve(&ref, ref)) + continue; + break; + case GIT_REF_OID: + break; + default: + git_reference_free(ref); + continue; + } + + if (!git_reference_target(ref) || git_reference_peel(&obj, ref, GIT_OBJ_ANY)) + goto next; + + id = git_object_id(obj); + + if (is_branch) { + fprintf(fp, "<branch name=\""); + } else { + fprintf(fp, "<tag name=\""); + } + xmlencode(fp, git_reference_shorthand(ref), strlen(git_reference_shorthand(ref))); + fputs("\" oid=\"", fp); + git_oid_tostr(oid, sizeof(oid), id); + fprintf(fp, "%s\"/>\n", oid); + +next: + git_object_free(obj); + obj = NULL; + git_reference_free(ref); + ref = NULL; + } + + git_reference_iterator_free(it); + + fputs("</refs>\n", fp); +} + +void +usage(void) +{ + fprintf(stderr, "usage: git-query <command> <repo> [args...]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Commands:\n"); + fprintf(stderr, " info <repo> # repository metadata\n"); + fprintf(stderr, " log <repo> [N] # commit log (default 100)\n"); + 
fprintf(stderr, " commit <repo> [oid] # commit detail (default HEAD)\n"); + fprintf(stderr, " tree <repo> [oid] # tree entries (default HEAD)\n"); + fprintf(stderr, " blob <repo> <path> [oid] # blob content (default HEAD)\n"); + fprintf(stderr, " refs <repo> # branches and tags\n"); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + FILE *fp = stdout; + const char *cmd; + int i; + + if (argc < 3) + usage(); + + cmd = argv[1]; + repodir = argv[2]; + + git_libgit2_init(); + for (i = 1; i <= GIT_CONFIG_LEVEL_APP; i++) + git_libgit2_opts(GIT_OPT_SET_SEARCH_PATH, i, ""); + git_libgit2_opts(GIT_OPT_SET_OWNER_VALIDATION, 0); + + if (git_repository_open_ext(&repo, repodir, + GIT_REPOSITORY_OPEN_NO_SEARCH, NULL) < 0) { + fprintf(stderr, "%s: cannot open repository\n", repodir); + return 1; + } + + if (strcmp(cmd, "info") == 0) { + print_info(fp); + } else if (strcmp(cmd, "log") == 0) { + size_t n = 100; + if (argc > 3) { + n = strtoul(argv[3], NULL, 10); + if (n == 0) + errx(1, "invalid count: %s", argv[3]); + } + print_log(fp, n); + } else if (strcmp(cmd, "commit") == 0) { + const char *oid = (argc > 3) ? argv[3] : NULL; + print_commit(fp, oid); + } else if (strcmp(cmd, "tree") == 0) { + const char *oid = (argc > 3) ? argv[3] : NULL; + print_tree(fp, oid); + } else if (strcmp(cmd, "blob") == 0) { + if (argc < 4) + usage(); + const char *path = argv[3]; + const char *oid = (argc > 4) ? argv[4] : NULL; + print_blob(fp, path, oid); + } else if (strcmp(cmd, "refs") == 0) { + print_refs(fp); + } else { + fprintf(stderr, "unknown command: %s\n", cmd); + usage(); + } + + checkfileerror(fp, "<stdout>", 'w'); + + git_repository_free(repo); + git_libgit2_shutdown(); + + return 0; +}