git-query
Git data extraction tool written in C using libgit2
git clone https://9o.is/git/git-query.git
git-query.c
(16368B)
1 #define _POSIX_C_SOURCE 200809L
2 #include <sys/param.h>
3 #include <sys/stat.h>
4 #include <sys/types.h>
5
6 #include <err.h>
7 #include <errno.h>
8 #include <limits.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <time.h>
14 #include <unistd.h>
15
16 #include <git2.h>
17
18 #include "compat.h"
19
/* Number of elements in the array `s`; only meaningful for true arrays, not pointers. */
#define LEN(s) (sizeof(s) / sizeof((s)[0]))
21
22 struct deltainfo {
23 git_patch *patch;
24
25 size_t addcount;
26 size_t delcount;
27 };
28
29 struct commitinfo {
30 const git_oid *id;
31
32 char oid[GIT_OID_HEXSZ + 1];
33 char parentoid[GIT_OID_HEXSZ + 1];
34
35 const git_signature *author;
36 const char *summary;
37 const char *msg;
38
39 git_diff *diff;
40 git_commit *commit;
41 git_commit *parent;
42 git_tree *commit_tree;
43 git_tree *parent_tree;
44
45 size_t addcount;
46 size_t delcount;
47 size_t filecount;
48
49 struct deltainfo **deltas;
50 size_t ndeltas;
51 };
52
53 static git_repository *repo;
54 static const char *repodir;
55
/*
 * Exit with status 1 if the stream is in an error state.
 *
 * mode 'r': fail when ferror(fp) is set.
 * mode 'w': flush first, then fail when the flush fails or ferror(fp) is set.
 * Any other mode is a no-op. `name` is only used in the diagnostic.
 */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}
64
/*
 * Write "path/path2" into buf (bufsiz bytes), inserting a '/' only when
 * `path` is non-empty and does not already end in one.
 * Exits with status 1 if the result does not fit.
 */
void
joinpath(char *buf, size_t bufsiz, const char *path, const char *path2)
{
	size_t plen = strlen(path);
	const char *sep = "";
	int n;

	if (plen > 0 && path[plen - 1] != '/')
		sep = "/";

	n = snprintf(buf, bufsiz, "%s%s%s", path, sep, path2);
	if (n < 0 || (size_t)n >= bufsiz)
		errx(1, "path truncated: '%s%s%s'", path, sep, path2);
}
76
/*
 * Write `s` to fp with the five XML special characters replaced by entity
 * references, so the text is safe inside element content and attribute
 * values. Stops at the first NUL byte or after `len` bytes, whichever
 * comes first.
 */
void
xmlencode(FILE *fp, const char *s, size_t len)
{
	size_t i;

	for (i = 0; *s && i < len; s++, i++) {
		switch (*s) {
		case '<':  fputs("&lt;", fp); break;
		case '>':  fputs("&gt;", fp); break;
		case '\'': fputs("&#39;", fp); break;
		case '&':  fputs("&amp;", fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   putc(*s, fp);
		}
	}
}
93
/*
 * Same escaping as for XML text (the five special characters become entity
 * references), but carriage returns and newlines are dropped so the output
 * stays on a single line. Stops at the first NUL byte or after `len` bytes.
 */
void
xmlencodeline(FILE *fp, const char *s, size_t len)
{
	size_t i;

	for (i = 0; *s && i < len; s++, i++) {
		switch (*s) {
		case '<':  fputs("&lt;", fp); break;
		case '>':  fputs("&gt;", fp); break;
		case '\'': fputs("&#39;", fp); break;
		case '&':  fputs("&amp;", fp); break;
		case '"':  fputs("&quot;", fp); break;
		case '\r': /* fallthrough: line terminators are stripped */
		case '\n': break;
		default:   putc(*s, fp);
		}
	}
}
112
113 void
114 printtimez(FILE *fp, const git_time *intime)
115 {
116 struct tm *intm;
117 time_t t;
118 char out[32];
119
120 t = (time_t)intime->time;
121 if (!(intm = gmtime(&t)))
122 return;
123 strftime(out, sizeof(out), "%Y-%m-%dT%H:%M:%SZ", intm);
124 fputs(out, fp);
125 }
126
127 void
128 deltainfo_free(struct deltainfo *di)
129 {
130 if (!di)
131 return;
132 git_patch_free(di->patch);
133 memset(di, 0, sizeof(*di));
134 free(di);
135 }
136
137 void
138 commitinfo_free(struct commitinfo *ci)
139 {
140 size_t i;
141
142 if (!ci)
143 return;
144 if (ci->deltas)
145 for (i = 0; i < ci->ndeltas; i++)
146 deltainfo_free(ci->deltas[i]);
147
148 free(ci->deltas);
149 git_diff_free(ci->diff);
150 git_tree_free(ci->commit_tree);
151 git_tree_free(ci->parent_tree);
152 git_commit_free(ci->commit);
153 git_commit_free(ci->parent);
154 memset(ci, 0, sizeof(*ci));
155 free(ci);
156 }
157
158 struct commitinfo *
159 commitinfo_getbyoid(const git_oid *id)
160 {
161 struct commitinfo *ci;
162
163 if (!(ci = calloc(1, sizeof(struct commitinfo))))
164 err(1, "calloc");
165
166 if (git_commit_lookup(&(ci->commit), repo, id))
167 goto err;
168 ci->id = id;
169
170 git_oid_tostr(ci->oid, sizeof(ci->oid), git_commit_id(ci->commit));
171 git_oid_tostr(ci->parentoid, sizeof(ci->parentoid), git_commit_parent_id(ci->commit, 0));
172
173 ci->author = git_commit_author(ci->commit);
174 ci->summary = git_commit_summary(ci->commit);
175 ci->msg = git_commit_message(ci->commit);
176
177 return ci;
178
179 err:
180 commitinfo_free(ci);
181 return NULL;
182 }
183
184 int
185 commitinfo_getdiff(struct commitinfo *ci)
186 {
187 struct deltainfo *di;
188 git_diff_options opts;
189 git_diff_find_options fopts;
190 const git_diff_delta *delta;
191 const git_diff_hunk *hunk;
192 const git_diff_line *line;
193 git_patch *patch = NULL;
194 size_t ndeltas, nhunks, nhunklines;
195 size_t i, j, k;
196
197 if (git_tree_lookup(&(ci->commit_tree), repo, git_commit_tree_id(ci->commit)))
198 goto err;
199 if (!git_commit_parent(&(ci->parent), ci->commit, 0)) {
200 if (git_tree_lookup(&(ci->parent_tree), repo, git_commit_tree_id(ci->parent))) {
201 ci->parent = NULL;
202 ci->parent_tree = NULL;
203 }
204 }
205
206 git_diff_init_options(&opts, GIT_DIFF_OPTIONS_VERSION);
207 opts.flags |= GIT_DIFF_DISABLE_PATHSPEC_MATCH |
208 GIT_DIFF_IGNORE_SUBMODULES |
209 GIT_DIFF_INCLUDE_TYPECHANGE;
210 if (git_diff_tree_to_tree(&(ci->diff), repo, ci->parent_tree, ci->commit_tree, &opts))
211 goto err;
212
213 if (git_diff_find_init_options(&fopts, GIT_DIFF_FIND_OPTIONS_VERSION))
214 goto err;
215 fopts.flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES |
216 GIT_DIFF_FIND_EXACT_MATCH_ONLY;
217 if (git_diff_find_similar(ci->diff, &fopts))
218 goto err;
219
220 ndeltas = git_diff_num_deltas(ci->diff);
221 if (ndeltas && !(ci->deltas = calloc(ndeltas, sizeof(struct deltainfo *))))
222 err(1, "calloc");
223
224 for (i = 0; i < ndeltas; i++) {
225 if (git_patch_from_diff(&patch, ci->diff, i))
226 goto err;
227
228 if (!(di = calloc(1, sizeof(struct deltainfo))))
229 err(1, "calloc");
230 di->patch = patch;
231 ci->deltas[i] = di;
232
233 delta = git_patch_get_delta(patch);
234
235 if (delta->flags & GIT_DIFF_FLAG_BINARY)
236 continue;
237
238 nhunks = git_patch_num_hunks(patch);
239 for (j = 0; j < nhunks; j++) {
240 if (git_patch_get_hunk(&hunk, &nhunklines, patch, j))
241 break;
242 for (k = 0; ; k++) {
243 if (git_patch_get_line_in_hunk(&line, patch, j, k))
244 break;
245 if (line->old_lineno == -1) {
246 di->addcount++;
247 ci->addcount++;
248 } else if (line->new_lineno == -1) {
249 di->delcount++;
250 ci->delcount++;
251 }
252 }
253 }
254 }
255 ci->ndeltas = i;
256 ci->filecount = i;
257
258 return 0;
259
260 err:
261 git_diff_free(ci->diff);
262 ci->diff = NULL;
263 git_tree_free(ci->commit_tree);
264 ci->commit_tree = NULL;
265 git_tree_free(ci->parent_tree);
266 ci->parent_tree = NULL;
267 git_commit_free(ci->parent);
268 ci->parent = NULL;
269
270 if (ci->deltas)
271 for (i = 0; i < ci->ndeltas; i++)
272 deltainfo_free(ci->deltas[i]);
273 free(ci->deltas);
274 ci->deltas = NULL;
275 ci->ndeltas = 0;
276 ci->addcount = 0;
277 ci->delcount = 0;
278 ci->filecount = 0;
279
280 return -1;
281 }
282
283 void
284 print_log(FILE *fp, size_t nlogcommits)
285 {
286 git_object *obj = NULL;
287 const git_oid *head = NULL;
288 struct commitinfo *ci;
289 git_revwalk *w = NULL;
290 git_oid id;
291 size_t count = 0;
292
293 if (git_revparse_single(&obj, repo, "HEAD"))
294 errx(1, "cannot resolve HEAD");
295 head = git_object_id(obj);
296
297 fprintf(fp, "<log count=\"%zu\">\n", nlogcommits);
298
299 git_revwalk_new(&w, repo);
300 git_revwalk_push(w, head);
301
302 while (!git_revwalk_next(&id, w)) {
303 if (nlogcommits && count >= nlogcommits)
304 break;
305
306 if (!(ci = commitinfo_getbyoid(&id)))
307 break;
308 if (commitinfo_getdiff(ci) == -1)
309 goto next;
310
311 fprintf(fp, "<commit oid=\"%s\"", ci->oid);
312 if (ci->parentoid[0])
313 fprintf(fp, " parent=\"%s\"", ci->parentoid);
314 fputs(">\n", fp);
315
316 if (ci->author) {
317 fputs("<author name=\"", fp);
318 xmlencode(fp, ci->author->name, strlen(ci->author->name));
319 fputs("\" email=\"", fp);
320 xmlencode(fp, ci->author->email, strlen(ci->author->email));
321 fputs("\" when=\"", fp);
322 printtimez(fp, &(ci->author->when));
323 fputs("\"/>\n", fp);
324 }
325
326 fputs("<summary>", fp);
327 xmlencode(fp, ci->summary, strlen(ci->summary));
328 fputs("</summary>\n", fp);
329
330 fputs("<message>", fp);
331 xmlencode(fp, ci->msg, strlen(ci->msg));
332 fputs("</message>\n", fp);
333
334 fprintf(fp, "<stats files=\"%zu\" add=\"%zu\" del=\"%zu\"/>\n",
335 ci->filecount, ci->addcount, ci->delcount);
336
337 fputs("</commit>\n", fp);
338 count++;
339
340 next:
341 commitinfo_free(ci);
342 }
343
344 git_revwalk_free(w);
345 git_object_free(obj);
346
347 fputs("</log>\n", fp);
348 }
349
350 void
351 print_commit(FILE *fp, const char *oidstr)
352 {
353 struct commitinfo *ci;
354 git_oid id;
355 git_object *obj = NULL;
356 const git_diff_delta *delta;
357 const git_diff_hunk *hunk;
358 const git_diff_line *line;
359 git_patch *patch;
360 size_t nhunks, nhunklines;
361 size_t i, j, k;
362 int c;
363
364 if (!oidstr) {
365 if (git_revparse_single(&obj, repo, "HEAD"))
366 errx(1, "cannot resolve HEAD");
367 git_oid_cpy(&id, git_object_id(obj));
368 git_object_free(obj);
369 } else {
370 if (git_oid_fromstrn(&id, oidstr, strlen(oidstr)))
371 errx(1, "invalid oid: %s", oidstr);
372 }
373
374 if (!(ci = commitinfo_getbyoid(&id)))
375 errx(1, "commit not found: %s", oidstr);
376
377 if (commitinfo_getdiff(ci) == -1)
378 errx(1, "failed to get diff for commit: %s", oidstr);
379
380 fprintf(fp, "<commit oid=\"%s\"", ci->oid);
381 if (ci->parentoid[0])
382 fprintf(fp, " parent=\"%s\"", ci->parentoid);
383 fputs(">\n", fp);
384
385 if (ci->author) {
386 fputs("<author name=\"", fp);
387 xmlencode(fp, ci->author->name, strlen(ci->author->name));
388 fputs("\" email=\"", fp);
389 xmlencode(fp, ci->author->email, strlen(ci->author->email));
390 fputs("\" when=\"", fp);
391 printtimez(fp, &(ci->author->when));
392 fputs("\"/>\n", fp);
393 }
394
395 fputs("<message>", fp);
396 xmlencode(fp, ci->msg, strlen(ci->msg));
397 fputs("</message>\n", fp);
398
399 fputs("<diff>\n", fp);
400 for (i = 0; i < ci->ndeltas; i++) {
401 patch = ci->deltas[i]->patch;
402 delta = git_patch_get_delta(patch);
403
404 switch (delta->status) {
405 case GIT_DELTA_ADDED: c = 'A'; break;
406 case GIT_DELTA_COPIED: c = 'C'; break;
407 case GIT_DELTA_DELETED: c = 'D'; break;
408 case GIT_DELTA_MODIFIED: c = 'M'; break;
409 case GIT_DELTA_RENAMED: c = 'R'; break;
410 case GIT_DELTA_TYPECHANGE: c = 'T'; break;
411 default: c = ' '; break;
412 }
413
414 fprintf(fp, "<file path=\"");
415 xmlencode(fp, delta->new_file.path, strlen(delta->new_file.path));
416 fprintf(fp, "\" status=\"%c\" add=\"%zu\" del=\"%zu\">\n",
417 c, ci->deltas[i]->addcount, ci->deltas[i]->delcount);
418
419 if (delta->flags & GIT_DIFF_FLAG_BINARY) {
420 fputs("<binary/>\n", fp);
421 fputs("</file>\n", fp);
422 continue;
423 }
424
425 nhunks = git_patch_num_hunks(patch);
426 for (j = 0; j < nhunks; j++) {
427 if (git_patch_get_hunk(&hunk, &nhunklines, patch, j))
428 break;
429
430 fputs("<hunk>", fp);
431 xmlencode(fp, hunk->header, hunk->header_len);
432 fputs("</hunk>\n", fp);
433
434 for (k = 0; ; k++) {
435 if (git_patch_get_line_in_hunk(&line, patch, j, k))
436 break;
437
438 if (line->old_lineno == -1)
439 fputs("<line type=\"added\">", fp);
440 else if (line->new_lineno == -1)
441 fputs("<line type=\"removed\">", fp);
442 else
443 fputs("<line type=\"context\">", fp);
444
445 xmlencodeline(fp, line->content, line->content_len);
446 fputs("</line>\n", fp);
447 }
448 }
449 fputs("</file>\n", fp);
450 }
451 fputs("</diff>\n", fp);
452 fputs("</commit>\n", fp);
453
454 commitinfo_free(ci);
455 }
456
457 void
458 print_tree(FILE *fp, const char *oidstr)
459 {
460 git_tree *tree = NULL;
461 git_commit *commit = NULL;
462 git_oid id, tree_id;
463 const git_tree_entry *entry = NULL;
464 const char *entryname;
465 size_t count, i;
466 char oid[GIT_OID_HEXSZ + 1];
467
468 if (oidstr) {
469 if (git_oid_fromstrn(&id, oidstr, strlen(oidstr)))
470 errx(1, "invalid oid: %s", oidstr);
471 if (git_commit_lookup(&commit, repo, &id))
472 errx(1, "commit not found: %s", oidstr);
473 git_commit_tree(&tree, commit);
474 } else {
475 git_object *obj = NULL;
476 if (git_revparse_single(&obj, repo, "HEAD"))
477 errx(1, "cannot resolve HEAD");
478 git_commit_lookup(&commit, repo, git_object_id(obj));
479 git_commit_tree(&tree, commit);
480 git_object_free(obj);
481 }
482
483 git_oid_tostr(oid, sizeof(oid), git_tree_id(tree));
484 fprintf(fp, "<tree oid=\"%s\">\n", oid);
485
486 count = git_tree_entrycount(tree);
487 for (i = 0; i < count; i++) {
488 if (!(entry = git_tree_entry_byindex(tree, i)) ||
489 !(entryname = git_tree_entry_name(entry)))
490 continue;
491
492 fprintf(fp, "<entry path=\"", fp);
493 xmlencode(fp, entryname, strlen(entryname));
494 fputs("\" mode=\"", fp);
495
496 switch (git_tree_entry_type(entry)) {
497 case GIT_OBJ_BLOB: {
498 git_object *blobobj = NULL;
499 if (!git_tree_entry_to_object(&blobobj, repo, entry)) {
500 git_blob *b = (git_blob *)blobobj;
501 fprintf(fp, "%o\" type=\"blob\" oid=\"", git_tree_entry_filemode(entry));
502 git_oid_tostr(oid, sizeof(oid), git_tree_entry_id(entry));
503 fprintf(fp, "%s\" size=\"%zu\"/>\n", oid, git_blob_rawsize(b));
504 git_object_free(blobobj);
505 }
506 break;
507 }
508 case GIT_OBJ_TREE:
509 fprintf(fp, "%o", git_tree_entry_filemode(entry));
510 fputs("\" type=\"tree\" oid=\"", fp);
511 git_oid_tostr(oid, sizeof(oid), git_tree_entry_id(entry));
512 fputs("\"/>\n", fp);
513 break;
514 case GIT_OBJ_COMMIT:
515 fputs("m---------\" type=\"commit\" oid=\"", fp);
516 git_oid_tostr(oid, sizeof(oid), git_tree_entry_id(entry));
517 fputs("\"/>\n", fp);
518 break;
519 default:
520 break;
521 }
522 }
523
524 fputs("</tree>\n", fp);
525
526 git_tree_free(tree);
527 git_commit_free(commit);
528 }
529
530 void
531 print_blob(FILE *fp, const char *path, const char *oidstr)
532 {
533 git_object *obj = NULL;
534 git_blob *blob = NULL;
535 git_oid id;
536
537 if (oidstr) {
538 if (git_oid_fromstrn(&id, oidstr, strlen(oidstr)))
539 errx(1, "invalid oid: %s", oidstr);
540 } else {
541 char refspec[PATH_MAX];
542 snprintf(refspec, sizeof(refspec), "HEAD:%s", path);
543 if (git_revparse_single(&obj, repo, refspec))
544 errx(1, "path not found: %s", path);
545 git_oid_cpy(&id, git_object_id(obj));
546 git_object_free(obj);
547 }
548
549 if (git_blob_lookup(&blob, repo, &id))
550 errx(1, "blob not found: %s", oidstr ? oidstr : path);
551
552 if (git_blob_is_binary(blob)) {
553 fprintf(stderr, "warning: binary file, not outputting content\n");
554 } else {
555 fwrite(git_blob_rawcontent(blob), 1, git_blob_rawsize(blob), fp);
556 }
557
558 git_blob_free(blob);
559 }
560
561 void
562 print_refs(FILE *fp)
563 {
564 git_reference_iterator *it = NULL;
565 git_reference *ref = NULL;
566 git_object *obj = NULL;
567 const git_oid *id;
568 char oid[GIT_OID_HEXSZ + 1];
569 int is_branch;
570
571 fputs("<refs>\n", fp);
572
573 if (git_reference_iterator_new(&it, repo))
574 errx(1, "failed to iterate references");
575
576 while (!git_reference_next(&ref, it)) {
577 if (!git_reference_is_branch(ref) && !git_reference_is_tag(ref)) {
578 git_reference_free(ref);
579 continue;
580 }
581
582 is_branch = git_reference_is_branch(ref);
583
584 switch (git_reference_type(ref)) {
585 case GIT_REF_SYMBOLIC:
586 git_reference_free(ref);
587 ref = NULL;
588 if (git_reference_resolve(&ref, ref))
589 continue;
590 break;
591 case GIT_REF_OID:
592 break;
593 default:
594 git_reference_free(ref);
595 continue;
596 }
597
598 if (!git_reference_target(ref) || git_reference_peel(&obj, ref, GIT_OBJ_ANY))
599 goto next;
600
601 id = git_object_id(obj);
602
603 if (is_branch) {
604 fprintf(fp, "<branch name=\"");
605 } else {
606 fprintf(fp, "<tag name=\"");
607 }
608 xmlencode(fp, git_reference_shorthand(ref), strlen(git_reference_shorthand(ref)));
609 fputs("\" oid=\"", fp);
610 git_oid_tostr(oid, sizeof(oid), id);
611 fprintf(fp, "%s\"/>\n", oid);
612
613 next:
614 git_object_free(obj);
615 obj = NULL;
616 git_reference_free(ref);
617 ref = NULL;
618 }
619
620 git_reference_iterator_free(it);
621
622 fputs("</refs>\n", fp);
623 }
624
/* Print the command summary to stderr and exit with status 1. */
void
usage(void)
{
	static const char text[] =
	    "usage: git-query <command> <repo> [args...]\n"
	    "\n"
	    "Commands:\n"
	    " log <repo> [N] # commit log (default 100)\n"
	    " commit <repo> [oid] # commit detail (default HEAD)\n"
	    " tree <repo> [oid] # tree entries (default HEAD)\n"
	    " blob <repo> <path> [oid] # blob content (default HEAD)\n"
	    " refs <repo> # branches and tags\n";

	fputs(text, stderr);
	exit(1);
}
638
639 int
640 main(int argc, char *argv[])
641 {
642 FILE *fp = stdout;
643 const char *cmd;
644 int i;
645
646 if (argc < 3)
647 usage();
648
649 cmd = argv[1];
650 repodir = argv[2];
651
652 git_libgit2_init();
653 for (i = 1; i <= GIT_CONFIG_LEVEL_APP; i++)
654 git_libgit2_opts(GIT_OPT_SET_SEARCH_PATH, i, "");
655 git_libgit2_opts(GIT_OPT_SET_OWNER_VALIDATION, 0);
656
657 if (git_repository_open_ext(&repo, repodir,
658 GIT_REPOSITORY_OPEN_NO_SEARCH, NULL) < 0) {
659 fprintf(stderr, "%s: cannot open repository\n", repodir);
660 return 1;
661 }
662
663 if (strcmp(cmd, "log") == 0) {
664 size_t n = 100;
665 if (argc > 3) {
666 n = strtoul(argv[3], NULL, 10);
667 if (n == 0)
668 errx(1, "invalid count: %s", argv[3]);
669 }
670 print_log(fp, n);
671 } else if (strcmp(cmd, "commit") == 0) {
672 const char *oid = (argc > 3) ? argv[3] : NULL;
673 print_commit(fp, oid);
674 } else if (strcmp(cmd, "tree") == 0) {
675 const char *oid = (argc > 3) ? argv[3] : NULL;
676 print_tree(fp, oid);
677 } else if (strcmp(cmd, "blob") == 0) {
678 if (argc < 4)
679 usage();
680 const char *path = argv[3];
681 const char *oid = (argc > 4) ? argv[4] : NULL;
682 print_blob(fp, path, oid);
683 } else if (strcmp(cmd, "refs") == 0) {
684 print_refs(fp);
685 } else {
686 fprintf(stderr, "unknown command: %s\n", cmd);
687 usage();
688 }
689
690 checkfileerror(fp, "<stdout>", 'w');
691
692 git_repository_free(repo);
693 git_libgit2_shutdown();
694
695 return 0;
696 }