fzy

terminal fuzzy finder picker

git clone https://9o.is/git/fzy.git

match.c

(5849B)


      1 #include <ctype.h>
      2 #include <string.h>
      3 #include <strings.h>
      4 #include <stdio.h>
      5 #include <float.h>
      6 #include <math.h>
      7 #include <stdlib.h>
      8 
      9 #include "match.h"
     10 #include "bonus.h"
     11 
     12 #include "../config.h"
     13 
     14 char *strcasechr(const char *s, char c) {
     15 	const char accept[3] = {c, toupper(c), 0};
     16 	return strpbrk(s, accept);
     17 }
     18 
     19 int has_match(const char *needle, const char *haystack) {
     20 	while (*needle) {
     21 		char nch = *needle++;
     22 
     23 		if (!(haystack = strcasechr(haystack, nch))) {
     24 			return 0;
     25 		}
     26 		haystack++;
     27 	}
     28 	return 1;
     29 }
     30 
     31 #define max(a, b) (((a) > (b)) ? (a) : (b))
     32 
     33 struct match_struct {
     34 	int needle_len;
     35 	int haystack_len;
     36 
     37 	char lower_needle[MATCH_MAX_LEN];
     38 	char lower_haystack[MATCH_MAX_LEN];
     39 
     40 	score_t match_bonus[MATCH_MAX_LEN];
     41 };
     42 
     43 static void precompute_bonus(const char *haystack, score_t *match_bonus) {
     44 	/* Which positions are beginning of words */
     45 	char last_ch = '/';
     46 	for (int i = 0; haystack[i]; i++) {
     47 		char ch = haystack[i];
     48 		match_bonus[i] = COMPUTE_BONUS(last_ch, ch);
     49 		last_ch = ch;
     50 	}
     51 }
     52 
     53 static void setup_match_struct(struct match_struct *match, const char *needle, const char *haystack) {
     54 	match->needle_len = strlen(needle);
     55 	match->haystack_len = strlen(haystack);
     56 
     57 	if (match->haystack_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) {
     58 		return;
     59 	}
     60 
     61 	for (int i = 0; i < match->needle_len; i++)
     62 		match->lower_needle[i] = tolower(needle[i]);
     63 
     64 	for (int i = 0; i < match->haystack_len; i++)
     65 		match->lower_haystack[i] = tolower(haystack[i]);
     66 
     67 	precompute_bonus(haystack, match->match_bonus);
     68 }
     69 
     70 static inline void match_row(const struct match_struct *match, int row, score_t *curr_D, score_t *curr_M, const score_t *last_D, const score_t *last_M) {
     71 	int n = match->needle_len;
     72 	int m = match->haystack_len;
     73 	int i = row;
     74 
     75 	const char *lower_needle = match->lower_needle;
     76 	const char *lower_haystack = match->lower_haystack;
     77 	const score_t *match_bonus = match->match_bonus;
     78 
     79 	score_t prev_score = SCORE_MIN;
     80 	score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
     81 
     82 	/* These will not be used with this value, but not all compilers see it */
     83 	score_t prev_M = SCORE_MIN, prev_D = SCORE_MIN;
     84 
     85 	for (int j = 0; j < m; j++) {
     86 		if (lower_needle[i] == lower_haystack[j]) {
     87 			score_t score = SCORE_MIN;
     88 			if (!i) {
     89 				score = (j * SCORE_GAP_LEADING) + match_bonus[j];
     90 			} else if (j) { /* i > 0 && j > 0*/
     91 				score = max(
     92 						prev_M + match_bonus[j],
     93 
     94 						/* consecutive match, doesn't stack with match_bonus */
     95 						prev_D + SCORE_MATCH_CONSECUTIVE);
     96 			}
     97 			prev_D = last_D[j];
     98 			prev_M = last_M[j];
     99 			curr_D[j] = score;
    100 			curr_M[j] = prev_score = max(score, prev_score + gap_score);
    101 		} else {
    102 			prev_D = last_D[j];
    103 			prev_M = last_M[j];
    104 			curr_D[j] = SCORE_MIN;
    105 			curr_M[j] = prev_score = prev_score + gap_score;
    106 		}
    107 	}
    108 }
    109 
    110 score_t match(const char *needle, const char *haystack) {
    111 	if (!*needle)
    112 		return SCORE_MIN;
    113 
    114 	struct match_struct match;
    115 	setup_match_struct(&match, needle, haystack);
    116 
    117 	int n = match.needle_len;
    118 	int m = match.haystack_len;
    119 
    120 	if (m > MATCH_MAX_LEN || n > m) {
    121 		/*
    122 		 * Unreasonably large candidate: return no score
    123 		 * If it is a valid match it will still be returned, it will
    124 		 * just be ranked below any reasonably sized candidates
    125 		 */
    126 		return SCORE_MIN;
    127 	} else if (n == m) {
    128 		/* Since this method can only be called with a haystack which
    129 		 * matches needle. If the lengths of the strings are equal the
    130 		 * strings themselves must also be equal (ignoring case).
    131 		 */
    132 		return SCORE_MAX;
    133 	}
    134 
    135 	/*
    136 	 * D[][] Stores the best score for this position ending with a match.
    137 	 * M[][] Stores the best possible score at this position.
    138 	 */
    139 	score_t D[MATCH_MAX_LEN], M[MATCH_MAX_LEN];
    140 
    141 	for (int i = 0; i < n; i++) {
    142 		match_row(&match, i, D, M, D, M);
    143 	}
    144 
    145 	return M[m - 1];
    146 }
    147 
    148 score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
    149 	if (!*needle)
    150 		return SCORE_MIN;
    151 
    152 	struct match_struct match;
    153 	setup_match_struct(&match, needle, haystack);
    154 
    155 	int n = match.needle_len;
    156 	int m = match.haystack_len;
    157 
    158 	if (m > MATCH_MAX_LEN || n > m) {
    159 		/*
    160 		 * Unreasonably large candidate: return no score
    161 		 * If it is a valid match it will still be returned, it will
    162 		 * just be ranked below any reasonably sized candidates
    163 		 */
    164 		return SCORE_MIN;
    165 	} else if (n == m) {
    166 		/* Since this method can only be called with a haystack which
    167 		 * matches needle. If the lengths of the strings are equal the
    168 		 * strings themselves must also be equal (ignoring case).
    169 		 */
    170 		if (positions)
    171 			for (int i = 0; i < n; i++)
    172 				positions[i] = i;
    173 		return SCORE_MAX;
    174 	}
    175 
    176 	/*
    177 	 * D[][] Stores the best score for this position ending with a match.
    178 	 * M[][] Stores the best possible score at this position.
    179 	 */
    180 	score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN];
    181 	M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
    182 	D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
    183 
    184 	match_row(&match, 0, D[0], M[0], D[0], M[0]);
    185 	for (int i = 1; i < n; i++) {
    186 		match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]);
    187 	}
    188 
    189 	/* backtrace to find the positions of optimal matching */
    190 	if (positions) {
    191 		int match_required = 0;
    192 		for (int i = n - 1, j = m - 1; i >= 0; i--) {
    193 			for (; j >= 0; j--) {
    194 				/*
    195 				 * There may be multiple paths which result in
    196 				 * the optimal weight.
    197 				 *
    198 				 * For simplicity, we will pick the first one
    199 				 * we encounter, the latest in the candidate
    200 				 * string.
    201 				 */
    202 				if (D[i][j] != SCORE_MIN &&
    203 				    (match_required || D[i][j] == M[i][j])) {
    204 					/* If this score was determined using
    205 					 * SCORE_MATCH_CONSECUTIVE, the
    206 					 * previous character MUST be a match
    207 					 */
    208 					match_required =
    209 					    i && j &&
    210 					    M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
    211 					positions[i] = j--;
    212 					break;
    213 				}
    214 			}
    215 		}
    216 	}
    217 
    218 	score_t result = M[n - 1][m - 1];
    219 
    220 	free(M);
    221 	free(D);
    222 
    223 	return result;
    224 }