-
Notifications
You must be signed in to change notification settings - Fork 135
/
fts_fuzzy_match.h
146 lines (114 loc) · 5.24 KB
/
fts_fuzzy_match.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
// VERSION 0.1.0
#ifndef FTS_FUZZY_MATCH_H
#define FTS_FUZZY_MATCH_H
#include <ctype.h> // ::tolower, ::toupper
namespace fts {
// Returns true if each character in pattern is found sequentially within str
static bool fuzzy_match(char const * pattern, char const * str)
{
while (*pattern != '\0' && *str != '\0') {
if (tolower(*pattern) == tolower(*str))
++pattern;
++str;
}
return *pattern == '\0' ? true : false;
}
// Returns true if each character in pattern is found sequentially within str
// iff found then outScore is also set. Score value has no intrinsic meaning. Range varies with pattern.
// Can only compare scores with same search pattern.
static bool fuzzy_match(char const * pattern, char const * str, int & outScore)
{
// Score consts
const int adjacency_bonus = 5; // bonus for adjacent matches
const int separator_bonus = 10; // bonus if match occurs after a separator
const int camel_bonus = 10; // bonus if match is uppercase and prev is lower
const int leading_letter_penalty = -3; // penalty applied for every letter in str before the first match
const int max_leading_letter_penalty = -9; // maximum penalty for leading letters
const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
// Loop variables
int score = 0;
char const * patternIter = pattern;
char const * strIter = str;
bool prevMatched = false;
bool prevLower = false;
bool prevSeparator = true; // true so if first letter match gets separator bonus
// Use "best" matched letter if multiple string letters match the pattern
char const * bestLetter = NULL;
int bestLetterScore = 0;
// Loop over strings
while (*strIter != '\0')
{
const char patternLetter = *patternIter;
const char strLetter = *strIter;
bool nextMatch = patternLetter != '\0' && tolower(patternLetter) == tolower(strLetter);
bool rematch = bestLetter && tolower(*bestLetter) == tolower(strLetter);
bool advanced = nextMatch && bestLetter;
bool patternRepeat = bestLetter && patternLetter != '\0' && tolower(*bestLetter) == tolower(patternLetter);
if (advanced || patternRepeat)
{
score += bestLetterScore;
bestLetter = NULL;
bestLetterScore = 0;
}
if (nextMatch || rematch)
{
int newScore = 0;
// Apply penalty for each letter before the first pattern match
// Note: std::max because penalties are negative values. So max is smallest penalty.
if (patternIter == pattern)
{
int count = int(strIter - str);
int penalty = leading_letter_penalty * count;
if (penalty < max_leading_letter_penalty)
penalty = max_leading_letter_penalty;
score += penalty;
}
// Apply bonus for consecutive bonuses
if (prevMatched)
newScore += adjacency_bonus;
// Apply bonus for matches after a separator
if (prevSeparator)
newScore += separator_bonus;
// Apply bonus across camel case boundaries
if (prevLower && isupper(strLetter))
newScore += camel_bonus;
// Update pattern iter IFF the next pattern letter was matched
if (nextMatch)
++patternIter;
// Update best letter in str which may be for a "next" letter or a rematch
if (newScore >= bestLetterScore)
{
// Apply penalty for now skipped letter
if (bestLetter != NULL)
score += unmatched_letter_penalty;
bestLetter = strIter;
bestLetterScore = newScore;
}
prevMatched = true;
}
else
{
score += unmatched_letter_penalty;
prevMatched = false;
}
// Separators should be more easily defined
prevLower = islower(strLetter) != 0;
prevSeparator = strLetter == '_' || strLetter == ' ';
++strIter;
}
// Apply score for last match
if (bestLetter)
score += bestLetterScore;
// Did not match full pattern
if (*patternIter != '\0')
return false;
outScore = score;
return true;
}
} // namespace fts
#endif // FTS_FUZZY_MATCH_H