1
0
forked from aniani/vim

patch 8.2.1726: fuzzy matching only works on strings

Problem:    Fuzzy matching only works on strings.
Solution:   Support passing a dict.  Add matchfuzzypos() to also get the match
            positions. (Yegappan Lakshmanan, closes #6947)
This commit is contained in:
Bram Moolenaar
2020-09-22 20:33:50 +02:00
parent 44aaf5416e
commit 4f73b8e9cc
9 changed files with 635 additions and 173 deletions

View File

@@ -2641,7 +2641,10 @@ matcharg({nr}) List arguments of |:match|
matchdelete({id} [, {win}]) Number delete match identified by {id} matchdelete({id} [, {win}]) Number delete match identified by {id}
matchend({expr}, {pat} [, {start} [, {count}]]) matchend({expr}, {pat} [, {start} [, {count}]])
Number position where {pat} ends in {expr} Number position where {pat} ends in {expr}
matchfuzzy({list}, {str}) List fuzzy match {str} in {list} matchfuzzy({list}, {str} [, {dict}])
List fuzzy match {str} in {list}
matchfuzzypos({list}, {str} [, {dict}])
List fuzzy match {str} in {list}
matchlist({expr}, {pat} [, {start} [, {count}]]) matchlist({expr}, {pat} [, {start} [, {count}]])
List match and submatches of {pat} in {expr} List match and submatches of {pat} in {expr}
matchstr({expr}, {pat} [, {start} [, {count}]]) matchstr({expr}, {pat} [, {start} [, {count}]])
@@ -7311,12 +7314,25 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()*
GetText()->matchend('word') GetText()->matchend('word')
matchfuzzy({list}, {str}) *matchfuzzy()* matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()*
Returns a list with all the strings in {list} that fuzzy If {list} is a list of strings, then returns a list with all
match {str}. The strings in the returned list are sorted the strings in {list} that fuzzy match {str}. The strings in
based on the matching score. {str} is treated as a literal the returned list are sorted based on the matching score.
string and regular expression matching is NOT supported.
The maximum supported {str} length is 256. If {list} is a list of dictionaries, then the optional {dict}
argument supports the following items:
key key of the item which is fuzzy matched against
{str}. The value of this item should be a
string.
text_cb |Funcref| that will be called for every item
in {list} to get the text for fuzzy matching.
This should accept a dictionary item as the
argument and return the text for that item to
use for fuzzy matching.
{str} is treated as a literal string and regular expression
matching is NOT supported. The maximum supported {str} length
is 256.
If there are no matching strings or there is an error, then an If there are no matching strings or there is an error, then an
empty list is returned. If length of {str} is greater than empty list is returned. If length of {str} is greater than
@@ -7327,11 +7343,36 @@ matchfuzzy({list}, {str}) *matchfuzzy()*
< results in ["clay"]. > < results in ["clay"]. >
:echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl") :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl")
< results in a list of buffer names fuzzy matching "ndl". > < results in a list of buffer names fuzzy matching "ndl". >
:echo getbufinfo()->matchfuzzy("ndl", {'key' : 'name'})
< results in a list of buffer information dicts with buffer
names fuzzy matching "ndl". >
:echo getbufinfo()->matchfuzzy("spl",
\ {'text_cb' : {v -> v.name}})
< results in a list of buffer information dicts with buffer
names fuzzy matching "spl". >
:echo v:oldfiles->matchfuzzy("test") :echo v:oldfiles->matchfuzzy("test")
< results in a list of file names fuzzy matching "test". > < results in a list of file names fuzzy matching "test". >
:let l = readfile("buffer.c")->matchfuzzy("str") :let l = readfile("buffer.c")->matchfuzzy("str")
< results in a list of lines in "buffer.c" fuzzy matching "str". < results in a list of lines in "buffer.c" fuzzy matching "str".
matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()*
Same as |matchfuzzy()|, but returns the list of matched
strings and the list of character positions where characters
in {str} matches.
If {str} matches multiple times in a string, then only the
positions for the best match is returned.
If there are no matching strings or there is an error, then a
list with two empty list items is returned.
Example: >
:echo matchfuzzypos(['testing'], 'tsg')
< results in [['testing'], [[0, 2, 6]]] >
:echo matchfuzzypos(['clay', 'lacy'], 'la')
< results in [['lacy', 'clay'], [[0, 1], [1, 2]]] >
:echo [{'text': 'hello', 'id' : 10}]->matchfuzzypos('ll', {'key' : 'text'})
< results in [{'id': 10, 'text': 'hello'}] [[2, 3]]
matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()*
Same as |match()|, but return a |List|. The first item in the Same as |match()|, but return a |List|. The first item in the

View File

@@ -604,6 +604,7 @@ String manipulation: *string-functions*
match() position where a pattern matches in a string match() position where a pattern matches in a string
matchend() position where a pattern match ends in a string matchend() position where a pattern match ends in a string
matchfuzzy() fuzzy matches a string in a list of strings matchfuzzy() fuzzy matches a string in a list of strings
matchfuzzypos() fuzzy matches a string in a list of strings
matchstr() match of a pattern in a string matchstr() match of a pattern in a string
matchstrpos() match and positions of a pattern in a string matchstrpos() match and positions of a pattern in a string
matchlist() like matchstr() and also return submatches matchlist() like matchstr() and also return submatches

View File

@@ -753,7 +753,8 @@ static funcentry_T global_functions[] =
{"matcharg", 1, 1, FEARG_1, ret_list_string, f_matcharg}, {"matcharg", 1, 1, FEARG_1, ret_list_string, f_matcharg},
{"matchdelete", 1, 2, FEARG_1, ret_number, f_matchdelete}, {"matchdelete", 1, 2, FEARG_1, ret_number, f_matchdelete},
{"matchend", 2, 4, FEARG_1, ret_number, f_matchend}, {"matchend", 2, 4, FEARG_1, ret_number, f_matchend},
{"matchfuzzy", 2, 2, FEARG_1, ret_list_string, f_matchfuzzy}, {"matchfuzzy", 2, 3, FEARG_1, ret_list_string, f_matchfuzzy},
{"matchfuzzypos", 2, 3, FEARG_1, ret_list_any, f_matchfuzzypos},
{"matchlist", 2, 4, FEARG_1, ret_list_string, f_matchlist}, {"matchlist", 2, 4, FEARG_1, ret_list_string, f_matchlist},
{"matchstr", 2, 4, FEARG_1, ret_string, f_matchstr}, {"matchstr", 2, 4, FEARG_1, ret_string, f_matchstr},
{"matchstrpos", 2, 4, FEARG_1, ret_list_any, f_matchstrpos}, {"matchstrpos", 2, 4, FEARG_1, ret_list_any, f_matchstrpos},

View File

@@ -37,4 +37,5 @@ spat_T *get_spat(int idx);
int get_spat_last_idx(void); int get_spat_last_idx(void);
void f_searchcount(typval_T *argvars, typval_T *rettv); void f_searchcount(typval_T *argvars, typval_T *rettv);
void f_matchfuzzy(typval_T *argvars, typval_T *rettv); void f_matchfuzzy(typval_T *argvars, typval_T *rettv);
void f_matchfuzzypos(typval_T *argvars, typval_T *rettv);
/* vim: set ft=c : */ /* vim: set ft=c : */

View File

@@ -4216,33 +4216,144 @@ the_end:
*/ */
typedef struct typedef struct
{ {
listitem_T *item; listitem_T *item;
int score; int score;
list_T *lmatchpos;
} fuzzyItem_T; } fuzzyItem_T;
// bonus for adjacent matches
#define SEQUENTIAL_BONUS 15
// bonus if match occurs after a separator
#define SEPARATOR_BONUS 30
// bonus if match is uppercase and prev is lower
#define CAMEL_BONUS 30
// bonus if the first letter is matched
#define FIRST_LETTER_BONUS 15
// penalty applied for every letter in str before the first match
#define LEADING_LETTER_PENALTY -5
// maximum penalty for leading letters
#define MAX_LEADING_LETTER_PENALTY -15
// penalty for every letter that doesn't match
#define UNMATCHED_LETTER_PENALTY -1
// Score for a string that doesn't fuzzy match the pattern
#define SCORE_NONE -9999
#define FUZZY_MATCH_RECURSION_LIMIT 10
// Maximum number of characters that can be fuzzy matched
#define MAXMATCHES 256
typedef int_u matchidx_T;
/*
* Compute a score for a fuzzy matched string. The matching character locations
* are in 'matches'.
*/
static int
fuzzy_match_compute_score(
char_u *str,
int strSz,
matchidx_T *matches,
int numMatches)
{
int score;
int penalty;
int unmatched;
int i;
char_u *p = str;
matchidx_T sidx = 0;
// Initialize score
score = 100;
// Apply leading letter penalty
penalty = LEADING_LETTER_PENALTY * matches[0];
if (penalty < MAX_LEADING_LETTER_PENALTY)
penalty = MAX_LEADING_LETTER_PENALTY;
score += penalty;
// Apply unmatched penalty
unmatched = strSz - numMatches;
score += UNMATCHED_LETTER_PENALTY * unmatched;
// Apply ordering bonuses
for (i = 0; i < numMatches; ++i)
{
matchidx_T currIdx = matches[i];
if (i > 0)
{
matchidx_T prevIdx = matches[i - 1];
// Sequential
if (currIdx == (prevIdx + 1))
score += SEQUENTIAL_BONUS;
}
// Check for bonuses based on neighbor character value
if (currIdx > 0)
{
// Camel case
int neighbor;
int curr;
int neighborSeparator;
if (has_mbyte)
{
while (sidx < currIdx)
{
neighbor = (*mb_ptr2char)(p);
(void)mb_ptr2char_adv(&p);
sidx++;
}
curr = (*mb_ptr2char)(p);
}
else
{
neighbor = str[currIdx - 1];
curr = str[currIdx];
}
if (vim_islower(neighbor) && vim_isupper(curr))
score += CAMEL_BONUS;
// Separator
neighborSeparator = neighbor == '_' || neighbor == ' ';
if (neighborSeparator)
score += SEPARATOR_BONUS;
}
else
{
// First letter
score += FIRST_LETTER_BONUS;
}
}
return score;
}
static int static int
fuzzy_match_recursive( fuzzy_match_recursive(
char_u *fuzpat, char_u *fuzpat,
char_u *str, char_u *str,
int *outScore, matchidx_T strIdx,
char_u *strBegin, int *outScore,
char_u *srcMatches, char_u *strBegin,
char_u *matches, int strLen,
int maxMatches, matchidx_T *srcMatches,
int nextMatch, matchidx_T *matches,
int *recursionCount, int maxMatches,
int recursionLimit) int nextMatch,
int *recursionCount)
{ {
// Recursion params // Recursion params
int recursiveMatch = FALSE; int recursiveMatch = FALSE;
char_u bestRecursiveMatches[256]; matchidx_T bestRecursiveMatches[MAXMATCHES];
int bestRecursiveScore = 0; int bestRecursiveScore = 0;
int first_match; int first_match;
int matched; int matched;
// Count recursions // Count recursions
++*recursionCount; ++*recursionCount;
if (*recursionCount >= recursionLimit) if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT)
return FALSE; return FALSE;
// Detect end of strings // Detect end of strings
@@ -4251,13 +4362,20 @@ fuzzy_match_recursive(
// Loop through fuzpat and str looking for a match // Loop through fuzpat and str looking for a match
first_match = TRUE; first_match = TRUE;
while (*fuzpat != '\0' && *str != '\0') while (*fuzpat != NUL && *str != NUL)
{ {
int c1;
int c2;
c1 = PTR2CHAR(fuzpat);
c2 = PTR2CHAR(str);
// Found match // Found match
if (vim_tolower(*fuzpat) == vim_tolower(*str)) if (vim_tolower(c1) == vim_tolower(c2))
{ {
char_u recursiveMatches[256]; matchidx_T recursiveMatches[MAXMATCHES];
int recursiveScore = 0; int recursiveScore = 0;
char_u *next_char;
// Supplied matches buffer was too short // Supplied matches buffer was too short
if (nextMatch >= maxMatches) if (nextMatch >= maxMatches)
@@ -4266,116 +4384,58 @@ fuzzy_match_recursive(
// "Copy-on-Write" srcMatches into matches // "Copy-on-Write" srcMatches into matches
if (first_match && srcMatches) if (first_match && srcMatches)
{ {
memcpy(matches, srcMatches, nextMatch); memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0]));
first_match = FALSE; first_match = FALSE;
} }
// Recursive call that "skips" this match // Recursive call that "skips" this match
if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, if (has_mbyte)
strBegin, matches, recursiveMatches, next_char = str + (*mb_ptr2len)(str);
sizeof(recursiveMatches), nextMatch, recursionCount, else
recursionLimit)) next_char = str + 1;
if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1,
&recursiveScore, strBegin, strLen, matches,
recursiveMatches,
sizeof(recursiveMatches)/sizeof(recursiveMatches[0]),
nextMatch, recursionCount))
{ {
// Pick best recursive score // Pick best recursive score
if (!recursiveMatch || recursiveScore > bestRecursiveScore) if (!recursiveMatch || recursiveScore > bestRecursiveScore)
{ {
memcpy(bestRecursiveMatches, recursiveMatches, 256); memcpy(bestRecursiveMatches, recursiveMatches,
MAXMATCHES * sizeof(recursiveMatches[0]));
bestRecursiveScore = recursiveScore; bestRecursiveScore = recursiveScore;
} }
recursiveMatch = TRUE; recursiveMatch = TRUE;
} }
// Advance // Advance
matches[nextMatch++] = (char_u)(str - strBegin); matches[nextMatch++] = strIdx;
++fuzpat; if (has_mbyte)
(void)mb_ptr2char_adv(&fuzpat);
else
++fuzpat;
} }
++str; if (has_mbyte)
(void)mb_ptr2char_adv(&str);
else
++str;
strIdx++;
} }
// Determine if full fuzpat was matched // Determine if full fuzpat was matched
matched = *fuzpat == '\0' ? TRUE : FALSE; matched = *fuzpat == NUL ? TRUE : FALSE;
// Calculate score // Calculate score
if (matched) if (matched)
{ *outScore = fuzzy_match_compute_score(strBegin, strLen, matches,
// bonus for adjacent matches nextMatch);
int sequential_bonus = 15;
// bonus if match occurs after a separator
int separator_bonus = 30;
// bonus if match is uppercase and prev is lower
int camel_bonus = 30;
// bonus if the first letter is matched
int first_letter_bonus = 15;
// penalty applied for every letter in str before the first match
int leading_letter_penalty = -5;
// maximum penalty for leading letters
int max_leading_letter_penalty = -15;
// penalty for every letter that doesn't matter
int unmatched_letter_penalty = -1;
int penalty;
int unmatched;
int i;
// Iterate str to end
while (*str != '\0')
++str;
// Initialize score
*outScore = 100;
// Apply leading letter penalty
penalty = leading_letter_penalty * matches[0];
if (penalty < max_leading_letter_penalty)
penalty = max_leading_letter_penalty;
*outScore += penalty;
// Apply unmatched penalty
unmatched = (int)(str - strBegin) - nextMatch;
*outScore += unmatched_letter_penalty * unmatched;
// Apply ordering bonuses
for (i = 0; i < nextMatch; ++i)
{
char_u currIdx = matches[i];
if (i > 0)
{
char_u prevIdx = matches[i - 1];
// Sequential
if (currIdx == (prevIdx + 1))
*outScore += sequential_bonus;
}
// Check for bonuses based on neighbor character value
if (currIdx > 0)
{
// Camel case
char_u neighbor = strBegin[currIdx - 1];
char_u curr = strBegin[currIdx];
int neighborSeparator;
if (islower(neighbor) && isupper(curr))
*outScore += camel_bonus;
// Separator
neighborSeparator = neighbor == '_' || neighbor == ' ';
if (neighborSeparator)
*outScore += separator_bonus;
}
else
{
// First letter
*outScore += first_letter_bonus;
}
}
}
// Return best result // Return best result
if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) if (recursiveMatch && (!matched || bestRecursiveScore > *outScore))
{ {
// Recursive score is better than "this" // Recursive score is better than "this"
memcpy(matches, bestRecursiveMatches, maxMatches); memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0]));
*outScore = bestRecursiveScore; *outScore = bestRecursiveScore;
return TRUE; return TRUE;
} }
@@ -4394,22 +4454,27 @@ fuzzy_match_recursive(
* normalized and varies with pattern. * normalized and varies with pattern.
* Recursion is limited internally (default=10) to prevent degenerate cases * Recursion is limited internally (default=10) to prevent degenerate cases
* (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
* Uses char_u for match indices. Therefore patterns are limited to 256 * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES
* characters. * characters.
* *
* Returns TRUE if fuzpat is found AND calculates a score. * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in
* 'outScore' and the matching character positions in 'matches'.
*/ */
static int static int
fuzzy_match(char_u *str, char_u *fuzpat, int *outScore) fuzzy_match(
char_u *str,
char_u *fuzpat,
int *outScore,
matchidx_T *matches,
int maxMatches)
{ {
char_u matches[256];
int recursionCount = 0; int recursionCount = 0;
int recursionLimit = 10; int len = MB_CHARLEN(str);
*outScore = 0; *outScore = 0;
return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL,
sizeof(matches), 0, &recursionCount, recursionLimit); matches, maxMatches, 0, &recursionCount);
} }
/* /*
@@ -4425,84 +4490,269 @@ fuzzy_item_compare(const void *s1, const void *s2)
} }
/* /*
* Fuzzy search the string 'str' in 'strlist' and return the matching strings * Fuzzy search the string 'str' in a list of 'items' and return the matching
* in 'fmatchlist'. * strings in 'fmatchlist'.
* If 'items' is a list of strings, then search for 'str' in the list.
* If 'items' is a list of dicts, then either use 'key' to lookup the string
* for each item or use 'item_cb' Funcref function to get the string.
* If 'retmatchpos' is TRUE, then return a list of positions where 'str'
* matches for each item.
*/ */
static void static void
match_fuzzy(list_T *strlist, char_u *str, list_T *fmatchlist) match_fuzzy(
list_T *items,
char_u *str,
char_u *key,
callback_T *item_cb,
int retmatchpos,
list_T *fmatchlist)
{ {
long len; long len;
fuzzyItem_T *ptrs; fuzzyItem_T *ptrs;
listitem_T *li; listitem_T *li;
long i = 0; long i = 0;
int found_match = FALSE; int found_match = FALSE;
matchidx_T matches[MAXMATCHES];
len = list_len(strlist); len = list_len(items);
if (len == 0) if (len == 0)
return; return;
ptrs = ALLOC_MULT(fuzzyItem_T, len); ptrs = ALLOC_CLEAR_MULT(fuzzyItem_T, len);
if (ptrs == NULL) if (ptrs == NULL)
return; return;
// For all the string items in strlist, get the fuzzy matching score // For all the string items in items, get the fuzzy matching score
FOR_ALL_LIST_ITEMS(strlist, li) FOR_ALL_LIST_ITEMS(items, li)
{ {
int score; int score;
char_u *itemstr;
typval_T rettv;
ptrs[i].item = li; ptrs[i].item = li;
ptrs[i].score = -9999; ptrs[i].score = SCORE_NONE;
// ignore non-string items in the list itemstr = NULL;
if (li->li_tv.v_type == VAR_STRING && li->li_tv.vval.v_string != NULL) rettv.v_type = VAR_UNKNOWN;
if (fuzzy_match(li->li_tv.vval.v_string, str, &score)) if (li->li_tv.v_type == VAR_STRING) // list of strings
itemstr = li->li_tv.vval.v_string;
else if (li->li_tv.v_type == VAR_DICT &&
(key != NULL || item_cb->cb_name != NULL))
{
// For a dict, either use the specified key to lookup the string or
// use the specified callback function to get the string.
if (key != NULL)
itemstr = dict_get_string(li->li_tv.vval.v_dict, key, FALSE);
else
{ {
ptrs[i].score = score; typval_T argv[2];
found_match = TRUE;
// Invoke the supplied callback (if any) to get the dict item
li->li_tv.vval.v_dict->dv_refcount++;
argv[0].v_type = VAR_DICT;
argv[0].vval.v_dict = li->li_tv.vval.v_dict;
argv[1].v_type = VAR_UNKNOWN;
if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL)
{
if (rettv.v_type == VAR_STRING)
itemstr = rettv.vval.v_string;
}
dict_unref(li->li_tv.vval.v_dict);
} }
}
if (itemstr != NULL
&& fuzzy_match(itemstr, str, &score, matches,
sizeof(matches) / sizeof(matches[0])))
{
// Copy the list of matching positions in itemstr to a list, if
// 'retmatchpos' is set.
if (retmatchpos)
{
int j;
int strsz;
ptrs[i].lmatchpos = list_alloc();
if (ptrs[i].lmatchpos == NULL)
goto done;
strsz = MB_CHARLEN(str);
for (j = 0; j < strsz; j++)
{
if (list_append_number(ptrs[i].lmatchpos,
matches[j]) == FAIL)
goto done;
}
}
ptrs[i].score = score;
found_match = TRUE;
}
++i; ++i;
clear_tv(&rettv);
} }
if (found_match) if (found_match)
{ {
list_T *l;
// Sort the list by the descending order of the match score // Sort the list by the descending order of the match score
qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T), qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T),
fuzzy_item_compare); fuzzy_item_compare);
// Copy the matching strings with 'score != -9999' to the return list // For matchfuzzy(), return a list of matched strings.
// ['str1', 'str2', 'str3']
// For matchfuzzypos(), return a list with two items.
// The first item is a list of matched strings. The second item
// is a list of lists where each list item is a list of matched
// character positions.
// [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
if (retmatchpos)
{
li = list_find(fmatchlist, 0);
if (li == NULL || li->li_tv.vval.v_list == NULL)
goto done;
l = li->li_tv.vval.v_list;
}
else
l = fmatchlist;
// Copy the matching strings with a valid score to the return list
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
if (ptrs[i].score == -9999) if (ptrs[i].score == SCORE_NONE)
break; break;
list_append_string(fmatchlist, ptrs[i].item->li_tv.vval.v_string, list_append_tv(l, &ptrs[i].item->li_tv);
-1); }
// next copy the list of matching positions
if (retmatchpos)
{
li = list_find(fmatchlist, -1);
if (li == NULL || li->li_tv.vval.v_list == NULL)
goto done;
l = li->li_tv.vval.v_list;
for (i = 0; i < len; i++)
{
if (ptrs[i].score == SCORE_NONE)
break;
if (ptrs[i].lmatchpos != NULL &&
list_append_list(l, ptrs[i].lmatchpos) == FAIL)
goto done;
}
} }
} }
done:
vim_free(ptrs); vim_free(ptrs);
} }
/*
* Do fuzzy matching. Returns the list of matched strings in 'rettv'.
* If 'retmatchpos' is TRUE, also returns the matching character positions.
*/
static void
do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos)
{
callback_T cb;
char_u *key = NULL;
int ret;
CLEAR_POINTER(&cb);
// validate and get the arguments
if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL)
{
semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
return;
}
if (argvars[1].v_type != VAR_STRING
|| argvars[1].vval.v_string == NULL)
{
semsg(_(e_invarg2), tv_get_string(&argvars[1]));
return;
}
if (argvars[2].v_type != VAR_UNKNOWN)
{
dict_T *d;
dictitem_T *di;
if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL)
{
emsg(_(e_dictreq));
return;
}
// To search a dict, either a callback function or a key can be
// specified.
d = argvars[2].vval.v_dict;
if ((di = dict_find(d, (char_u *)"key", -1)) != NULL)
{
if (di->di_tv.v_type != VAR_STRING
|| di->di_tv.vval.v_string == NULL
|| *di->di_tv.vval.v_string == NUL)
{
semsg(_(e_invarg2), tv_get_string(&di->di_tv));
return;
}
key = tv_get_string(&di->di_tv);
}
else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL)
{
cb = get_callback(&di->di_tv);
if (cb.cb_name == NULL)
{
semsg(_(e_invargval), "text_cb");
return;
}
}
}
// get the fuzzy matches
ret = rettv_list_alloc(rettv);
if (ret != OK)
goto done;
if (retmatchpos)
{
list_T *l;
// For matchfuzzypos(), a list with two items are returned. First item
// is a list of matching strings and the second item is a list of
// lists with matching positions within each string.
l = list_alloc();
if (l == NULL)
goto done;
if (list_append_list(rettv->vval.v_list, l) == FAIL)
goto done;
l = list_alloc();
if (l == NULL)
goto done;
if (list_append_list(rettv->vval.v_list, l) == FAIL)
goto done;
}
match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key,
&cb, retmatchpos, rettv->vval.v_list);
done:
free_callback(&cb);
}
/* /*
* "matchfuzzy()" function * "matchfuzzy()" function
*/ */
void void
f_matchfuzzy(typval_T *argvars, typval_T *rettv) f_matchfuzzy(typval_T *argvars, typval_T *rettv)
{ {
if (argvars[0].v_type != VAR_LIST) do_fuzzymatch(argvars, rettv, FALSE);
{ }
emsg(_(e_listreq));
return; /*
} * "matchfuzzypos()" function
if (argvars[0].vval.v_list == NULL) */
return; void
if (argvars[1].v_type != VAR_STRING f_matchfuzzypos(typval_T *argvars, typval_T *rettv)
|| argvars[1].vval.v_string == NULL) {
{ do_fuzzymatch(argvars, rettv, TRUE);
semsg(_(e_invarg2), tv_get_string(&argvars[1]));
return;
}
if (rettv_list_alloc(rettv) == OK)
match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]),
rettv->vval.v_list);
} }
#endif #endif

View File

@@ -184,6 +184,7 @@ NEW_TESTS = \
test_match \ test_match \
test_matchadd_conceal \ test_matchadd_conceal \
test_matchadd_conceal_utf8 \ test_matchadd_conceal_utf8 \
test_matchfuzzy \
test_memory_usage \ test_memory_usage \
test_menu \ test_menu \
test_messages \ test_messages \
@@ -420,6 +421,7 @@ NEW_TESTS_RES = \
test_match.res \ test_match.res \
test_matchadd_conceal.res \ test_matchadd_conceal.res \
test_matchadd_conceal_utf8.res \ test_matchadd_conceal_utf8.res \
test_matchfuzzy.res \
test_memory_usage.res \ test_memory_usage.res \
test_menu.res \ test_menu.res \
test_messages.res \ test_messages.res \

View File

@@ -2554,28 +2554,4 @@ func Test_browsedir()
call assert_fails('call browsedir("open", [])', 'E730:') call assert_fails('call browsedir("open", [])', 'E730:')
endfunc endfunc
" Test for matchfuzzy()
func Test_matchfuzzy()
call assert_fails('call matchfuzzy(10, "abc")', 'E714:')
call assert_fails('call matchfuzzy(["abc"], [])', 'E730:')
call assert_equal([], matchfuzzy([], 'abc'))
call assert_equal([], matchfuzzy(['abc'], ''))
call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac'))
call assert_equal([], matchfuzzy([10, 20], 'ac'))
call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
%bw!
eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)})
let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl')
call assert_equal(1, len(l))
call assert_match('needle', l[0])
endfunc
" vim: shiftwidth=2 sts=2 expandtab " vim: shiftwidth=2 sts=2 expandtab

View File

@@ -0,0 +1,188 @@
" Tests for fuzzy matching
source shared.vim
source check.vim
" Test for matchfuzzy()
func Test_matchfuzzy()
call assert_fails('call matchfuzzy(10, "abc")', 'E686:')
call assert_fails('call matchfuzzy(["abc"], [])', 'E730:')
call assert_fails("let x = matchfuzzy(test_null_list(), 'foo')", 'E686:')
call assert_fails('call matchfuzzy(["abc"], test_null_string())', 'E475:')
call assert_equal([], matchfuzzy([], 'abc'))
call assert_equal([], matchfuzzy(['abc'], ''))
call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac'))
call assert_equal([], matchfuzzy([10, 20], 'ac'))
call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len())
call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
" Tests for match preferences
" preference for camel case match
call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
" preference for match after a separator (_ or space)
call assert_equal(['one_two', 'one two', 'onetwo'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
" preference for leading letter match
call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo'))
" preference for sequential match
call assert_equal(['onetwo', 'oanbectdweo'], ['oanbectdweo', 'onetwo']->matchfuzzy('onetwo'))
" non-matching leading letter(s) penalty
call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo'))
" total non-matching letter(s) penalty
call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one'))
%bw!
eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)})
let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl')
call assert_equal(1, len(l))
call assert_match('needle', l[0])
let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}}))
call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'}))
call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> v.val}}))
call assert_equal([], matchfuzzy(l, 'day', {'key' : 'val'}))
call assert_fails("let x = matchfuzzy(l, 'cam', 'random')", 'E715:')
call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> []}}))
call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> 1}}))
call assert_fails("let x = matchfuzzy(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:')
call assert_equal([], matchfuzzy(l, 'cam'))
call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E921:')
call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E730:')
call assert_fails("let x = matchfuzzy(l, 'cam', test_null_dict())", 'E715:')
call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : test_null_string()})", 'E475:')
call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:')
" Test in latin1 encoding
let save_enc = &encoding
set encoding=latin1
call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
let &encoding = save_enc
endfunc
" Test for the fuzzymatchpos() function
func Test_matchfuzzypos()
call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl'))
call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
call assert_equal([['hello', 'hello world hello world'],
\ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]],
\ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello'))
call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa'))
call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab'))
let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256))
call assert_equal(range(256), x[1][0])
call assert_equal([[], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257)))
call assert_equal([[], []], matchfuzzypos([], 'abc'))
" match in a long string
call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]]],
\ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc'))
" preference for camel case match
call assert_equal([['xabcxxaBc'], [[6, 7, 8]]], matchfuzzypos(['xabcxxaBc'], 'abc'))
" preference for match after a separator (_ or space)
call assert_equal([['xabx_ab'], [[5, 6]]], matchfuzzypos(['xabx_ab'], 'ab'))
" preference for leading letter match
call assert_equal([['abcxabc'], [[0, 1]]], matchfuzzypos(['abcxabc'], 'ab'))
" preference for sequential match
call assert_equal([['aobncedone'], [[7, 8, 9]]], matchfuzzypos(['aobncedone'], 'one'))
" best recursive match
call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one'))
let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]],
\ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}}))
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]],
\ matchfuzzypos(l, 'cam', {'key' : 'val'}))
call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}}))
call assert_equal([[], []], matchfuzzypos(l, 'day', {'key' : 'val'}))
call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:')
call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}}))
call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}}))
call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:')
call assert_equal([[], []], matchfuzzypos(l, 'cam'))
call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:')
call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E730:')
call assert_fails("let x = matchfuzzypos(l, 'cam', test_null_dict())", 'E715:')
call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : test_null_string()})", 'E475:')
call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:')
endfunc
func Test_matchfuzzy_mbyte()
CheckFeature multi_lang
call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ'))
" reverse the order of characters
call assert_equal([], matchfuzzy(['ンヹㄇヺヴ'], 'ヺヹ'))
call assert_equal(['αβΩxxx', 'xαxβxΩx'],
\ matchfuzzy(['αβΩxxx', 'xαxβxΩx'], 'αβΩ'))
call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
\ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
" preference for camel case match
call assert_equal(['oneĄwo', 'oneąwo'],
\ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo'))
" preference for match after a separator (_ or space)
call assert_equal(['Ⅱa_bㄟㄠ', 'Ⅱa bㄟㄠ', 'Ⅱabㄟㄠ'],
\ ['Ⅱabㄟㄠ', 'Ⅱa_bㄟㄠ', 'Ⅱa bㄟㄠ']->matchfuzzy('Ⅱabㄟㄠ'))
" preference for leading letter match
call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'],
\ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż'))
" preference for sequential match
call assert_equal(['ㄞㄡㄤfffifl', 'ㄞaㄡbㄤcffdfiefl'],
\ ['ㄞaㄡbㄤcffdfiefl', 'ㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl'))
" non-matching leading letter(s) penalty
call assert_equal(['xㄞㄡㄤfffifl', 'xxㄞㄡㄤfffifl'],
\ ['xxㄞㄡㄤfffifl', 'xㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl'))
" total non-matching letter(s) penalty
call assert_equal(['ŗŝţ', 'ŗŝţx', 'ŗŝţxx'],
\ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ'))
endfunc
func Test_matchfuzzypos_mbyte()
CheckFeature multi_lang
call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]],
\ matchfuzzypos(['こんにちは世界'], 'こんにちは'))
call assert_equal([['ンヹㄇヺヴ'], [[1, 3]]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ'))
" reverse the order of characters
call assert_equal([[], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ'))
call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]]],
\ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ'))
call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
\ [[0, 1], [0, 1], [0, 1], [0, 2]]],
\ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
call assert_equal([['ααααααα'], [[0, 1, 2]]],
\ matchfuzzypos(['ααααααα'], 'ααα'))
call assert_equal([[], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl'))
let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256))
call assert_equal(range(256), x[1][0])
call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257)))
" match in a long string
call assert_equal([[repeat('♪', 300) .. '✗✗✗'], [[300, 301, 302]]],
\ matchfuzzypos([repeat('♪', 300) .. '✗✗✗'], '✗✗✗'))
" preference for camel case match
call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ'))
" preference for match after a separator (_ or space)
call assert_equal([['xちだx_ちだ'], [[5, 6]]], matchfuzzypos(['xちだx_ちだ'], 'ちだ'))
" preference for leading letter match
call assert_equal([[ѵҁxѳѵҁ'], [[0, 1]]], matchfuzzypos([ѵҁxѳѵҁ'], 'ѳѵ'))
" preference for sequential match
call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ'))
" best recursive match
call assert_equal([['xффйд'], [[2, 3, 4]]], matchfuzzypos(['xффйд'], 'фйд'))
endfunc
" vim: shiftwidth=2 sts=2 expandtab

View File

@@ -750,6 +750,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
1726,
/**/ /**/
1725, 1725,
/**/ /**/