mirror of
https://github.com/vim/vim.git
synced 2025-09-26 04:04:07 -04:00
patch 8.0.0190: finding duplicate tags uses a slow linear search
Problem: Detecting duplicate tags uses a slow linear search. Solution: Use a much faster hash table lookup. (James McCoy, closes #1046) But don't add hi_keylen, because it makes hash tables 50% bigger.
This commit is contained in:
168
src/tag.c
168
src/tag.c
@@ -35,9 +35,9 @@ typedef struct tag_pointers
|
|||||||
} tagptrs_T;
|
} tagptrs_T;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The matching tags are first stored in ga_match[]. In which one depends on
|
* The matching tags are first stored in one of the ht_match[] hash tables. In
|
||||||
* the priority of the match.
|
* which one depends on the priority of the match.
|
||||||
* At the end, the matches from ga_match[] are concatenated, to make a list
|
* At the end, all the matches from ht_match[] are concatenated, to make a list
|
||||||
* sorted on priority.
|
* sorted on priority.
|
||||||
*/
|
*/
|
||||||
#define MT_ST_CUR 0 /* static match in current file */
|
#define MT_ST_CUR 0 /* static match in current file */
|
||||||
@@ -1341,12 +1341,9 @@ find_tags(
|
|||||||
int is_etag; /* current file is emacs style */
|
int is_etag; /* current file is emacs style */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct match_found
|
char_u *mfp;
|
||||||
{
|
hashtab_T ht_match[MT_COUNT];
|
||||||
int len; /* nr of chars of match[] to be compared */
|
hash_T hash = 0;
|
||||||
char_u match[1]; /* actually longer */
|
|
||||||
} *mfp, *mfp2;
|
|
||||||
garray_T ga_match[MT_COUNT];
|
|
||||||
int match_count = 0; /* number of matches found */
|
int match_count = 0; /* number of matches found */
|
||||||
char_u **matches;
|
char_u **matches;
|
||||||
int mtt;
|
int mtt;
|
||||||
@@ -1411,7 +1408,7 @@ find_tags(
|
|||||||
ebuf = alloc(LSIZE);
|
ebuf = alloc(LSIZE);
|
||||||
#endif
|
#endif
|
||||||
for (mtt = 0; mtt < MT_COUNT; ++mtt)
|
for (mtt = 0; mtt < MT_COUNT; ++mtt)
|
||||||
ga_init2(&ga_match[mtt], (int)sizeof(struct match_found *), 100);
|
hash_init(&ht_match[mtt]);
|
||||||
|
|
||||||
/* check for out of memory situation */
|
/* check for out of memory situation */
|
||||||
if (lbuf == NULL || tag_fname == NULL
|
if (lbuf == NULL || tag_fname == NULL
|
||||||
@@ -2206,10 +2203,12 @@ parse_line:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If a match is found, add it to ga_match[].
|
* If a match is found, add it to ht_match[].
|
||||||
*/
|
*/
|
||||||
if (match)
|
if (match)
|
||||||
{
|
{
|
||||||
|
int len = 0;
|
||||||
|
|
||||||
#ifdef FEAT_CSCOPE
|
#ifdef FEAT_CSCOPE
|
||||||
if (use_cscope)
|
if (use_cscope)
|
||||||
{
|
{
|
||||||
@@ -2262,15 +2261,10 @@ parse_line:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add the found match in ga_match[mtt], avoiding duplicates.
|
* Add the found match in ht_match[mtt].
|
||||||
* Store the info we need later, which depends on the kind of
|
* Store the info we need later, which depends on the kind of
|
||||||
* tags we are dealing with.
|
* tags we are dealing with.
|
||||||
*/
|
*/
|
||||||
if (ga_grow(&ga_match[mtt], 1) == OK)
|
|
||||||
{
|
|
||||||
int len;
|
|
||||||
int heuristic;
|
|
||||||
|
|
||||||
if (help_only)
|
if (help_only)
|
||||||
{
|
{
|
||||||
#ifdef FEAT_MULTI_LANG
|
#ifdef FEAT_MULTI_LANG
|
||||||
@@ -2279,21 +2273,19 @@ parse_line:
|
|||||||
# define ML_EXTRA 0
|
# define ML_EXTRA 0
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* Append the help-heuristic number after the
|
* Append the help-heuristic number after the tagname, for
|
||||||
* tagname, for sorting it later.
|
* sorting it later. The heuristic is ignored for
|
||||||
|
* detecting duplicates.
|
||||||
|
* The format is {tagname}@{lang}NUL{heuristic}NUL
|
||||||
*/
|
*/
|
||||||
*tagp.tagname_end = NUL;
|
*tagp.tagname_end = NUL;
|
||||||
len = (int)(tagp.tagname_end - tagp.tagname);
|
len = (int)(tagp.tagname_end - tagp.tagname);
|
||||||
mfp = (struct match_found *)
|
mfp = (char_u *)alloc((int)sizeof(char_u) + len + 10 + ML_EXTRA + 1);
|
||||||
alloc((int)sizeof(struct match_found) + len
|
|
||||||
+ 10 + ML_EXTRA);
|
|
||||||
if (mfp != NULL)
|
if (mfp != NULL)
|
||||||
{
|
{
|
||||||
/* "len" includes the language and the NUL, but
|
int heuristic;
|
||||||
* not the priority. */
|
|
||||||
mfp->len = len + ML_EXTRA + 1;
|
p = mfp;
|
||||||
#define ML_HELP_LEN 6
|
|
||||||
p = mfp->match;
|
|
||||||
STRCPY(p, tagp.tagname);
|
STRCPY(p, tagp.tagname);
|
||||||
#ifdef FEAT_MULTI_LANG
|
#ifdef FEAT_MULTI_LANG
|
||||||
p[len] = '@';
|
p[len] = '@';
|
||||||
@@ -2325,14 +2317,9 @@ parse_line:
|
|||||||
if (tagp.command + 2 < temp_end)
|
if (tagp.command + 2 < temp_end)
|
||||||
{
|
{
|
||||||
len = (int)(temp_end - tagp.command - 2);
|
len = (int)(temp_end - tagp.command - 2);
|
||||||
mfp = (struct match_found *)alloc(
|
mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
|
||||||
(int)sizeof(struct match_found) + len);
|
|
||||||
if (mfp != NULL)
|
if (mfp != NULL)
|
||||||
{
|
vim_strncpy(mfp, tagp.command + 2, len);
|
||||||
mfp->len = len + 1; /* include the NUL */
|
|
||||||
p = mfp->match;
|
|
||||||
vim_strncpy(p, tagp.command + 2, len);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
mfp = NULL;
|
mfp = NULL;
|
||||||
@@ -2341,14 +2328,9 @@ parse_line:
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
len = (int)(tagp.tagname_end - tagp.tagname);
|
len = (int)(tagp.tagname_end - tagp.tagname);
|
||||||
mfp = (struct match_found *)alloc(
|
mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
|
||||||
(int)sizeof(struct match_found) + len);
|
|
||||||
if (mfp != NULL)
|
if (mfp != NULL)
|
||||||
{
|
vim_strncpy(mfp, tagp.tagname, len);
|
||||||
mfp->len = len + 1; /* include the NUL */
|
|
||||||
p = mfp->match;
|
|
||||||
vim_strncpy(p, tagp.tagname, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if wanted, re-read line to get long form too */
|
/* if wanted, re-read line to get long form too */
|
||||||
if (State & INSERT)
|
if (State & INSERT)
|
||||||
@@ -2357,25 +2339,33 @@ parse_line:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
#define TAG_SEP 0x01
|
||||||
|
size_t tag_fname_len = STRLEN(tag_fname);
|
||||||
|
#ifdef FEAT_EMACS_TAGS
|
||||||
|
size_t ebuf_len = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Save the tag in a buffer.
|
/* Save the tag in a buffer.
|
||||||
* Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
|
* Use 0x01 to separate fields (Can't use NUL, because the
|
||||||
* other tag: <mtt><tag_fname><NUL><NUL><lbuf>
|
* hash key is terminated by NUL).
|
||||||
* without Emacs tags: <mtt><tag_fname><NUL><lbuf>
|
* Emacs tag: <mtt><tag_fname><0x01><ebuf><0x01><lbuf><NUL>
|
||||||
|
* other tag: <mtt><tag_fname><0x01><0x01><lbuf><NUL>
|
||||||
|
* without Emacs tags: <mtt><tag_fname><0x01><lbuf><NUL>
|
||||||
*/
|
*/
|
||||||
len = (int)STRLEN(tag_fname)
|
len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3;
|
||||||
+ (int)STRLEN(lbuf) + 3;
|
|
||||||
#ifdef FEAT_EMACS_TAGS
|
#ifdef FEAT_EMACS_TAGS
|
||||||
if (is_etag)
|
if (is_etag)
|
||||||
len += (int)STRLEN(ebuf) + 1;
|
{
|
||||||
|
ebuf_len = STRLEN(ebuf);
|
||||||
|
len += (int)ebuf_len + 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
++len;
|
++len;
|
||||||
#endif
|
#endif
|
||||||
mfp = (struct match_found *)alloc(
|
mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
|
||||||
(int)sizeof(struct match_found) + len);
|
|
||||||
if (mfp != NULL)
|
if (mfp != NULL)
|
||||||
{
|
{
|
||||||
mfp->len = len;
|
p = mfp;
|
||||||
p = mfp->match;
|
|
||||||
p[0] = mtt;
|
p[0] = mtt;
|
||||||
STRCPY(p + 1, tag_fname);
|
STRCPY(p + 1, tag_fname);
|
||||||
#ifdef BACKSLASH_IN_FILENAME
|
#ifdef BACKSLASH_IN_FILENAME
|
||||||
@@ -2383,15 +2373,17 @@ parse_line:
|
|||||||
* both path/file and path\file. */
|
* both path/file and path\file. */
|
||||||
slash_adjust(p + 1);
|
slash_adjust(p + 1);
|
||||||
#endif
|
#endif
|
||||||
s = p + 1 + STRLEN(tag_fname) + 1;
|
p[tag_fname_len + 1] = TAG_SEP;
|
||||||
|
s = p + 1 + tag_fname_len + 1;
|
||||||
#ifdef FEAT_EMACS_TAGS
|
#ifdef FEAT_EMACS_TAGS
|
||||||
if (is_etag)
|
if (is_etag)
|
||||||
{
|
{
|
||||||
STRCPY(s, ebuf);
|
STRCPY(s, ebuf);
|
||||||
s += STRLEN(ebuf) + 1;
|
s[ebuf_len] = TAG_SEP;
|
||||||
|
s += ebuf_len + 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
*s++ = NUL;
|
*s++ = TAG_SEP;
|
||||||
#endif
|
#endif
|
||||||
STRCPY(s, lbuf);
|
STRCPY(s, lbuf);
|
||||||
}
|
}
|
||||||
@@ -2399,43 +2391,40 @@ parse_line:
|
|||||||
|
|
||||||
if (mfp != NULL)
|
if (mfp != NULL)
|
||||||
{
|
{
|
||||||
|
hashitem_T *hi;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't add identical matches.
|
* Don't add identical matches.
|
||||||
* This can take a lot of time when finding many
|
|
||||||
* matches, check for CTRL-C now and then.
|
|
||||||
* Add all cscope tags, because they are all listed.
|
* Add all cscope tags, because they are all listed.
|
||||||
|
* "mfp" is used as a hash key, there is a NUL byte to end
|
||||||
|
* the part that matters for comparing, more bytes may follow
|
||||||
|
* after it. E.g. help tags store the priority after the
|
||||||
|
* NUL.
|
||||||
*/
|
*/
|
||||||
#ifdef FEAT_CSCOPE
|
#ifdef FEAT_CSCOPE
|
||||||
if (use_cscope)
|
if (use_cscope)
|
||||||
i = -1;
|
hash++;
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; )
|
hash = hash_hash(mfp);
|
||||||
|
hi = hash_lookup(&ht_match[mtt], mfp, hash);
|
||||||
|
if (HASHITEM_EMPTY(hi))
|
||||||
{
|
{
|
||||||
mfp2 = ((struct match_found **)
|
if (hash_add_item(&ht_match[mtt], hi, mfp, hash)
|
||||||
(ga_match[mtt].ga_data))[i];
|
== FAIL)
|
||||||
if (mfp2->len == mfp->len
|
|
||||||
&& memcmp(mfp2->match, mfp->match,
|
|
||||||
(size_t)mfp->len) == 0)
|
|
||||||
break;
|
|
||||||
fast_breakcheck();
|
|
||||||
}
|
|
||||||
if (i < 0)
|
|
||||||
{
|
|
||||||
((struct match_found **)(ga_match[mtt].ga_data))
|
|
||||||
[ga_match[mtt].ga_len++] = mfp;
|
|
||||||
++match_count;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
vim_free(mfp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else /* Out of memory! Just forget about the rest. */
|
|
||||||
{
|
{
|
||||||
|
/* Out of memory! Just forget about the rest. */
|
||||||
retval = OK;
|
retval = OK;
|
||||||
stop_searching = TRUE;
|
stop_searching = TRUE;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
++match_count;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* duplicate tag, drop it */
|
||||||
|
vim_free(mfp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#ifdef FEAT_CSCOPE
|
#ifdef FEAT_CSCOPE
|
||||||
if (use_cscope && eof)
|
if (use_cscope && eof)
|
||||||
@@ -2532,7 +2521,7 @@ findtag_end:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Move the matches from the ga_match[] arrays into one list of
|
* Move the matches from the ht_match[] hash tables into one list of
|
||||||
* matches. When retval == FAIL, free the matches.
|
* matches. When retval == FAIL, free the matches.
|
||||||
*/
|
*/
|
||||||
if (retval == FAIL)
|
if (retval == FAIL)
|
||||||
@@ -2546,22 +2535,29 @@ findtag_end:
|
|||||||
match_count = 0;
|
match_count = 0;
|
||||||
for (mtt = 0; mtt < MT_COUNT; ++mtt)
|
for (mtt = 0; mtt < MT_COUNT; ++mtt)
|
||||||
{
|
{
|
||||||
for (i = 0; i < ga_match[mtt].ga_len; ++i)
|
hashitem_T *hi;
|
||||||
|
long_u todo;
|
||||||
|
|
||||||
|
todo = (long)ht_match[mtt].ht_used;
|
||||||
|
for (hi = ht_match[mtt].ht_array; todo > 0; ++hi)
|
||||||
{
|
{
|
||||||
mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i];
|
if (!HASHITEM_EMPTY(hi))
|
||||||
|
{
|
||||||
|
mfp = hi->hi_key;
|
||||||
if (matches == NULL)
|
if (matches == NULL)
|
||||||
vim_free(mfp);
|
vim_free(mfp);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* To avoid allocating memory again we turn the struct
|
/* now change the TAG_SEP back to NUL */
|
||||||
* match_found into a string. For help the priority was not
|
for (p = mfp; *p != NUL; ++p)
|
||||||
* included in the length. */
|
if (*p == TAG_SEP)
|
||||||
mch_memmove(mfp, mfp->match,
|
*p = NUL;
|
||||||
(size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0)));
|
|
||||||
matches[match_count++] = (char_u *)mfp;
|
matches[match_count++] = (char_u *)mfp;
|
||||||
}
|
}
|
||||||
|
todo--;
|
||||||
}
|
}
|
||||||
ga_clear(&ga_match[mtt]);
|
}
|
||||||
|
hash_clear(&ht_match[mtt]);
|
||||||
}
|
}
|
||||||
|
|
||||||
*matchesp = matches;
|
*matchesp = matches;
|
||||||
|
@@ -764,6 +764,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
190,
|
||||||
/**/
|
/**/
|
||||||
189,
|
189,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user