forked from aniani/vim
updated for version 7.3.1149
Problem: New regexp engine: Matching plain text could be faster.
Solution: Detect a plain text match and handle it specifically. Add
vim_regfree().
This commit is contained in:
171
src/regexp_nfa.c
171
src/regexp_nfa.c
@@ -270,6 +270,7 @@ static int nfa_ll_index = 0;
|
||||
static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
|
||||
static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
|
||||
static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
|
||||
static char_u *nfa_get_match_text __ARGS((nfa_state_T *start));
|
||||
static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
|
||||
static int nfa_emit_equi_class __ARGS((int c));
|
||||
static int nfa_regatom __ARGS((void));
|
||||
@@ -295,6 +296,7 @@ static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos));
|
||||
static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col));
|
||||
static long nfa_regexec_both __ARGS((char_u *line, colnr_T col));
|
||||
static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags));
|
||||
static void nfa_regfree __ARGS((regprog_T *prog));
|
||||
static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
|
||||
static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
|
||||
|
||||
@@ -492,6 +494,52 @@ nfa_get_regstart(start, depth)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out if the NFA state list contains just literal text and nothing
|
||||
* else. If so return a string with what must match after regstart.
|
||||
* Otherwise return NULL.
|
||||
*/
|
||||
static char_u *
|
||||
nfa_get_match_text(start)
|
||||
nfa_state_T *start;
|
||||
{
|
||||
nfa_state_T *p = start;
|
||||
int len = 0;
|
||||
char_u *ret;
|
||||
char_u *s;
|
||||
|
||||
if (p->c != NFA_MOPEN)
|
||||
return NULL; /* just in case */
|
||||
p = p->out;
|
||||
while (p->c > 0)
|
||||
{
|
||||
len += MB_CHAR2LEN(p->c);
|
||||
p = p->out;
|
||||
}
|
||||
if (p->c != NFA_MCLOSE || p->out->c != NFA_MATCH)
|
||||
return NULL;
|
||||
|
||||
ret = alloc(len);
|
||||
if (ret != NULL)
|
||||
{
|
||||
len = 0;
|
||||
p = start->out->out; /* skip first char, it goes into regstart */
|
||||
s = ret;
|
||||
while (p->c > 0)
|
||||
{
|
||||
#ifdef FEAT_MBYTE
|
||||
if (has_mbyte)
|
||||
s += (*mb_char2bytes)(p->c, s);
|
||||
else
|
||||
#endif
|
||||
*s++ = p->c;
|
||||
p = p->out;
|
||||
}
|
||||
*s = NUL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate more space for post_start. Called when
|
||||
* running above the estimated number of states.
|
||||
@@ -2280,8 +2328,13 @@ nfa_dump(prog)
|
||||
{
|
||||
nfa_print_state(debugf, prog->start);
|
||||
|
||||
fprintf(debugf, "reganch: %d\n", prog->reganch);
|
||||
fprintf(debugf, "regstart: %d\n", prog->regstart);
|
||||
if (prog->reganch)
|
||||
fprintf(debugf, "reganch: %d\n", prog->reganch);
|
||||
if (prog->regstart != NUL)
|
||||
fprintf(debugf, "regstart: %c (decimal: %d)\n",
|
||||
prog->regstart, prog->regstart);
|
||||
if (prog->match_text != NULL)
|
||||
fprintf(debugf, "match_text: \"%s\"\n", prog->match_text);
|
||||
|
||||
fclose(debugf);
|
||||
}
|
||||
@@ -4154,6 +4207,7 @@ recursive_regmatch(state, prog, submatch, m, listids)
|
||||
|
||||
static int failure_chance __ARGS((nfa_state_T *state, int depth));
|
||||
static int skip_to_start __ARGS((int c, colnr_T *colp));
|
||||
static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u *match_text));
|
||||
|
||||
/*
|
||||
* Estimate the chance of a match with "state" failing.
|
||||
@@ -4330,6 +4384,69 @@ skip_to_start(c, colp)
|
||||
return OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for a match with match_text.
|
||||
* Called after skip_to_start() has find regstart.
|
||||
* Returns zero for no match, 1 for a match.
|
||||
*/
|
||||
static long
|
||||
find_match_text(startcol, regstart, match_text)
|
||||
colnr_T startcol;
|
||||
int regstart;
|
||||
char_u *match_text;
|
||||
{
|
||||
colnr_T col = startcol;
|
||||
int c1, c2;
|
||||
int len1, len2;
|
||||
int match;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
match = TRUE;
|
||||
len2 = MB_CHAR2LEN(regstart); /* skip regstart */
|
||||
for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1))
|
||||
{
|
||||
c1 = PTR2CHAR(match_text + len1);
|
||||
c2 = PTR2CHAR(regline + col + len2);
|
||||
if (c1 != c2 && (!ireg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2)))
|
||||
{
|
||||
match = FALSE;
|
||||
break;
|
||||
}
|
||||
len2 += MB_CHAR2LEN(c2);
|
||||
}
|
||||
if (match
|
||||
#ifdef FEAT_MBYTE
|
||||
/* check that no composing char follows */
|
||||
&& !(enc_utf8
|
||||
&& utf_iscomposing(PTR2CHAR(regline + col + len2)))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
cleanup_subexpr();
|
||||
if (REG_MULTI)
|
||||
{
|
||||
reg_startpos[0].lnum = reglnum;
|
||||
reg_startpos[0].col = col;
|
||||
reg_endpos[0].lnum = reglnum;
|
||||
reg_endpos[0].col = col + len2;
|
||||
}
|
||||
else
|
||||
{
|
||||
reg_startp[0] = regline + col;
|
||||
reg_endp[0] = regline + col + len2;
|
||||
}
|
||||
return 1L;
|
||||
}
|
||||
|
||||
/* Try finding regstart after the current match. */
|
||||
col += MB_CHAR2LEN(regstart); /* skip regstart */
|
||||
if (skip_to_start(regstart, &col) == FAIL)
|
||||
break;
|
||||
}
|
||||
return 0L;
|
||||
}
|
||||
|
||||
/*
|
||||
* Main matching routine.
|
||||
*
|
||||
@@ -5584,17 +5701,6 @@ nfa_regtry(prog, col)
|
||||
#endif
|
||||
|
||||
reginput = regline + col;
|
||||
need_clear_subexpr = TRUE;
|
||||
#ifdef FEAT_SYN_HL
|
||||
/* Clear the external match subpointers if necessary. */
|
||||
if (prog->reghasz == REX_SET)
|
||||
{
|
||||
nfa_has_zsubexpr = TRUE;
|
||||
need_clear_zsubexpr = TRUE;
|
||||
}
|
||||
else
|
||||
nfa_has_zsubexpr = FALSE;
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_LOG
|
||||
f = fopen(NFA_REGEXP_RUN_LOG, "a");
|
||||
@@ -5764,12 +5870,31 @@ nfa_regexec_both(line, startcol)
|
||||
if (prog->reganch && col > 0)
|
||||
return 0L;
|
||||
|
||||
need_clear_subexpr = TRUE;
|
||||
#ifdef FEAT_SYN_HL
|
||||
/* Clear the external match subpointers if necessary. */
|
||||
if (prog->reghasz == REX_SET)
|
||||
{
|
||||
nfa_has_zsubexpr = TRUE;
|
||||
need_clear_zsubexpr = TRUE;
|
||||
}
|
||||
else
|
||||
nfa_has_zsubexpr = FALSE;
|
||||
#endif
|
||||
|
||||
if (prog->regstart != NUL)
|
||||
{
|
||||
/* Skip ahead until a character we know the match must start with.
|
||||
* When there is none there is no match. */
|
||||
if (skip_to_start(prog->regstart, &col) == FAIL)
|
||||
return 0L;
|
||||
|
||||
/* If match_text is set it contains the full text that must match.
|
||||
* Nothing else to try. Doesn't handle combining chars well. */
|
||||
if (prog->match_text != NULL && !ireg_icombine)
|
||||
return find_match_text(col, prog->regstart, prog->match_text);
|
||||
}
|
||||
|
||||
/* If the start column is past the maximum column: no need to try. */
|
||||
if (ireg_maxcol > 0 && col >= ireg_maxcol)
|
||||
goto theend;
|
||||
@@ -5876,6 +6001,8 @@ nfa_regcomp(expr, re_flags)
|
||||
prog->reganch = nfa_get_reganch(prog->start, 0);
|
||||
prog->regstart = nfa_get_regstart(prog->start, 0);
|
||||
|
||||
prog->match_text = nfa_get_match_text(prog->start);
|
||||
|
||||
#ifdef ENABLE_LOG
|
||||
nfa_postfix_dump(expr, OK);
|
||||
nfa_dump(prog);
|
||||
@@ -5885,7 +6012,7 @@ nfa_regcomp(expr, re_flags)
|
||||
prog->reghasz = re_has_z;
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
prog->pattern = vim_strsave(expr); /* memory will leak */
|
||||
prog->pattern = vim_strsave(expr);
|
||||
nfa_regengine.expr = NULL;
|
||||
#endif
|
||||
|
||||
@@ -5907,6 +6034,22 @@ fail:
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a compiled regexp program, returned by nfa_regcomp().
|
||||
*/
|
||||
static void
|
||||
nfa_regfree(prog)
|
||||
regprog_T *prog;
|
||||
{
|
||||
if (prog != NULL)
|
||||
{
|
||||
vim_free(((nfa_regprog_T *)prog)->match_text);
|
||||
#ifdef DEBUG
|
||||
vim_free(((nfa_regprog_T *)prog)->pattern);
|
||||
#endif
|
||||
vim_free(prog);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Match a regexp against a string.
|
||||
|
||||
Reference in New Issue
Block a user