0
0
mirror of https://github.com/vim/vim.git synced 2025-09-25 03:54:15 -04:00

updated for version 7.3.1106

Problem:    New regexp engine: saving and restoring lastlist in the states
            takes a lot of time.
Solution:   Use a second lastlist value for the first recursive call.
This commit is contained in:
Bram Moolenaar
2013-06-03 12:17:04 +02:00
parent f46da70603
commit dd2ccdf6ea
3 changed files with 73 additions and 33 deletions

View File

@@ -72,7 +72,7 @@ struct nfa_state
nfa_state_T *out; nfa_state_T *out;
nfa_state_T *out1; nfa_state_T *out1;
int id; int id;
int lastlist; int lastlist[2]; /* 0: normal, 1: recursive */
int negated; int negated;
int val; int val;
}; };

View File

@@ -255,6 +255,15 @@ static int istate; /* Index in the state vector, used in alloc_state() */
/* If not NULL match must end at this position */ /* If not NULL match must end at this position */
static save_se_T *nfa_endp = NULL; static save_se_T *nfa_endp = NULL;
/* listid is global, so that it increases on recursive calls to
* nfa_regmatch(), which means we don't have to clear the lastlist field of
* all the states. */
static int nfa_listid;
static int nfa_alt_listid;
/* 0 for first call to nfa_regmatch(), 1 for recursive call. */
static int nfa_ll_index = 0;
static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
static int nfa_emit_equi_class __ARGS((int c, int neg)); static int nfa_emit_equi_class __ARGS((int c, int neg));
@@ -2169,7 +2178,8 @@ alloc_state(c, out, out1)
s->out1 = out1; s->out1 = out1;
s->id = istate; s->id = istate;
s->lastlist = 0; s->lastlist[0] = 0;
s->lastlist[1] = 0;
s->negated = FALSE; s->negated = FALSE;
return s; return s;
@@ -3113,9 +3123,9 @@ addstate(l, state, subs, off)
#endif #endif
/* These nodes do not need to be added, but we need to bail out /* These nodes do not need to be added, but we need to bail out
* when it was tried to be added to this list before. */ * when it was tried to be added to this list before. */
if (state->lastlist == l->id) if (state->lastlist[nfa_ll_index] == l->id)
goto skip_add; goto skip_add;
state->lastlist = l->id; state->lastlist[nfa_ll_index] = l->id;
break; break;
case NFA_BOL: case NFA_BOL:
@@ -3131,7 +3141,7 @@ addstate(l, state, subs, off)
/* FALLTHROUGH */ /* FALLTHROUGH */
default: default:
if (state->lastlist == l->id) if (state->lastlist[nfa_ll_index] == l->id)
{ {
/* This state is already in the list, don't add it again, /* This state is already in the list, don't add it again,
* unless it is an MOPEN that is used for a backreference. */ * unless it is an MOPEN that is used for a backreference. */
@@ -3173,7 +3183,7 @@ skip_add:
} }
/* add the state to the list */ /* add the state to the list */
state->lastlist = l->id; state->lastlist[nfa_ll_index] = l->id;
thread = &l->t[l->n++]; thread = &l->t[l->n++];
thread->state = state; thread->state = state;
copy_sub(&thread->subs.norm, &subs->norm); copy_sub(&thread->subs.norm, &subs->norm);
@@ -3616,6 +3626,7 @@ match_zref(subidx, bytelen)
/* /*
* Save list IDs for all NFA states of "prog" into "list". * Save list IDs for all NFA states of "prog" into "list".
* Also reset the IDs to zero. * Also reset the IDs to zero.
* Only used for the recursive value lastlist[1].
*/ */
static void static void
nfa_save_listids(prog, list) nfa_save_listids(prog, list)
@@ -3629,8 +3640,8 @@ nfa_save_listids(prog, list)
p = &prog->state[0]; p = &prog->state[0];
for (i = prog->nstate; --i >= 0; ) for (i = prog->nstate; --i >= 0; )
{ {
list[i] = p->lastlist; list[i] = p->lastlist[1];
p->lastlist = 0; p->lastlist[1] = 0;
++p; ++p;
} }
} }
@@ -3649,7 +3660,7 @@ nfa_restore_listids(prog, list)
p = &prog->state[0]; p = &prog->state[0];
for (i = prog->nstate; --i >= 0; ) for (i = prog->nstate; --i >= 0; )
{ {
p->lastlist = list[i]; p->lastlist[1] = list[i];
++p; ++p;
} }
} }
@@ -3683,10 +3694,12 @@ recursive_regmatch(state, prog, submatch, m, listids)
char_u *save_regline = regline; char_u *save_regline = regline;
int save_reglnum = reglnum; int save_reglnum = reglnum;
int save_nfa_match = nfa_match; int save_nfa_match = nfa_match;
int save_nfa_listid = nfa_listid;
save_se_T *save_nfa_endp = nfa_endp; save_se_T *save_nfa_endp = nfa_endp;
save_se_T endpos; save_se_T endpos;
save_se_T *endposp = NULL; save_se_T *endposp = NULL;
int result; int result;
int need_restore = FALSE;
if (state->c == NFA_START_INVISIBLE_BEFORE) if (state->c == NFA_START_INVISIBLE_BEFORE)
{ {
@@ -3745,30 +3758,52 @@ recursive_regmatch(state, prog, submatch, m, listids)
} }
} }
/* Call nfa_regmatch() to check if the current concat matches
* at this position. The concat ends with the node
* NFA_END_INVISIBLE */
if (*listids == NULL)
{
*listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
if (*listids == NULL)
{
EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
return 0;
}
}
#ifdef ENABLE_LOG #ifdef ENABLE_LOG
if (log_fd != stderr) if (log_fd != stderr)
fclose(log_fd); fclose(log_fd);
log_fd = NULL; log_fd = NULL;
#endif #endif
/* Have to clear the listid field of the NFA nodes, so that /* Have to clear the lastlist field of the NFA nodes, so that
* nfa_regmatch() and addstate() can run properly after * nfa_regmatch() and addstate() can run properly after recursion. */
* recursion. */ if (nfa_ll_index == 1)
nfa_save_listids(prog, *listids); {
/* Already calling nfa_regmatch() recursively. Save the lastlist[1]
* values and clear them. */
if (*listids == NULL)
{
*listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
if (*listids == NULL)
{
EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
return 0;
}
}
nfa_save_listids(prog, *listids);
need_restore = TRUE;
/* any value of nfa_listid will do */
}
else
{
/* First recursive nfa_regmatch() call, switch to the second lastlist
* entry. Make sure nfa_listid is different from a previous recursive
* call, because some states may still have this ID. */
++nfa_ll_index;
if (nfa_listid <= nfa_alt_listid)
nfa_listid = nfa_alt_listid;
}
/* Call nfa_regmatch() to check if the current concat matches at this
* position. The concat ends with the node NFA_END_INVISIBLE */
nfa_endp = endposp; nfa_endp = endposp;
result = nfa_regmatch(prog, state->out, submatch, m); result = nfa_regmatch(prog, state->out, submatch, m);
nfa_restore_listids(prog, *listids);
if (need_restore)
nfa_restore_listids(prog, *listids);
else
{
--nfa_ll_index;
nfa_alt_listid = nfa_listid;
}
/* restore position in input text */ /* restore position in input text */
reginput = save_reginput; reginput = save_reginput;
@@ -3776,6 +3811,7 @@ recursive_regmatch(state, prog, submatch, m, listids)
reglnum = save_reglnum; reglnum = save_reglnum;
nfa_match = save_nfa_match; nfa_match = save_nfa_match;
nfa_endp = save_nfa_endp; nfa_endp = save_nfa_endp;
nfa_listid = save_nfa_listid;
#ifdef ENABLE_LOG #ifdef ENABLE_LOG
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
@@ -3821,7 +3857,6 @@ nfa_regmatch(prog, start, submatch, m)
nfa_list_T list[3]; nfa_list_T list[3];
nfa_list_T *listtbl[2][2]; nfa_list_T *listtbl[2][2];
nfa_list_T *ll; nfa_list_T *ll;
int listid = 1;
int listidx; int listidx;
nfa_list_T *thislist; nfa_list_T *thislist;
nfa_list_T *nextlist; nfa_list_T *nextlist;
@@ -3875,7 +3910,7 @@ nfa_regmatch(prog, start, submatch, m)
#ifdef ENABLE_LOG #ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n"); fprintf(log_fd, "(---) STARTSTATE\n");
#endif #endif
thislist->id = listid; thislist->id = nfa_listid + 1;
addstate(thislist, start, m, 0); addstate(thislist, start, m, 0);
/* There are two cases when the NFA advances: 1. input char matches the /* There are two cases when the NFA advances: 1. input char matches the
@@ -3923,10 +3958,10 @@ nfa_regmatch(prog, start, submatch, m)
nextlist = &list[flag ^= 1]; nextlist = &list[flag ^= 1];
nextlist->n = 0; /* clear nextlist */ nextlist->n = 0; /* clear nextlist */
listtbl[1][0] = nextlist; listtbl[1][0] = nextlist;
++listid; ++nfa_listid;
thislist->id = listid; thislist->id = nfa_listid;
nextlist->id = listid + 1; nextlist->id = nfa_listid + 1;
neglist->id = listid + 1; neglist->id = nfa_listid + 1;
#ifdef ENABLE_LOG #ifdef ENABLE_LOG
fprintf(log_fd, "------------------------------------------\n"); fprintf(log_fd, "------------------------------------------\n");
@@ -4843,6 +4878,8 @@ nfa_regexec_both(line, col)
nfa_has_zend = prog->has_zend; nfa_has_zend = prog->has_zend;
nfa_has_backref = prog->has_backref; nfa_has_backref = prog->has_backref;
nfa_nsubexpr = prog->nsubexp; nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
#ifdef DEBUG #ifdef DEBUG
nfa_regengine.expr = prog->pattern; nfa_regengine.expr = prog->pattern;
#endif #endif
@@ -4851,7 +4888,8 @@ nfa_regexec_both(line, col)
for (i = 0; i < nstate; ++i) for (i = 0; i < nstate; ++i)
{ {
prog->state[i].id = i; prog->state[i].id = i;
prog->state[i].lastlist = 0; prog->state[i].lastlist[0] = 0;
prog->state[i].lastlist[1] = 0;
} }
retval = nfa_regtry(prog, col); retval = nfa_regtry(prog, col);

View File

@@ -728,6 +728,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
1106,
/**/ /**/
1105, 1105,
/**/ /**/