mirror of
https://github.com/vim/vim.git
synced 2025-09-28 04:24:06 -04:00
updated for version 7.3.1137
Problem: New regexp engine: collections are slow.
Solution: Handle all characters in one go.
This commit is contained in:
420
src/regexp_nfa.c
420
src/regexp_nfa.c
@@ -34,15 +34,23 @@ enum
|
||||
NFA_SPLIT = -1024,
|
||||
NFA_MATCH,
|
||||
NFA_SKIP_CHAR, /* matches a 0-length char */
|
||||
NFA_END_NEG_RANGE, /* Used when expanding [^ab] */
|
||||
|
||||
NFA_CONCAT,
|
||||
NFA_START_COLL, /* [abc] start */
|
||||
NFA_END_COLL, /* [abc] end */
|
||||
NFA_START_NEG_COLL, /* [^abc] start */
|
||||
NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */
|
||||
NFA_RANGE, /* range of the two previous items (only
|
||||
* used in postfix) */
|
||||
NFA_RANGE_MIN, /* low end of a range */
|
||||
NFA_RANGE_MAX, /* high end of a range */
|
||||
|
||||
NFA_CONCAT, /* concatenate two previous items (only
|
||||
* used in postfix) */
|
||||
NFA_OR,
|
||||
NFA_STAR, /* greedy * */
|
||||
NFA_STAR_NONGREEDY, /* non-greedy * */
|
||||
NFA_QUEST, /* greedy \? */
|
||||
NFA_QUEST_NONGREEDY, /* non-greedy \? */
|
||||
NFA_NOT, /* used for [^ab] negated char ranges */
|
||||
|
||||
NFA_BOL, /* ^ Begin line */
|
||||
NFA_EOL, /* $ End line */
|
||||
@@ -260,7 +268,7 @@ static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
|
||||
static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
|
||||
static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
|
||||
static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
|
||||
static int nfa_emit_equi_class __ARGS((int c, int neg));
|
||||
static int nfa_emit_equi_class __ARGS((int c));
|
||||
static int nfa_regatom __ARGS((void));
|
||||
static int nfa_regpiece __ARGS((void));
|
||||
static int nfa_regconcat __ARGS((void));
|
||||
@@ -664,21 +672,10 @@ nfa_recognize_char_class(start, end, extra_newl)
|
||||
* NOTE! When changing this function, also update reg_equi_class()
|
||||
*/
|
||||
static int
|
||||
nfa_emit_equi_class(c, neg)
|
||||
nfa_emit_equi_class(c)
|
||||
int c;
|
||||
int neg;
|
||||
{
|
||||
int first = TRUE;
|
||||
int glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
|
||||
#define EMIT2(c) \
|
||||
EMIT(c); \
|
||||
if (neg == TRUE) { \
|
||||
EMIT(NFA_NOT); \
|
||||
} \
|
||||
if (first == FALSE) \
|
||||
EMIT(glue); \
|
||||
else \
|
||||
first = FALSE; \
|
||||
#define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
|
||||
|
||||
#ifdef FEAT_MBYTE
|
||||
if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
|
||||
@@ -687,84 +684,84 @@ nfa_emit_equi_class(c, neg)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case 'A': case '\300': case '\301': case '\302':
|
||||
case '\303': case '\304': case '\305':
|
||||
EMIT2('A'); EMIT2('\300'); EMIT2('\301');
|
||||
EMIT2('\302'); EMIT2('\303'); EMIT2('\304');
|
||||
EMIT2('\305');
|
||||
case 'A': case 0300: case 0301: case 0302:
|
||||
case 0303: case 0304: case 0305:
|
||||
EMIT2('A'); EMIT2(0300); EMIT2(0301);
|
||||
EMIT2(0302); EMIT2(0303); EMIT2(0304);
|
||||
EMIT2(0305);
|
||||
return OK;
|
||||
|
||||
case 'C': case '\307':
|
||||
EMIT2('C'); EMIT2('\307');
|
||||
case 'C': case 0307:
|
||||
EMIT2('C'); EMIT2(0307);
|
||||
return OK;
|
||||
|
||||
case 'E': case '\310': case '\311': case '\312': case '\313':
|
||||
EMIT2('E'); EMIT2('\310'); EMIT2('\311');
|
||||
EMIT2('\312'); EMIT2('\313');
|
||||
case 'E': case 0310: case 0311: case 0312: case 0313:
|
||||
EMIT2('E'); EMIT2(0310); EMIT2(0311);
|
||||
EMIT2(0312); EMIT2(0313);
|
||||
return OK;
|
||||
|
||||
case 'I': case '\314': case '\315': case '\316': case '\317':
|
||||
EMIT2('I'); EMIT2('\314'); EMIT2('\315');
|
||||
EMIT2('\316'); EMIT2('\317');
|
||||
case 'I': case 0314: case 0315: case 0316: case 0317:
|
||||
EMIT2('I'); EMIT2(0314); EMIT2(0315);
|
||||
EMIT2(0316); EMIT2(0317);
|
||||
return OK;
|
||||
|
||||
case 'N': case '\321':
|
||||
EMIT2('N'); EMIT2('\321');
|
||||
case 'N': case 0321:
|
||||
EMIT2('N'); EMIT2(0321);
|
||||
return OK;
|
||||
|
||||
case 'O': case '\322': case '\323': case '\324': case '\325':
|
||||
case '\326':
|
||||
EMIT2('O'); EMIT2('\322'); EMIT2('\323');
|
||||
EMIT2('\324'); EMIT2('\325'); EMIT2('\326');
|
||||
case 'O': case 0322: case 0323: case 0324: case 0325:
|
||||
case 0326:
|
||||
EMIT2('O'); EMIT2(0322); EMIT2(0323);
|
||||
EMIT2(0324); EMIT2(0325); EMIT2(0326);
|
||||
return OK;
|
||||
|
||||
case 'U': case '\331': case '\332': case '\333': case '\334':
|
||||
EMIT2('U'); EMIT2('\331'); EMIT2('\332');
|
||||
EMIT2('\333'); EMIT2('\334');
|
||||
case 'U': case 0331: case 0332: case 0333: case 0334:
|
||||
EMIT2('U'); EMIT2(0331); EMIT2(0332);
|
||||
EMIT2(0333); EMIT2(0334);
|
||||
return OK;
|
||||
|
||||
case 'Y': case '\335':
|
||||
EMIT2('Y'); EMIT2('\335');
|
||||
case 'Y': case 0335:
|
||||
EMIT2('Y'); EMIT2(0335);
|
||||
return OK;
|
||||
|
||||
case 'a': case '\340': case '\341': case '\342':
|
||||
case '\343': case '\344': case '\345':
|
||||
EMIT2('a'); EMIT2('\340'); EMIT2('\341');
|
||||
EMIT2('\342'); EMIT2('\343'); EMIT2('\344');
|
||||
EMIT2('\345');
|
||||
case 'a': case 0340: case 0341: case 0342:
|
||||
case 0343: case 0344: case 0345:
|
||||
EMIT2('a'); EMIT2(0340); EMIT2(0341);
|
||||
EMIT2(0342); EMIT2(0343); EMIT2(0344);
|
||||
EMIT2(0345);
|
||||
return OK;
|
||||
|
||||
case 'c': case '\347':
|
||||
EMIT2('c'); EMIT2('\347');
|
||||
case 'c': case 0347:
|
||||
EMIT2('c'); EMIT2(0347);
|
||||
return OK;
|
||||
|
||||
case 'e': case '\350': case '\351': case '\352': case '\353':
|
||||
EMIT2('e'); EMIT2('\350'); EMIT2('\351');
|
||||
EMIT2('\352'); EMIT2('\353');
|
||||
case 'e': case 0350: case 0351: case 0352: case 0353:
|
||||
EMIT2('e'); EMIT2(0350); EMIT2(0351);
|
||||
EMIT2(0352); EMIT2(0353);
|
||||
return OK;
|
||||
|
||||
case 'i': case '\354': case '\355': case '\356': case '\357':
|
||||
EMIT2('i'); EMIT2('\354'); EMIT2('\355');
|
||||
EMIT2('\356'); EMIT2('\357');
|
||||
case 'i': case 0354: case 0355: case 0356: case 0357:
|
||||
EMIT2('i'); EMIT2(0354); EMIT2(0355);
|
||||
EMIT2(0356); EMIT2(0357);
|
||||
return OK;
|
||||
|
||||
case 'n': case '\361':
|
||||
EMIT2('n'); EMIT2('\361');
|
||||
case 'n': case 0361:
|
||||
EMIT2('n'); EMIT2(0361);
|
||||
return OK;
|
||||
|
||||
case 'o': case '\362': case '\363': case '\364': case '\365':
|
||||
case '\366':
|
||||
EMIT2('o'); EMIT2('\362'); EMIT2('\363');
|
||||
EMIT2('\364'); EMIT2('\365'); EMIT2('\366');
|
||||
case 'o': case 0362: case 0363: case 0364: case 0365:
|
||||
case 0366:
|
||||
EMIT2('o'); EMIT2(0362); EMIT2(0363);
|
||||
EMIT2(0364); EMIT2(0365); EMIT2(0366);
|
||||
return OK;
|
||||
|
||||
case 'u': case '\371': case '\372': case '\373': case '\374':
|
||||
EMIT2('u'); EMIT2('\371'); EMIT2('\372');
|
||||
EMIT2('\373'); EMIT2('\374');
|
||||
case 'u': case 0371: case 0372: case 0373: case 0374:
|
||||
EMIT2('u'); EMIT2(0371); EMIT2(0372);
|
||||
EMIT2(0373); EMIT2(0374);
|
||||
return OK;
|
||||
|
||||
case 'y': case '\375': case '\377':
|
||||
EMIT2('y'); EMIT2('\375'); EMIT2('\377');
|
||||
case 'y': case 0375: case 0377:
|
||||
EMIT2('y'); EMIT2(0375); EMIT2(0377);
|
||||
return OK;
|
||||
|
||||
default:
|
||||
@@ -811,14 +808,12 @@ nfa_regatom()
|
||||
char_u *old_regparse = regparse;
|
||||
#endif
|
||||
int extra = 0;
|
||||
int first;
|
||||
int emit_range;
|
||||
int negated;
|
||||
int result;
|
||||
int startc = -1;
|
||||
int endc = -1;
|
||||
int oldstartc = -1;
|
||||
int glue; /* ID that will "glue" nodes together */
|
||||
|
||||
c = getchr();
|
||||
switch (c)
|
||||
@@ -927,8 +922,8 @@ nfa_regatom()
|
||||
|
||||
case Magic('n'):
|
||||
if (reg_string)
|
||||
/* In a string "\n" matches a newline character. */
|
||||
EMIT(NL);
|
||||
/* In a string "\n" matches a newline character. */
|
||||
EMIT(NL);
|
||||
else
|
||||
{
|
||||
/* In buffer text "\n" matches the end of a line. */
|
||||
@@ -1160,32 +1155,15 @@ nfa_regatom()
|
||||
case Magic('['):
|
||||
collection:
|
||||
/*
|
||||
* Glue is emitted between several atoms from the [].
|
||||
* It is either NFA_OR, or NFA_CONCAT.
|
||||
*
|
||||
* [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
|
||||
* [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
|
||||
* NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
|
||||
* notation)
|
||||
*
|
||||
* [abc] uses NFA_START_COLL - NFA_END_COLL
|
||||
* [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
|
||||
* Each character is produced as a regular state, using
|
||||
* NFA_CONCAT to bind them together.
|
||||
* Besides normal characters there can be:
|
||||
* - character classes NFA_CLASS_*
|
||||
* - ranges, two characters followed by NFA_RANGE.
|
||||
*/
|
||||
|
||||
|
||||
/* Emit negation atoms, if needed.
|
||||
* The CONCAT below merges the NOT with the previous node. */
|
||||
#define TRY_NEG() \
|
||||
if (negated == TRUE) \
|
||||
{ \
|
||||
EMIT(NFA_NOT); \
|
||||
}
|
||||
|
||||
/* Emit glue between important nodes : CONCAT or OR. */
|
||||
#define EMIT_GLUE() \
|
||||
if (first == FALSE) \
|
||||
EMIT(glue); \
|
||||
else \
|
||||
first = FALSE;
|
||||
|
||||
p = regparse;
|
||||
endp = skip_anyof(p);
|
||||
if (*endp == ']')
|
||||
@@ -1216,21 +1194,20 @@ collection:
|
||||
* version that turns [abc] into 'a' OR 'b' OR 'c'
|
||||
*/
|
||||
startc = endc = oldstartc = -1;
|
||||
first = TRUE; /* Emitting first atom in this sequence? */
|
||||
negated = FALSE;
|
||||
glue = NFA_OR;
|
||||
if (*regparse == '^') /* negated range */
|
||||
{
|
||||
negated = TRUE;
|
||||
glue = NFA_CONCAT;
|
||||
mb_ptr_adv(regparse);
|
||||
EMIT(NFA_START_NEG_COLL);
|
||||
}
|
||||
else
|
||||
EMIT(NFA_START_COLL);
|
||||
if (*regparse == '-')
|
||||
{
|
||||
startc = '-';
|
||||
EMIT(startc);
|
||||
TRY_NEG();
|
||||
EMIT_GLUE();
|
||||
EMIT(NFA_CONCAT);
|
||||
mb_ptr_adv(regparse);
|
||||
}
|
||||
/* Emit the OR branches for each character in the [] */
|
||||
@@ -1306,20 +1283,18 @@ collection:
|
||||
EMIT(NFA_CLASS_ESCAPE);
|
||||
break;
|
||||
}
|
||||
TRY_NEG();
|
||||
EMIT_GLUE();
|
||||
EMIT(NFA_CONCAT);
|
||||
continue;
|
||||
}
|
||||
/* Try equivalence class [=a=] and the like */
|
||||
if (equiclass != 0)
|
||||
{
|
||||
result = nfa_emit_equi_class(equiclass, negated);
|
||||
result = nfa_emit_equi_class(equiclass);
|
||||
if (result == FAIL)
|
||||
{
|
||||
/* should never happen */
|
||||
EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
|
||||
}
|
||||
EMIT_GLUE();
|
||||
continue;
|
||||
}
|
||||
/* Try collating class like [. .] */
|
||||
@@ -1391,19 +1366,32 @@ collection:
|
||||
startc = oldstartc;
|
||||
if (startc > endc)
|
||||
EMSG_RET_FAIL(_(e_invrange));
|
||||
|
||||
if (endc > startc + 2)
|
||||
{
|
||||
/* Emit a range instead of the sequence of
|
||||
* individual characters. */
|
||||
if (startc == 0)
|
||||
/* \x00 is translated to \x0a, start at \x01. */
|
||||
EMIT(1);
|
||||
else
|
||||
--post_ptr; /* remove NFA_CONCAT */
|
||||
EMIT(endc);
|
||||
EMIT(NFA_RANGE);
|
||||
EMIT(NFA_CONCAT);
|
||||
}
|
||||
else
|
||||
#ifdef FEAT_MBYTE
|
||||
if (has_mbyte && ((*mb_char2len)(startc) > 1
|
||||
if (has_mbyte && ((*mb_char2len)(startc) > 1
|
||||
|| (*mb_char2len)(endc) > 1))
|
||||
{
|
||||
if (endc > startc + 256)
|
||||
EMSG_RET_FAIL(_(e_invrange));
|
||||
/* Emit the range. "startc" was already emitted, so
|
||||
* skip it. */
|
||||
/* Emit the characters in the range.
|
||||
* "startc" was already emitted, so skip it.
|
||||
* */
|
||||
for (c = startc + 1; c <= endc; c++)
|
||||
{
|
||||
EMIT(c);
|
||||
TRY_NEG();
|
||||
EMIT_GLUE();
|
||||
EMIT(NFA_CONCAT);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1425,8 +1413,7 @@ collection:
|
||||
#endif
|
||||
{
|
||||
EMIT(c);
|
||||
TRY_NEG();
|
||||
EMIT_GLUE();
|
||||
EMIT(NFA_CONCAT);
|
||||
}
|
||||
}
|
||||
emit_range = FALSE;
|
||||
@@ -1434,23 +1421,29 @@ collection:
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* This char (startc) is not part of a range. Just
|
||||
/* This char (startc) is not part of a range. Just
|
||||
* emit it.
|
||||
*
|
||||
* Normally, simply emit startc. But if we get char
|
||||
* code=0 from a collating char, then replace it with
|
||||
* 0x0a.
|
||||
*
|
||||
* This is needed to completely mimic the behaviour of
|
||||
* the backtracking engine.
|
||||
*/
|
||||
if (got_coll_char == TRUE && startc == 0)
|
||||
EMIT(0x0a);
|
||||
* the backtracking engine. */
|
||||
if (startc == NFA_NEWL)
|
||||
{
|
||||
/* Line break can't be matched as part of the
|
||||
* collection, add an OR below. But not for negated
|
||||
* range. */
|
||||
if (!negated)
|
||||
extra = ADD_NL;
|
||||
}
|
||||
else
|
||||
EMIT(startc);
|
||||
TRY_NEG();
|
||||
EMIT_GLUE();
|
||||
{
|
||||
if (got_coll_char == TRUE && startc == 0)
|
||||
EMIT(0x0a);
|
||||
else
|
||||
EMIT(startc);
|
||||
EMIT(NFA_CONCAT);
|
||||
}
|
||||
}
|
||||
|
||||
mb_ptr_adv(regparse);
|
||||
@@ -1460,20 +1453,19 @@ collection:
|
||||
if (*regparse == '-') /* if last, '-' is just a char */
|
||||
{
|
||||
EMIT('-');
|
||||
TRY_NEG();
|
||||
EMIT_GLUE();
|
||||
EMIT(NFA_CONCAT);
|
||||
}
|
||||
mb_ptr_adv(regparse);
|
||||
|
||||
/* skip the trailing ] */
|
||||
regparse = endp;
|
||||
mb_ptr_adv(regparse);
|
||||
|
||||
/* Mark end of the collection. */
|
||||
if (negated == TRUE)
|
||||
{
|
||||
/* Mark end of negated char range */
|
||||
EMIT(NFA_END_NEG_RANGE);
|
||||
EMIT(NFA_CONCAT);
|
||||
}
|
||||
EMIT(NFA_END_NEG_COLL);
|
||||
else
|
||||
EMIT(NFA_END_COLL);
|
||||
|
||||
/* \_[] also matches \n but it's not negated */
|
||||
if (extra == ADD_NL)
|
||||
@@ -1532,9 +1524,6 @@ nfa_do_multibyte:
|
||||
}
|
||||
}
|
||||
|
||||
#undef TRY_NEG
|
||||
#undef EMIT_GLUE
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
@@ -2091,10 +2080,17 @@ nfa_set_code(c)
|
||||
case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
|
||||
case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
|
||||
case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
|
||||
case NFA_NOT: STRCPY(code, "NFA_NOT "); break;
|
||||
case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
|
||||
case NFA_OR: STRCPY(code, "NFA_OR"); break;
|
||||
case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
|
||||
|
||||
case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break;
|
||||
case NFA_END_COLL: STRCPY(code, "NFA_END_COLL"); break;
|
||||
case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
|
||||
case NFA_END_NEG_COLL: STRCPY(code, "NFA_END_NEG_COLL"); break;
|
||||
case NFA_RANGE: STRCPY(code, "NFA_RANGE"); break;
|
||||
case NFA_RANGE_MIN: STRCPY(code, "NFA_RANGE_MIN"); break;
|
||||
case NFA_RANGE_MAX: STRCPY(code, "NFA_RANGE_MAX"); break;
|
||||
|
||||
case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
|
||||
case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
|
||||
case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
|
||||
@@ -2231,8 +2227,12 @@ nfa_print_state2(debugf, state, indent)
|
||||
fprintf(debugf, " %s", p);
|
||||
|
||||
nfa_set_code(state->c);
|
||||
fprintf(debugf, "%s%s (%d) (id=%d)\n",
|
||||
state->negated ? "NOT " : "", code, state->c, abs(state->id));
|
||||
fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
|
||||
state->negated ? "NOT " : "",
|
||||
code,
|
||||
state->c,
|
||||
abs(state->id),
|
||||
state->val);
|
||||
if (state->id < 0)
|
||||
return;
|
||||
|
||||
@@ -2325,6 +2325,7 @@ alloc_state(c, out, out1)
|
||||
s->c = c;
|
||||
s->out = out;
|
||||
s->out1 = out1;
|
||||
s->val = 0;
|
||||
|
||||
s->id = istate;
|
||||
s->lastlist[0] = 0;
|
||||
@@ -2565,13 +2566,10 @@ post2nfa(postfix, end, nfa_calc_size)
|
||||
switch (*p)
|
||||
{
|
||||
case NFA_CONCAT:
|
||||
/* Catenation.
|
||||
* Pay attention: this operator does not exist
|
||||
* in the r.e. itself (it is implicit, really).
|
||||
* It is added when r.e. is translated to postfix
|
||||
* form in re2post().
|
||||
*
|
||||
* No new state added here. */
|
||||
/* Concatenation.
|
||||
* Pay attention: this operator does not exist in the r.e. itself
|
||||
* (it is implicit, really). It is added when r.e. is translated
|
||||
* to postfix form in re2post(). */
|
||||
if (nfa_calc_size == TRUE)
|
||||
{
|
||||
/* nstate += 0; */
|
||||
@@ -2583,22 +2581,6 @@ post2nfa(postfix, end, nfa_calc_size)
|
||||
PUSH(frag(e1.start, e2.out));
|
||||
break;
|
||||
|
||||
case NFA_NOT:
|
||||
/* Negation of a character */
|
||||
if (nfa_calc_size == TRUE)
|
||||
{
|
||||
/* nstate += 0; */
|
||||
break;
|
||||
}
|
||||
e1 = POP();
|
||||
e1.start->negated = TRUE;
|
||||
#ifdef FEAT_MBYTE
|
||||
if (e1.start->c == NFA_COMPOSING)
|
||||
e1.start->out1->negated = TRUE;
|
||||
#endif
|
||||
PUSH(e1);
|
||||
break;
|
||||
|
||||
case NFA_OR:
|
||||
/* Alternation */
|
||||
if (nfa_calc_size == TRUE)
|
||||
@@ -2672,6 +2654,43 @@ post2nfa(postfix, end, nfa_calc_size)
|
||||
PUSH(frag(s, append(e.out, list1(&s->out))));
|
||||
break;
|
||||
|
||||
case NFA_END_COLL:
|
||||
case NFA_END_NEG_COLL:
|
||||
/* On the stack is the sequence starting with NFA_START_COLL or
|
||||
* NFA_START_NEG_COLL and all possible characters. Patch it to
|
||||
* add the output to the start. */
|
||||
if (nfa_calc_size == TRUE)
|
||||
{
|
||||
nstate++;
|
||||
break;
|
||||
}
|
||||
e = POP();
|
||||
s = alloc_state(NFA_END_COLL, NULL, NULL);
|
||||
if (s == NULL)
|
||||
goto theend;
|
||||
patch(e.out, s);
|
||||
e.start->out1 = s;
|
||||
PUSH(frag(e.start, list1(&s->out)));
|
||||
break;
|
||||
|
||||
case NFA_RANGE:
|
||||
/* Before this are two characters, the low and high end of a
|
||||
* range. Turn them into two states with MIN and MAX. */
|
||||
if (nfa_calc_size == TRUE)
|
||||
{
|
||||
/* nstate += 0; */
|
||||
break;
|
||||
}
|
||||
e2 = POP();
|
||||
e1 = POP();
|
||||
e2.start->val = e2.start->c;
|
||||
e2.start->c = NFA_RANGE_MAX;
|
||||
e1.start->val = e1.start->c;
|
||||
e1.start->c = NFA_RANGE_MIN;
|
||||
patch(e1.out, e2.start);
|
||||
PUSH(frag(e1.start, e2.out));
|
||||
break;
|
||||
|
||||
case NFA_SKIP_CHAR:
|
||||
/* Symbol of 0-length, Used in a repetition
|
||||
* with max/min count of 0 */
|
||||
@@ -2990,6 +3009,8 @@ post2nfa(postfix, end, nfa_calc_size)
|
||||
matchstate = &state_ptr[istate++]; /* the match state */
|
||||
matchstate->c = NFA_MATCH;
|
||||
matchstate->out = matchstate->out1 = NULL;
|
||||
matchstate->negated = FALSE;
|
||||
matchstate->id = 0;
|
||||
|
||||
patch(e.out, matchstate);
|
||||
ret = e.start;
|
||||
@@ -3308,7 +3329,6 @@ addstate(l, state, subs, off)
|
||||
switch (state->c)
|
||||
{
|
||||
case NFA_SPLIT:
|
||||
case NFA_NOT:
|
||||
case NFA_NOPEN:
|
||||
case NFA_SKIP_CHAR:
|
||||
case NFA_NCLOSE:
|
||||
@@ -3782,7 +3802,8 @@ check_char_class(class, c)
|
||||
|
||||
default:
|
||||
/* should not be here :P */
|
||||
EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
|
||||
EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
|
||||
return FAIL;
|
||||
}
|
||||
return FAIL;
|
||||
}
|
||||
@@ -4320,8 +4341,8 @@ nfa_regmatch(prog, start, submatch, m)
|
||||
addstate(thislist, start, m, 0);
|
||||
|
||||
/* There are two cases when the NFA advances: 1. input char matches the
|
||||
* NFA node and 2. input char does not match the NFA node, but the next
|
||||
* node is NFA_NOT. The following macro calls addstate() according to
|
||||
* NFA node and 2. input char does not match the NFA node and the state
|
||||
* has the negated flag. The following macro calls addstate() according to
|
||||
* these rules. It is used A LOT, so use the "listtbl" table for speed */
|
||||
listtbl[0][0] = NULL;
|
||||
listtbl[0][1] = neglist;
|
||||
@@ -4845,16 +4866,79 @@ nfa_regmatch(prog, start, submatch, m)
|
||||
ADD_POS_NEG_STATE(t->state);
|
||||
break;
|
||||
|
||||
case NFA_END_NEG_RANGE:
|
||||
/* This follows a series of negated nodes, like:
|
||||
* NOT CHAR(x), NOT CHAR(y), etc. */
|
||||
if (curc > 0)
|
||||
case NFA_START_COLL:
|
||||
case NFA_START_NEG_COLL:
|
||||
{
|
||||
/* What follows is a list of characters, until NFA_END_COLL.
|
||||
* One of them must match or none of them must match. */
|
||||
nfa_state_T *state;
|
||||
int result_if_matched;
|
||||
int c1, c2;
|
||||
|
||||
/* Never match EOL. If it's part of the collection it is added
|
||||
* as a separate state with an OR. */
|
||||
if (curc == NUL)
|
||||
break;
|
||||
|
||||
state = t->state->out;
|
||||
result_if_matched = (t->state->c == NFA_START_COLL);
|
||||
for (;;)
|
||||
{
|
||||
if (state->c == NFA_END_COLL)
|
||||
{
|
||||
result = !result_if_matched;
|
||||
break;
|
||||
}
|
||||
if (state->c == NFA_RANGE_MIN)
|
||||
{
|
||||
c1 = state->val;
|
||||
state = state->out; /* advance to NFA_RANGE_MAX */
|
||||
c2 = state->val;
|
||||
#ifdef ENABLE_LOG
|
||||
fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
|
||||
curc, c1, c2);
|
||||
#endif
|
||||
if (curc >= c1 && curc <= c2)
|
||||
{
|
||||
result = result_if_matched;
|
||||
break;
|
||||
}
|
||||
if (ireg_ic)
|
||||
{
|
||||
int curc_low = MB_TOLOWER(curc);
|
||||
int done = FALSE;
|
||||
|
||||
for ( ; c1 <= c2; ++c1)
|
||||
if (MB_TOLOWER(c1) == curc_low)
|
||||
{
|
||||
result = result_if_matched;
|
||||
done = TRUE;
|
||||
break;
|
||||
}
|
||||
if (done)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (state->c < 0 ? check_char_class(state->c, curc)
|
||||
: (curc == state->c
|
||||
|| (ireg_ic && MB_TOLOWER(curc)
|
||||
== MB_TOLOWER(state->c))))
|
||||
{
|
||||
result = result_if_matched;
|
||||
break;
|
||||
}
|
||||
state = state->out;
|
||||
}
|
||||
if (result)
|
||||
{
|
||||
/* next state is in out of the NFA_END_COLL, out1 of
|
||||
* START points to the END state */
|
||||
ll = nextlist;
|
||||
add_state = t->state->out;
|
||||
add_state = t->state->out1->out;
|
||||
add_off = clen;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case NFA_ANY:
|
||||
/* Any char except '\0', (end of input) does not match. */
|
||||
|
@@ -728,6 +728,8 @@ static char *(features[]) =
|
||||
|
||||
static int included_patches[] =
|
||||
{ /* Add new patch number below this line */
|
||||
/**/
|
||||
1137,
|
||||
/**/
|
||||
1136,
|
||||
/**/
|
||||
|
Reference in New Issue
Block a user