0
0
mirror of https://github.com/vim/vim.git synced 2025-09-25 03:54:15 -04:00

updated for version 7.3.970

Problem:    Syntax highlighting can be slow.
Solution:   Include the NFA regexp engine.  Add the 'regexpengine' option to
            select which one is used. (various authors, including Ken Takata,
            Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)
This commit is contained in:
Bram Moolenaar
2013-05-19 19:40:29 +02:00
parent 6fa41fb374
commit fbc0d2ea1e
24 changed files with 4809 additions and 236 deletions

View File

@@ -57,6 +57,7 @@ SRC_ALL = \
src/popupmnu.c \ src/popupmnu.c \
src/quickfix.c \ src/quickfix.c \
src/regexp.c \ src/regexp.c \
src/regexp_nfa.c \
src/regexp.h \ src/regexp.h \
src/screen.c \ src/screen.c \
src/search.c \ src/search.c \

View File

@@ -1,4 +1,4 @@
*pattern.txt* For Vim version 7.3. Last change: 2013 Apr 20 *pattern.txt* For Vim version 7.3. Last change: 2013 May 17
VIM REFERENCE MANUAL by Bram Moolenaar VIM REFERENCE MANUAL by Bram Moolenaar
@@ -350,6 +350,27 @@ For starters, read chapter 27 of the user manual |usr_27.txt|.
or \z( pattern \) |/\z(| or \z( pattern \) |/\z(|
*/\%#=* *two-engines*
Vim includes two regexp engines:
1. An old, backtracking engine that supports everything.
2. A new, NFA engine that works much faster on some patterns, but does not
support everything.
Vim will automatically select the right engine for you. However, if you run
into a problem or want to specifically select one engine or the other, you can
prepend one of the following to the pattern:
\%#=0 Force automatic selection. Only has an effect when
'regexpengine' has been set to a non-zero value.
\%#=1 Force using the old engine.
\%#=2 Force using the NFA engine.
You can also use the 'regexpengine' option to change the default.
*E864* *E868* *E874* *E875* *E876* *E877* *E878*
If you select the NFA engine and it runs into something that is not
implemented, the pattern will not match. This is only useful when debugging Vim.
============================================================================== ==============================================================================
3. Magic */magic* 3. Magic */magic*
@@ -396,9 +417,10 @@ pattern.
============================================================================== ==============================================================================
4. Overview of pattern items *pattern-overview* 4. Overview of pattern items *pattern-overview*
*E865* *E866* *E867* *E869*
Overview of multi items. */multi* *E61* *E62* Overview of multi items. */multi* *E61* *E62*
More explanation and examples below, follow the links. *E64* More explanation and examples below, follow the links. *E64* *E871*
multi ~ multi ~
'magic' 'nomagic' matches of the preceding atom ~ 'magic' 'nomagic' matches of the preceding atom ~
@@ -508,12 +530,14 @@ Character classes {not in Vi}: */character-classes*
|/\c| \c \c ignore case, do not use the 'ignorecase' option |/\c| \c \c ignore case, do not use the 'ignorecase' option
|/\C| \C \C match case, do not use the 'ignorecase' option |/\C| \C \C match case, do not use the 'ignorecase' option
|/\Z| \Z \Z ignore differences in Unicode "combining characters".
Useful when searching voweled Hebrew or Arabic text.
|/\m| \m \m 'magic' on for the following chars in the pattern |/\m| \m \m 'magic' on for the following chars in the pattern
|/\M| \M \M 'magic' off for the following chars in the pattern |/\M| \M \M 'magic' off for the following chars in the pattern
|/\v| \v \v the following chars in the pattern are "very magic" |/\v| \v \v the following chars in the pattern are "very magic"
|/\V| \V \V the following chars in the pattern are "very nomagic" |/\V| \V \V the following chars in the pattern are "very nomagic"
|/\Z| \Z \Z ignore differences in Unicode "combining characters". |/\%#=| \%#=1 \%#=1 select regexp engine |/zero-width|
Useful when searching voweled Hebrew or Arabic text.
|/\%d| \%d \%d match specified decimal character (eg \%d123) |/\%d| \%d \%d match specified decimal character (eg \%d123)
|/\%x| \%x \%x match specified hex character (eg \%x2a) |/\%x| \%x \%x match specified hex character (eg \%x2a)
@@ -581,7 +605,7 @@ overview.
\? Just like \=. Cannot be used when searching backwards with the "?" \? Just like \=. Cannot be used when searching backwards with the "?"
command. {not in Vi} command. {not in Vi}
*/\{* *E58* *E60* *E554* */\{* *E58* *E60* *E554* *E870*
\{n,m} Matches n to m of the preceding atom, as many as possible \{n,m} Matches n to m of the preceding atom, as many as possible
\{n} Matches n of the preceding atom \{n} Matches n of the preceding atom
\{n,} Matches at least n of the preceding atom, as many as possible \{n,} Matches at least n of the preceding atom, as many as possible
@@ -962,7 +986,8 @@ match ASCII characters, as indicated by the range.
~ matches the last given substitute string */~* */\~* ~ matches the last given substitute string */~* */\~*
\(\) A pattern enclosed by escaped parentheses. */\(* */\(\)* */\)* \(\) A pattern enclosed by escaped parentheses. */\(* */\(\)* */\)*
E.g., "\(^a\)" matches 'a' at the start of a line. *E51* *E54* *E55* E.g., "\(^a\)" matches 'a' at the start of a line.
*E51* *E54* *E55* *E872* *E873*
\1 Matches the same string that was matched by */\1* *E65* \1 Matches the same string that was matched by */\1* *E65*
the first sub-expression in \( and \). {not in Vi} the first sub-expression in \( and \). {not in Vi}

View File

@@ -736,9 +736,11 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME*
'quote motion.txt /*'quote* 'quote motion.txt /*'quote*
'quoteescape' options.txt /*'quoteescape'* 'quoteescape' options.txt /*'quoteescape'*
'rdt' options.txt /*'rdt'* 'rdt' options.txt /*'rdt'*
're' options.txt /*'re'*
'readonly' options.txt /*'readonly'* 'readonly' options.txt /*'readonly'*
'redraw' vi_diff.txt /*'redraw'* 'redraw' vi_diff.txt /*'redraw'*
'redrawtime' options.txt /*'redrawtime'* 'redrawtime' options.txt /*'redrawtime'*
'regexpengine'' options.txt /*'regexpengine''*
'relativenumber' options.txt /*'relativenumber'* 'relativenumber' options.txt /*'relativenumber'*
'remap' options.txt /*'remap'* 'remap' options.txt /*'remap'*
'report' options.txt /*'report'* 'report' options.txt /*'report'*
@@ -1389,6 +1391,7 @@ $VIMRUNTIME starting.txt /*$VIMRUNTIME*
/\ pattern.txt /*\/\\* /\ pattern.txt /*\/\\*
/\$ pattern.txt /*\/\\$* /\$ pattern.txt /*\/\\$*
/\%# pattern.txt /*\/\\%#* /\%# pattern.txt /*\/\\%#*
/\%#= pattern.txt /*\/\\%#=*
/\%$ pattern.txt /*\/\\%$* /\%$ pattern.txt /*\/\\%$*
/\%'m pattern.txt /*\/\\%'m* /\%'m pattern.txt /*\/\\%'m*
/\%( pattern.txt /*\/\\%(* /\%( pattern.txt /*\/\\%(*
@@ -4261,7 +4264,22 @@ E860 eval.txt /*E860*
E861 eval.txt /*E861* E861 eval.txt /*E861*
E862 eval.txt /*E862* E862 eval.txt /*E862*
E863 if_pyth.txt /*E863* E863 if_pyth.txt /*E863*
E864 pattern.txt /*E864*
E865 pattern.txt /*E865*
E866 pattern.txt /*E866*
E867 pattern.txt /*E867*
E868 pattern.txt /*E868*
E869 pattern.txt /*E869*
E87 windows.txt /*E87* E87 windows.txt /*E87*
E870 pattern.txt /*E870*
E871 pattern.txt /*E871*
E872 pattern.txt /*E872*
E873 pattern.txt /*E873*
E874 pattern.txt /*E874*
E875 pattern.txt /*E875*
E876 pattern.txt /*E876*
E877 pattern.txt /*E877*
E878 pattern.txt /*E878*
E88 windows.txt /*E88* E88 windows.txt /*E88*
E89 message.txt /*E89* E89 message.txt /*E89*
E90 message.txt /*E90* E90 message.txt /*E90*
@@ -8172,6 +8190,7 @@ try-nested eval.txt /*try-nested*
try-nesting eval.txt /*try-nesting* try-nesting eval.txt /*try-nesting*
tutor usr_01.txt /*tutor* tutor usr_01.txt /*tutor*
twice if_cscop.txt /*twice* twice if_cscop.txt /*twice*
two-engines pattern.txt /*two-engines*
type() eval.txt /*type()* type() eval.txt /*type()*
type-mistakes tips.txt /*type-mistakes* type-mistakes tips.txt /*type-mistakes*
typecorr-settings usr_41.txt /*typecorr-settings* typecorr-settings usr_41.txt /*typecorr-settings*

View File

@@ -672,6 +672,9 @@ endif
$(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_DEP) $(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_DEP)
$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o $(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL)
$(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
$(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP) $(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o $(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o

View File

@@ -765,6 +765,9 @@ if_perl.c: if_perl.xs typemap
$(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC) $(OUTDIR)/netbeans.o: netbeans.c $(INCL) $(NBDEBUG_INCL) $(NBDEBUG_SRC)
$(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o $(CC) -c $(CFLAGS) netbeans.c -o $(OUTDIR)/netbeans.o
$(OUTDIR)/regexp.o: regexp.c regexp_nfa.c $(INCL)
$(CC) -c $(CFLAGS) regexp.c -o $(OUTDIR)/regexp.o
$(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP) $(OUTDIR)/if_mzsch.o: if_mzsch.c $(INCL) if_mzsch.h $(MZ_EXTRA_DEP)
$(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o $(CC) -c $(CFLAGS) if_mzsch.c -o $(OUTDIR)/if_mzsch.o

View File

@@ -1166,7 +1166,7 @@ $(OUTDIR)/popupmnu.obj: $(OUTDIR) popupmnu.c $(INCL)
$(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL) $(OUTDIR)/quickfix.obj: $(OUTDIR) quickfix.c $(INCL)
$(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c $(INCL) $(OUTDIR)/regexp.obj: $(OUTDIR) regexp.c regexp_nfa.c $(INCL)
$(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL) $(OUTDIR)/screen.obj: $(OUTDIR) screen.c $(INCL)

View File

@@ -454,7 +454,7 @@ CClink = $(CC)
# MULTIBYTE - To edit multi-byte characters. # MULTIBYTE - To edit multi-byte characters.
# Uncomment this when you want to edit a multibyte language. # Uncomment this when you want to edit a multibyte language.
# It's automatically enabled with big features or IME support. # It's automatically enabled with normal features, GTK or IME support.
# Note: Compile on a machine where setlocale() actually works, otherwise the # Note: Compile on a machine where setlocale() actually works, otherwise the
# configure tests may fail. # configure tests may fail.
#CONF_OPT_MULTIBYTE = --enable-multibyte #CONF_OPT_MULTIBYTE = --enable-multibyte
@@ -2664,7 +2664,7 @@ objects/popupmnu.o: popupmnu.c
objects/quickfix.o: quickfix.c objects/quickfix.o: quickfix.c
$(CCC) -o $@ quickfix.c $(CCC) -o $@ quickfix.c
objects/regexp.o: regexp.c objects/regexp.o: regexp.c regexp_nfa.c
$(CCC) -o $@ regexp.c $(CCC) -o $@ regexp.c
objects/screen.o: screen.c objects/screen.o: screen.c
@@ -2938,10 +2938,10 @@ objects/quickfix.o: quickfix.c vim.h auto/config.h feature.h os_unix.h \
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \ regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
globals.h farsi.h arabic.h globals.h farsi.h arabic.h
objects/regexp.o: regexp.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ objects/regexp.o: regexp.c regexp_nfa.c vim.h auto/config.h feature.h os_unix.h \
ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ regexp.h gui.h gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h \
arabic.h globals.h farsi.h arabic.h
objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \ objects/screen.o: screen.c vim.h auto/config.h feature.h os_unix.h auto/osdef.h \
ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \ ascii.h keymap.h term.h macros.h option.h structs.h regexp.h gui.h \
gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \ gui_beval.h proto/gui_beval.pro ex_cmds.h proto.h globals.h farsi.h \

View File

@@ -2077,6 +2077,9 @@ static struct vimoption
(char_u *)NULL, PV_NONE, (char_u *)NULL, PV_NONE,
#endif #endif
{(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT}, {(char_u *)2000L, (char_u *)0L} SCRIPTID_INIT},
{"regexpengine", "re", P_NUM|P_VI_DEF,
(char_u *)&p_re, PV_NONE,
{(char_u *)0L, (char_u *)0L} SCRIPTID_INIT},
{"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN, {"relativenumber", "rnu", P_BOOL|P_VI_DEF|P_RWIN,
(char_u *)VAR_WIN, PV_RNU, (char_u *)VAR_WIN, PV_RNU,
{(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT}, {(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT},
@@ -8604,6 +8607,11 @@ set_num_option(opt_idx, varp, value, errbuf, errbuflen, opt_flags)
errmsg = e_positive; errmsg = e_positive;
p_hi = 0; p_hi = 0;
} }
if (p_re < 0 || p_re > 2)
{
errmsg = e_invarg;
p_re = 0;
}
if (p_report < 0) if (p_report < 0)
{ {
errmsg = e_positive; errmsg = e_positive;

View File

@@ -653,6 +653,7 @@ EXTERN char_u *p_cdpath; /* 'cdpath' */
EXTERN long p_rdt; /* 'redrawtime' */ EXTERN long p_rdt; /* 'redrawtime' */
#endif #endif
EXTERN int p_remap; /* 'remap' */ EXTERN int p_remap; /* 'remap' */
EXTERN long p_re; /* 'regexpengine' */
EXTERN long p_report; /* 'report' */ EXTERN long p_report; /* 'report' */
#if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX) #if defined(FEAT_WINDOWS) && defined(FEAT_QUICKFIX)
EXTERN long p_pvh; /* 'previewheight' */ EXTERN long p_pvh; /* 'previewheight' */

View File

@@ -38,9 +38,20 @@
* Named character class support added by Walter Briscoe (1998 Jul 01) * Named character class support added by Walter Briscoe (1998 Jul 01)
*/ */
/* Uncomment the first if you do not want to see debugging logs or files
* related to regular expressions, even when compiling with -DDEBUG.
* Uncomment the second to get the regexp debugging. */
/* #undef DEBUG */
/* #define DEBUG */
#include "vim.h" #include "vim.h"
#undef DEBUG #ifdef DEBUG
/* show/save debugging data when BT engine is used */
# define BT_REGEXP_DUMP
/* save the debugging data to a file instead of displaying it */
# define BT_REGEXP_LOG
#endif
/* /*
* The "internal use only" fields in regexp.h are present to pass info from * The "internal use only" fields in regexp.h are present to pass info from
@@ -326,9 +337,10 @@ toggle_Magic(x)
/* Used for an error (down from) vim_regcomp(): give the error message, set /* Used for an error (down from) vim_regcomp(): give the error message, set
* rc_did_emsg and return NULL */ * rc_did_emsg and return NULL */
#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) #define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
#define MAX_LIMIT (32767L << 16L) #define MAX_LIMIT (32767L << 16L)
@@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int));
static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
static char_u *cstrchr __ARGS((char_u *, int)); static char_u *cstrchr __ARGS((char_u *, int));
#ifdef BT_REGEXP_DUMP
static void regdump __ARGS((char_u *, bt_regprog_T *));
#endif
#ifdef DEBUG #ifdef DEBUG
static void regdump __ARGS((char_u *, regprog_T *));
static char_u *regprop __ARGS((char_u *)); static char_u *regprop __ARGS((char_u *));
#endif #endif
/*
 * Error message formats for unbalanced brackets and parentheses.
 * Each contains one "%s"; the EMSG2_RET_NULL() / EMSG2_RET_FAIL() macros fill
 * it with either "" or a backslash, depending on the magic-ness condition the
 * caller passes, so the message shows the item as the user had to type it.
 * The strings are marked with N_() here and translated with _() at the place
 * where the message is given.
 * NOTE(review): presumably these are file-level variables so that the NFA
 * engine code can give the same messages -- confirm against regexp_nfa.c.
 */
static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
#define NOT_MULTI 0 #define NOT_MULTI 0
#define MULTI_ONE 1 #define MULTI_ONE 1
#define MULTI_MULT 2 #define MULTI_MULT 2
@@ -630,7 +649,13 @@ static char_u META_flags[] = {
}; };
#endif #endif
static int curchr; static int curchr; /* currently parsed character */
/* Previous character. Note: prevchr is sometimes -1 when we are not at the
* start, eg in /[ ^I]^ the pattern was never found even if it existed,
* because ^ was taken to be magic -- webb */
static int prevchr;
static int prevprevchr; /* previous-previous character */
static int nextchr; /* used for ungetchr() */
/* arguments for reg() */ /* arguments for reg() */
#define REG_NOPAREN 0 /* toplevel reg() */ #define REG_NOPAREN 0 /* toplevel reg() */
@@ -680,6 +705,9 @@ static int read_limits __ARGS((long *, long *));
static void regtail __ARGS((char_u *, char_u *)); static void regtail __ARGS((char_u *, char_u *));
static void regoptail __ARGS((char_u *, char_u *)); static void regoptail __ARGS((char_u *, char_u *));
/*
 * Forward declarations of the regexp engine descriptors.
 * bt_regcomp() stores the address of "bt_regengine" in the "engine" field of
 * the program it returns.
 * NOTE(review): "nfa_regengine" is presumably the equivalent descriptor for
 * the NFA engine; its use is not visible here -- confirm.
 */
static regengine_T bt_regengine;
static regengine_T nfa_regengine;
/* /*
* Return TRUE if compiled regular expression "prog" can match a line break. * Return TRUE if compiled regular expression "prog" can match a line break.
*/ */
@@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = {
/* /*
* Produce the bytes for equivalence class "c". * Produce the bytes for equivalence class "c".
* Currently only handles latin1, latin9 and utf-8. * Currently only handles latin1, latin9 and utf-8.
* NOTE: When changing this function, also change nfa_emit_equi_class()
*/ */
static void static void
reg_equi_class(c) reg_equi_class(c)
@@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp)
return p; return p;
} }
static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
/* /*
* vim_regcomp() - compile a regular expression into internal code * bt_regcomp() - compile a regular expression into internal code for the
* traditional back track matcher.
* Returns the program in allocated space. Returns NULL for an error. * Returns the program in allocated space. Returns NULL for an error.
* *
* We can't allocate space until we know how big the compiled form will be, * We can't allocate space until we know how big the compiled form will be,
@@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp)
* of the structure of the compiled regexp. * of the structure of the compiled regexp.
* "re_flags": RE_MAGIC and/or RE_STRING. * "re_flags": RE_MAGIC and/or RE_STRING.
*/ */
regprog_T * static regprog_T *
vim_regcomp(expr, re_flags) bt_regcomp(expr, re_flags)
char_u *expr; char_u *expr;
int re_flags; int re_flags;
{ {
regprog_T *r; bt_regprog_T *r;
char_u *scan; char_u *scan;
char_u *longest; char_u *longest;
int len; int len;
@@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags)
#endif #endif
/* Allocate space. */ /* Allocate space. */
r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE); r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
if (r == NULL) if (r == NULL)
return NULL; return NULL;
@@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags)
r->regmlen = len; r->regmlen = len;
} }
} }
#ifdef DEBUG #ifdef BT_REGEXP_DUMP
regdump(expr, r); regdump(expr, r);
#endif #endif
return r; r->engine = &bt_regengine;
return (regprog_T *)r;
} }
/* /*
@@ -1436,7 +1469,7 @@ vim_regcomp_had_eol()
#endif #endif
/* /*
* reg - regular expression, i.e. main body or parenthesized thing * Parse regular expression, i.e. main body or parenthesized thing.
* *
* Caller must absorb opening parenthesis. * Caller must absorb opening parenthesis.
* *
@@ -1473,7 +1506,7 @@ reg(paren, flagp)
{ {
/* Make a MOPEN node. */ /* Make a MOPEN node. */
if (regnpar >= NSUBEXP) if (regnpar >= NSUBEXP)
EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
parno = regnpar; parno = regnpar;
++regnpar; ++regnpar;
ret = regnode(MOPEN + parno); ret = regnode(MOPEN + parno);
@@ -1534,14 +1567,14 @@ reg(paren, flagp)
else else
#endif #endif
if (paren == REG_NPAREN) if (paren == REG_NPAREN)
EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL); EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
else else
EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL); EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
} }
else if (paren == REG_NOPAREN && peekchr() != NUL) else if (paren == REG_NOPAREN && peekchr() != NUL)
{ {
if (curchr == Magic(')')) if (curchr == Magic(')'))
EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL); EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
else else
EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
/* NOTREACHED */ /* NOTREACHED */
@@ -1556,7 +1589,7 @@ reg(paren, flagp)
} }
/* /*
* Handle one alternative of an | operator. * Parse one alternative of an | operator.
* Implements the & operator. * Implements the & operator.
*/ */
static char_u * static char_u *
@@ -1599,7 +1632,7 @@ regbranch(flagp)
} }
/* /*
* Handle one alternative of an | or & operator. * Parse one alternative of an | or & operator.
* Implements the concatenation operator. * Implements the concatenation operator.
*/ */
static char_u * static char_u *
@@ -1679,7 +1712,7 @@ regconcat(flagp)
} }
/* /*
* regpiece - something followed by possible [*+=] * Parse something followed by possible [*+=].
* *
* Note that the branching code sequences used for = and the general cases * Note that the branching code sequences used for = and the general cases
* of * and + are somewhat optimized: they use the same NOTHING node as * of * and + are somewhat optimized: they use the same NOTHING node as
@@ -1759,7 +1792,7 @@ regpiece(flagp)
} }
} }
if (lop == END) if (lop == END)
EMSG_M_RET_NULL(_("E59: invalid character after %s@"), EMSG2_RET_NULL(_("E59: invalid character after %s@"),
reg_magic == MAGIC_ALL); reg_magic == MAGIC_ALL);
/* Look behind must match with behind_pos. */ /* Look behind must match with behind_pos. */
if (lop == BEHIND || lop == NOBEHIND) if (lop == BEHIND || lop == NOBEHIND)
@@ -1793,7 +1826,7 @@ regpiece(flagp)
else else
{ {
if (num_complex_braces >= 10) if (num_complex_braces >= 10)
EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"), EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
reg_magic == MAGIC_ALL); reg_magic == MAGIC_ALL);
reginsert(BRACE_COMPLEX + num_complex_braces, ret); reginsert(BRACE_COMPLEX + num_complex_braces, ret);
regoptail(ret, regnode(BACK)); regoptail(ret, regnode(BACK));
@@ -1820,8 +1853,20 @@ regpiece(flagp)
return ret; return ret;
} }
/* When making changes to classchars also change nfa_classcodes. */
/*
 * Character class tables.  "classchars" holds one character per class and
 * "classcodes" holds one node code per class; both have 27 entries, listed in
 * the same order ('.' ~ ANY, 'i' ~ IDENT, 'I' ~ SIDENT, ... 'U' ~ NUPPER).
 * Keep the two in the same order when adding or removing a class.
 * NOTE(review): presumably the position of a character found in "classchars"
 * is used as the index into "classcodes" -- confirm in regatom().
 */
static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
static int classcodes[] = {
ANY, IDENT, SIDENT, KWORD, SKWORD,
FNAME, SFNAME, PRINT, SPRINT,
WHITE, NWHITE, DIGIT, NDIGIT,
HEX, NHEX, OCTAL, NOCTAL,
WORD, NWORD, HEAD, NHEAD,
ALPHA, NALPHA, LOWER, NLOWER,
UPPER, NUPPER
};
/* /*
* regatom - the lowest level * Parse the lowest level.
* *
* Optimization: gobbles an entire sequence of ordinary characters so that * Optimization: gobbles an entire sequence of ordinary characters so that
* it can turn them into a single node, which is smaller to store and * it can turn them into a single node, which is smaller to store and
@@ -1836,15 +1881,6 @@ regatom(flagp)
int cpo_lit; /* 'cpoptions' contains 'l' flag */ int cpo_lit; /* 'cpoptions' contains 'l' flag */
int cpo_bsl; /* 'cpoptions' contains '\' flag */ int cpo_bsl; /* 'cpoptions' contains '\' flag */
int c; int c;
static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
FNAME, SFNAME, PRINT, SPRINT,
WHITE, NWHITE, DIGIT, NDIGIT,
HEX, NHEX, OCTAL, NOCTAL,
WORD, NWORD, HEAD, NHEAD,
ALPHA, NALPHA, LOWER, NLOWER,
UPPER, NUPPER
};
char_u *p; char_u *p;
int extra = 0; int extra = 0;
@@ -2140,7 +2176,7 @@ regatom(flagp)
while ((c = getchr()) != ']') while ((c = getchr()) != ']')
{ {
if (c == NUL) if (c == NUL)
EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["), EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
reg_magic == MAGIC_ALL); reg_magic == MAGIC_ALL);
br = regnode(BRANCH); br = regnode(BRANCH);
if (ret == NULL) if (ret == NULL)
@@ -2156,7 +2192,7 @@ regatom(flagp)
return NULL; return NULL;
} }
if (ret == NULL) if (ret == NULL)
EMSG_M_RET_NULL(_("E70: Empty %s%%[]"), EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
reg_magic == MAGIC_ALL); reg_magic == MAGIC_ALL);
lastbranch = regnode(BRANCH); lastbranch = regnode(BRANCH);
br = regnode(NOTHING); br = regnode(NOTHING);
@@ -2200,7 +2236,7 @@ regatom(flagp)
} }
if (i < 0) if (i < 0)
EMSG_M_RET_NULL( EMSG2_RET_NULL(
_("E678: Invalid character after %s%%[dxouU]"), _("E678: Invalid character after %s%%[dxouU]"),
reg_magic == MAGIC_ALL); reg_magic == MAGIC_ALL);
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
@@ -2272,7 +2308,7 @@ regatom(flagp)
} }
} }
EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"), EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
reg_magic == MAGIC_ALL); reg_magic == MAGIC_ALL);
} }
} }
@@ -2567,8 +2603,7 @@ collection:
break; break;
} }
else if (reg_strict) else if (reg_strict)
EMSG_M_RET_NULL(_("E769: Missing ] after %s["), EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
reg_magic > MAGIC_OFF);
} }
/* FALLTHROUGH */ /* FALLTHROUGH */
@@ -2659,7 +2694,7 @@ use_multibytecode(c)
#endif #endif
/* /*
* emit a node * Emit a node.
* Return pointer to generated code. * Return pointer to generated code.
*/ */
static char_u * static char_u *
@@ -2711,7 +2746,7 @@ regmbc(c)
#endif #endif
/* /*
* reginsert - insert an operator in front of already-emitted operand * Insert an operator in front of already-emitted operand
* *
* Means relocating the operand. * Means relocating the operand.
*/ */
@@ -2742,7 +2777,7 @@ reginsert(op, opnd)
} }
/* /*
* reginsert_limits - insert an operator in front of already-emitted operand. * Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer. * The operator has the given limit values as operands. Also set next pointer.
* *
* Means relocating the operand. * Means relocating the operand.
@@ -2794,7 +2829,7 @@ re_put_long(p, val)
} }
/* /*
* regtail - set the next-pointer at the end of a node chain * Set the next-pointer at the end of a node chain.
*/ */
static void static void
regtail(p, val) regtail(p, val)
@@ -2835,7 +2870,7 @@ regtail(p, val)
} }
/* /*
* regoptail - regtail on item after a BRANCH; nop if none * Like regtail, on item after a BRANCH; nop if none.
*/ */
static void static void
regoptail(p, val) regoptail(p, val)
@@ -2851,22 +2886,15 @@ regoptail(p, val)
} }
/* /*
* getchr() - get the next character from the pattern. We know about * Functions for getting characters from the regexp input.
* magic and such, so therefore we need a lexical analyzer.
*/ */
/* static int curchr; */
static int prevprevchr;
static int prevchr;
static int nextchr; /* used for ungetchr() */
/*
* Note: prevchr is sometimes -1 when we are not at the start,
* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
* taken to be magic -- webb
*/
static int at_start; /* True when on the first character */ static int at_start; /* True when on the first character */
static int prev_at_start; /* True when on the second character */ static int prev_at_start; /* True when on the second character */
/*
* Start parsing at "str".
*/
static void static void
initchr(str) initchr(str)
char_u *str; char_u *str;
@@ -2878,6 +2906,9 @@ initchr(str)
prev_at_start = FALSE; prev_at_start = FALSE;
} }
/*
* Get the next character without advancing.
*/
static int static int
peekchr() peekchr()
{ {
@@ -3086,6 +3117,10 @@ skipchr_keepstart()
prevprevchr = prpr; prevprevchr = prpr;
} }
/*
* Get the next character from the pattern. We know about magic and such, so
* therefore we need a lexical analyzer.
*/
static int static int
getchr() getchr()
{ {
@@ -3340,8 +3375,8 @@ typedef struct regbehind_S
} regbehind_T; } regbehind_T;
static char_u *reg_getline __ARGS((linenr_T lnum)); static char_u *reg_getline __ARGS((linenr_T lnum));
static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
static long regtry __ARGS((regprog_T *prog, colnr_T col)); static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
static void cleanup_subexpr __ARGS((void)); static void cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL #ifdef FEAT_SYN_HL
static void cleanup_zsubexpr __ARGS((void)); static void cleanup_zsubexpr __ARGS((void));
@@ -3398,7 +3433,7 @@ static colnr_T ireg_maxcol;
/* /*
* Sometimes need to save a copy of a line. Since alloc()/free() is very * Sometimes need to save a copy of a line. Since alloc()/free() is very
* slow, we keep one allocated piece of memory and only re-allocate it when * slow, we keep one allocated piece of memory and only re-allocate it when
* it's too small. It's freed in vim_regexec_both() when finished. * it's too small. It's freed in bt_regexec_both() when finished.
*/ */
static char_u *reg_tofree = NULL; static char_u *reg_tofree = NULL;
static unsigned reg_tofreelen; static unsigned reg_tofreelen;
@@ -3556,6 +3591,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
/* TRUE if using multi-line regexp. */ /* TRUE if using multi-line regexp. */
#define REG_MULTI (reg_match == NULL) #define REG_MULTI (reg_match == NULL)
static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
/* /*
* Match a regexp against a string. * Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3563,8 +3600,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
* *
* Return TRUE if there is a match, FALSE if not. * Return TRUE if there is a match, FALSE if not.
*/ */
int static int
vim_regexec(rmp, line, col) bt_regexec(rmp, line, col)
regmatch_T *rmp; regmatch_T *rmp;
char_u *line; /* string to match against */ char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */ colnr_T col; /* column to start looking for match */
@@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col)
ireg_icombine = FALSE; ireg_icombine = FALSE;
#endif #endif
ireg_maxcol = 0; ireg_maxcol = 0;
return (vim_regexec_both(line, col, NULL) != 0); return (bt_regexec_both(line, col, NULL) != 0);
} }
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO) || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
/* /*
* Like vim_regexec(), but consider a "\n" in "line" to be a line break. * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
*/ */
int static int
vim_regexec_nl(rmp, line, col) bt_regexec_nl(rmp, line, col)
regmatch_T *rmp; regmatch_T *rmp;
char_u *line; /* string to match against */ char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */ colnr_T col; /* column to start looking for match */
@@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col)
ireg_icombine = FALSE; ireg_icombine = FALSE;
#endif #endif
ireg_maxcol = 0; ireg_maxcol = 0;
return (vim_regexec_both(line, col, NULL) != 0); return (bt_regexec_both(line, col, NULL) != 0);
} }
#endif #endif
static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
/* /*
* Match a regexp against multiple lines. * Match a regexp against multiple lines.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col)
* Return zero if there is no match. Return number of lines contained in the * Return zero if there is no match. Return number of lines contained in the
* match otherwise. * match otherwise.
*/ */
long static long
vim_regexec_multi(rmp, win, buf, lnum, col, tm) bt_regexec_multi(rmp, win, buf, lnum, col, tm)
regmmatch_T *rmp; regmmatch_T *rmp;
win_T *win; /* window in which to search or NULL */ win_T *win; /* window in which to search or NULL */
buf_T *buf; /* buffer in which to search */ buf_T *buf; /* buffer in which to search */
@@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
#endif #endif
ireg_maxcol = rmp->rmm_maxcol; ireg_maxcol = rmp->rmm_maxcol;
r = vim_regexec_both(NULL, col, tm); r = bt_regexec_both(NULL, col, tm);
return r; return r;
} }
@@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
* lines ("line" is NULL, use reg_getline()). * lines ("line" is NULL, use reg_getline()).
*/ */
static long static long
vim_regexec_both(line, col, tm) bt_regexec_both(line, col, tm)
char_u *line; char_u *line;
colnr_T col; /* column to start looking for match */ colnr_T col; /* column to start looking for match */
proftime_T *tm UNUSED; /* timeout limit or NULL */ proftime_T *tm UNUSED; /* timeout limit or NULL */
{ {
regprog_T *prog; bt_regprog_T *prog;
char_u *s; char_u *s;
long retval = 0L; long retval = 0L;
@@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm)
if (REG_MULTI) if (REG_MULTI)
{ {
prog = reg_mmatch->regprog; prog = (bt_regprog_T *)reg_mmatch->regprog;
line = reg_getline((linenr_T)0); line = reg_getline((linenr_T)0);
reg_startpos = reg_mmatch->startpos; reg_startpos = reg_mmatch->startpos;
reg_endpos = reg_mmatch->endpos; reg_endpos = reg_mmatch->endpos;
} }
else else
{ {
prog = reg_match->regprog; prog = (bt_regprog_T *)reg_match->regprog;
reg_startp = reg_match->startp; reg_startp = reg_match->startp;
reg_endp = reg_match->endp; reg_endp = reg_match->endp;
} }
@@ -3931,7 +3973,7 @@ unref_extmatch(em)
*/ */
static long static long
regtry(prog, col) regtry(prog, col)
regprog_T *prog; bt_regprog_T *prog;
colnr_T col; colnr_T col;
{ {
reginput = regline + col; reginput = regline + col;
@@ -4063,7 +4105,7 @@ regmatch(scan)
#define RA_NOMATCH 5 /* didn't match */ #define RA_NOMATCH 5 /* didn't match */
/* Make "regstack" and "backpos" empty. They are allocated and freed in /* Make "regstack" and "backpos" empty. They are allocated and freed in
* vim_regexec_both() to reduce malloc()/free() calls. */ * bt_regexec_both() to reduce malloc()/free() calls. */
regstack.ga_len = 0; regstack.ga_len = 0;
backpos.ga_len = 0; backpos.ga_len = 0;
@@ -4072,14 +4114,14 @@ regmatch(scan)
*/ */
for (;;) for (;;)
{ {
/* Some patterns my cause a long time to match, even though they are not /* Some patterns may cause a long time to match, even though they are not
* illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */ * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
fast_breakcheck(); fast_breakcheck();
#ifdef DEBUG #ifdef DEBUG
if (scan != NULL && regnarrate) if (scan != NULL && regnarrate)
{ {
mch_errmsg(regprop(scan)); mch_errmsg((char *)regprop(scan));
mch_errmsg("(\n"); mch_errmsg("(\n");
} }
#endif #endif
@@ -4100,7 +4142,7 @@ regmatch(scan)
#ifdef DEBUG #ifdef DEBUG
if (regnarrate) if (regnarrate)
{ {
mch_errmsg(regprop(scan)); mch_errmsg((char *)regprop(scan));
mch_errmsg("...\n"); mch_errmsg("...\n");
# ifdef FEAT_SYN_HL # ifdef FEAT_SYN_HL
if (re_extmatch_in != NULL) if (re_extmatch_in != NULL)
@@ -4112,7 +4154,7 @@ regmatch(scan)
{ {
mch_errmsg(" \""); mch_errmsg(" \"");
if (re_extmatch_in->matches[i] != NULL) if (re_extmatch_in->matches[i] != NULL)
mch_errmsg(re_extmatch_in->matches[i]); mch_errmsg((char *)re_extmatch_in->matches[i]);
mch_errmsg("\"\n"); mch_errmsg("\"\n");
} }
} }
@@ -6091,9 +6133,14 @@ regnext(p)
static int static int
prog_magic_wrong() prog_magic_wrong()
{ {
if (UCHARAT(REG_MULTI regprog_T *prog;
? reg_mmatch->regprog->program
: reg_match->regprog->program) != REGMAGIC) prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
if (prog->engine == &nfa_regengine)
/* For NFA matcher we don't check the magic */
return FALSE;
if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
{ {
EMSG(_(e_re_corr)); EMSG(_(e_re_corr));
return TRUE; return TRUE;
@@ -6318,7 +6365,7 @@ re_num_cmp(val, scan)
} }
#ifdef DEBUG #ifdef BT_REGEXP_DUMP
/* /*
* regdump - dump a regexp onto stdout in vaguely comprehensible form * regdump - dump a regexp onto stdout in vaguely comprehensible form
@@ -6326,14 +6373,22 @@ re_num_cmp(val, scan)
static void static void
regdump(pattern, r) regdump(pattern, r)
char_u *pattern; char_u *pattern;
regprog_T *r; bt_regprog_T *r;
{ {
char_u *s; char_u *s;
int op = EXACTLY; /* Arbitrary non-END op. */ int op = EXACTLY; /* Arbitrary non-END op. */
char_u *next; char_u *next;
char_u *end = NULL; char_u *end = NULL;
FILE *f;
printf("\r\nregcomp(%s):\r\n", pattern); #ifdef BT_REGEXP_LOG
f = fopen("bt_regexp_log.log", "a");
#else
f = stdout;
#endif
if (f == NULL)
return;
fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
s = r->program + 1; s = r->program + 1;
/* /*
@@ -6343,18 +6398,18 @@ regdump(pattern, r)
while (op != END || s <= end) while (op != END || s <= end)
{ {
op = OP(s); op = OP(s);
printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
next = regnext(s); next = regnext(s);
if (next == NULL) /* Next ptr. */ if (next == NULL) /* Next ptr. */
printf("(0)"); fprintf(f, "(0)");
else else
printf("(%d)", (int)((s - r->program) + (next - s))); fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
if (end < next) if (end < next)
end = next; end = next;
if (op == BRACE_LIMITS) if (op == BRACE_LIMITS)
{ {
/* Two short ints */ /* Two short ints */
printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
s += 8; s += 8;
} }
s += 3; s += 3;
@@ -6363,25 +6418,33 @@ regdump(pattern, r)
|| op == EXACTLY) || op == EXACTLY)
{ {
/* Literal string, where present. */ /* Literal string, where present. */
fprintf(f, "\nxxxxxxxxx\n");
while (*s != NUL) while (*s != NUL)
printf("%c", *s++); fprintf(f, "%c", *s++);
fprintf(f, "\nxxxxxxxxx\n");
s++; s++;
} }
printf("\r\n"); fprintf(f, "\r\n");
} }
/* Header fields of interest. */ /* Header fields of interest. */
if (r->regstart != NUL) if (r->regstart != NUL)
printf("start `%s' 0x%x; ", r->regstart < 256 fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
? (char *)transchar(r->regstart) ? (char *)transchar(r->regstart)
: "multibyte", r->regstart); : "multibyte", r->regstart);
if (r->reganch) if (r->reganch)
printf("anchored; "); fprintf(f, "anchored; ");
if (r->regmust != NULL) if (r->regmust != NULL)
printf("must have \"%s\"", r->regmust); fprintf(f, "must have \"%s\"", r->regmust);
printf("\r\n"); fprintf(f, "\r\n");
}
#ifdef BT_REGEXP_LOG
fclose(f);
#endif
}
#endif /* BT_REGEXP_DUMP */
#ifdef DEBUG
/* /*
* regprop - printable representation of opcode * regprop - printable representation of opcode
*/ */
@@ -6389,12 +6452,12 @@ regdump(pattern, r)
regprop(op) regprop(op)
char_u *op; char_u *op;
{ {
char_u *p; char *p;
static char_u buf[50]; static char buf[50];
(void) strcpy(buf, ":"); STRCPY(buf, ":");
switch (OP(op)) switch ((int) OP(op))
{ {
case BOL: case BOL:
p = "BOL"; p = "BOL";
@@ -6761,10 +6824,10 @@ regprop(op)
break; break;
} }
if (p != NULL) if (p != NULL)
(void) strcat(buf, p); STRCAT(buf, p);
return buf; return (char_u *)buf;
} }
#endif #endif /* DEBUG */
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3)); static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
@@ -7667,3 +7730,187 @@ reg_submatch(no)
return retval; return retval;
} }
#endif #endif
/*
 * Function table for the backtracking engine.
 * The initializers are positional, so they must stay in the same order as
 * the members of struct regengine, under the same #if/#ifdef guards.
 */
static regengine_T bt_regengine =
{
/* regcomp */
bt_regcomp,
/* regexec */
bt_regexec,
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/* regexec_nl */
bt_regexec_nl,
#endif
/* regexec_multi */
bt_regexec_multi
#ifdef DEBUG
/* expr: starts out empty; vim_regcomp() points it at the pattern */
,(char_u *)""
#endif
};
#include "regexp_nfa.c"
/*
 * Function table for the NFA engine.
 * The initializers are positional, so they must stay in the same order as
 * the members of struct regengine, under the same #if/#ifdef guards.
 */
static regengine_T nfa_regengine =
{
/* regcomp */
nfa_regcomp,
/* regexec */
nfa_regexec,
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/* regexec_nl */
nfa_regexec_nl,
#endif
/* regexec_multi */
nfa_regexec_multi
#ifdef DEBUG
/* expr: starts out empty; vim_regcomp() points it at the pattern */
,(char_u *)""
#endif
};
/* Which regexp engine to use? Needed for vim_regcomp().
 * Must match with 'regexpengine'.
 * The values are also the digits accepted after "\%#=" in a pattern. */
#define	    AUTOMATIC_ENGINE	0
#define	    BACKTRACKING_ENGINE	1
#define	    NFA_ENGINE		2
static int regexp_engine = AUTOMATIC_ENGINE;

#ifdef DEBUG
/* Engine names for the debug message in vim_regcomp(), indexed by the
 * *_ENGINE values above. */
static char_u regname[][30] = {
		    "AUTOMATIC Regexp Engine",
		    "BACKTRACKING Regexp Engine",
		    "NFA Regexp Engine"
			    };
#endif
/*
 * Compile the pattern "expr_arg" into a regexp program.
 * The engine is taken from 'regexpengine' (p_re), unless the pattern starts
 * with "\%#=N", which selects the engine for this pattern only.
 * Returns the program in allocated memory.  Returns NULL for an error.
 */
    regprog_T *
vim_regcomp(expr_arg, re_flags)
    char_u	*expr_arg;
    int		re_flags;
{
    char_u	*expr = expr_arg;
    regprog_T	*prog;

    syntax_error = FALSE;
    regexp_engine = p_re;

    /* A leading "\%#=" overrides the engine picked by the option. */
    if (STRNCMP(expr, "\\%#=", 4) == 0)
    {
	int	newengine = expr[4] - '0';

	switch (newengine)
	{
	    case AUTOMATIC_ENGINE:
	    case BACKTRACKING_ENGINE:
	    case NFA_ENGINE:
		/* Valid digit: use it and skip the five-byte prefix. */
		regexp_engine = newengine;
		expr += 5;
#ifdef DEBUG
		EMSG3("New regexp mode selected (%d): %s", regexp_engine,
							  regname[newengine]);
#endif
		break;

	    default:
		/* Unknown digit: complain; the prefix stays in the pattern. */
		EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
		regexp_engine = AUTOMATIC_ENGINE;
		break;
	}
    }

#ifdef DEBUG
    bt_regengine.expr = expr;
    nfa_regengine.expr = expr;
#endif

    /* The NFA engine gets the first try, unless backtracking was requested. */
    if (regexp_engine == BACKTRACKING_ENGINE)
	prog = bt_regengine.regcomp(expr, re_flags);
    else
	prog = nfa_regengine.regcomp(expr, re_flags);

    if (prog != NULL)
	return prog;

    /* The engine that was tried first could not compile the pattern. */
#ifdef DEBUG
    if (regexp_engine != BACKTRACKING_ENGINE)   /* debugging log for NFA */
    {
	FILE	*f = fopen("debug.log", "a");

	if (f == NULL)
	    EMSG("(NFA) Could not open \"debug.log\" to write !!!");
	else
	{
	    if (syntax_error)
		fprintf(f, "Syntax error in \"%s\"\n", expr);
	    else
		fprintf(f, "NFA engine could not handle \"%s\"\n", expr);
	    fclose(f);
	}
	/*
	if (syntax_error)
	    EMSG("NFA Regexp: Syntax Error !");
	*/
    }
#endif

    /* With automatic selection, retry with the backtracking engine.  A syntax
     * error was already handled by the NFA engine, so it is not retried. */
    if (regexp_engine == AUTOMATIC_ENGINE && !syntax_error)
	prog = bt_regengine.regcomp(expr, re_flags);

    return prog;
}
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
*
* Return TRUE if there is a match, FALSE if not.
*/
int
vim_regexec(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
{
return rmp->regprog->engine->regexec(rmp, line, col);
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Same as vim_regexec(), except that a "\n" in "line" is taken as a line
 * break.  The work is done by the engine that compiled the program.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;
    colnr_T	col;
{
    regengine_T	*engine = rmp->regprog->engine;

    return engine->regexec_nl(rmp, line, col);
}
#endif
/*
* Match a regexp against multiple lines.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
*
* Return zero if there is no match. Return number of lines contained in the
* match otherwise.
*/
long
vim_regexec_multi(rmp, win, buf, lnum, col, tm)
regmmatch_T *rmp;
win_T *win; /* window in which to search or NULL */
buf_T *buf; /* buffer in which to search */
linenr_T lnum; /* nr of line to start looking for match */
colnr_T col; /* column to start looking for match */
proftime_T *tm; /* timeout limit or NULL */
{
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
}

View File

@@ -21,21 +21,77 @@
*/ */
#define NSUBEXP 10 #define NSUBEXP 10
/*
* In the NFA engine: how many braces are allowed.
* TODO(RE): Use dynamic memory allocation instead of static, like here
*/
#define NFA_MAX_BRACES 20
typedef struct regengine regengine_T;
typedef struct thread thread_T;
/* /*
* Structure returned by vim_regcomp() to pass on to vim_regexec(). * Structure returned by vim_regcomp() to pass on to vim_regexec().
* This is the general structure. For the actual matcher, two specific
* structures are used. See code below.
*/
typedef struct regprog
{
regengine_T *engine;
unsigned regflags;
} regprog_T;
/*
* Structure used by the back track matcher.
* These fields are only to be used in regexp.c! * These fields are only to be used in regexp.c!
* See regep.c for an explanation. * See regexp.c for an explanation.
*/ */
typedef struct typedef struct
{ {
/* These two members implement regprog_T */
regengine_T *engine;
unsigned regflags;
int regstart; int regstart;
char_u reganch; char_u reganch;
char_u *regmust; char_u *regmust;
int regmlen; int regmlen;
unsigned regflags;
char_u reghasz; char_u reghasz;
char_u program[1]; /* actually longer.. */ char_u program[1]; /* actually longer.. */
} regprog_T; } bt_regprog_T;
/*
* Structure representing a NFA state.
* A NFA state may have no outgoing edge, when it is a NFA_MATCH state.
*/
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
int c;
nfa_state_T *out;
nfa_state_T *out1;
int id;
int lastlist;
int visits;
thread_T *lastthread;
int negated;
};
/*
 * Structure used by the NFA matcher.
 *
 * Starts with the same two members as regprog_T, so that a pointer to this
 * struct can be passed around as a "regprog_T *" and the engine looked up
 * through it.
 *
 * "state" is the first element of a variable-length array: the struct is
 * allocated with extra room behind it for the remaining states.  It is
 * declared with one element, like "program[1]" in bt_regprog_T, because a
 * zero-length array is a compiler extension and not valid ISO C.  Code that
 * allocates sizeof(nfa_regprog_T) plus room for all the states merely gets
 * one spare state.
 */
typedef struct
{
    /* These two members implement regprog_T */
    regengine_T		*engine;
    unsigned		regflags;

    /* NOTE(review): looks redundant with the two members above; confirm
     * whether regexp_nfa.c uses it before removing. */
    regprog_T		regprog;
    nfa_state_T		*start;		/* presumably points into state[];
					 * confirm in regexp_nfa.c */
    int			nstate;		/* presumably the number of entries
					 * in state[]; confirm */
    nfa_state_T		state[1];	/* actually longer.. */
} nfa_regprog_T;
/* /*
* Structure to be used for single-line matching. * Structure to be used for single-line matching.
@@ -78,4 +134,18 @@ typedef struct
char_u *matches[NSUBEXP]; char_u *matches[NSUBEXP];
} reg_extmatch_T; } reg_extmatch_T;
/*
 * Table of entry points for one regexp engine.
 * Every compiled program points to one of these through its "engine"
 * member; vim_regexec(), vim_regexec_nl() and vim_regexec_multi() call the
 * matching function through it.
 * The static initializers in regexp.c are positional: keep the member order
 * and the #if/#ifdef guards in sync with them.
 */
struct regengine
{
/* compile a pattern; arguments as for vim_regcomp() */
regprog_T *(*regcomp)(char_u*, int);
/* match against a single string; arguments as for vim_regexec() */
int (*regexec)(regmatch_T*, char_u*, colnr_T);
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/* like regexec, for vim_regexec_nl() */
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T);
#endif
/* match against buffer lines; arguments as for vim_regexec_multi() */
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*);
#ifdef DEBUG
/* the pattern being compiled, set by vim_regcomp() */
char_u *expr;
#endif
};
#endif /* _REGEXP_H */ #endif /* _REGEXP_H */

3819
src/regexp_nfa.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -63,15 +63,16 @@ typedef struct growarray
#define GA_EMPTY {0, 0, 0, 0, NULL} #define GA_EMPTY {0, 0, 0, 0, NULL}
/*
* This is here because regexp.h needs pos_T and below regprog_T is used.
*/
#include "regexp.h"
typedef struct window_S win_T; typedef struct window_S win_T;
typedef struct wininfo_S wininfo_T; typedef struct wininfo_S wininfo_T;
typedef struct frame_S frame_T; typedef struct frame_S frame_T;
typedef int scid_T; /* script ID */ typedef int scid_T; /* script ID */
typedef struct file_buffer buf_T; /* forward declaration */
/*
* This is here because regexp.h needs pos_T and below regprog_T is used.
*/
#include "regexp.h"
/* /*
* This is here because gui.h needs the pos_T and win_T, and win_T needs gui.h * This is here because gui.h needs the pos_T and win_T, and win_T needs gui.h
@@ -526,8 +527,6 @@ typedef struct
# endif # endif
} cmdmod_T; } cmdmod_T;
typedef struct file_buffer buf_T; /* forward declaration */
#define MF_SEED_LEN 8 #define MF_SEED_LEN 8
struct memfile struct memfile

View File

@@ -33,7 +33,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
test76.out test77.out test78.out test79.out test80.out \ test76.out test77.out test78.out test79.out test80.out \
test81.out test82.out test83.out test84.out test88.out \ test81.out test82.out test83.out test84.out test88.out \
test89.out test90.out test91.out test92.out test93.out \ test89.out test90.out test91.out test92.out test93.out \
test94.out test94.out test95.out
.SUFFIXES: .in .out .SUFFIXES: .in .out
@@ -144,3 +144,4 @@ test91.out: test91.in
test92.out: test92.in test92.out: test92.in
test93.out: test93.in test93.out: test93.in
test94.out: test94.in test94.out: test94.in
test95.out: test95.in

View File

@@ -32,7 +32,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
test79.out test80.out test81.out test82.out test83.out \ test79.out test80.out test81.out test82.out test83.out \
test84.out test85.out test86.out test87.out test88.out \ test84.out test85.out test86.out test87.out test88.out \
test89.out test90.out test91.out test92.out test93.out \ test89.out test90.out test91.out test92.out test93.out \
test94.out test94.out test95.out
SCRIPTS32 = test50.out test70.out SCRIPTS32 = test50.out test70.out

View File

@@ -52,7 +52,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
test79.out test80.out test81.out test82.out test83.out \ test79.out test80.out test81.out test82.out test83.out \
test84.out test85.out test86.out test87.out test88.out \ test84.out test85.out test86.out test87.out test88.out \
test89.out test90.out test91.out test92.out test93.out \ test89.out test90.out test91.out test92.out test93.out \
test94.out test94.out test95.out
SCRIPTS32 = test50.out test70.out SCRIPTS32 = test50.out test70.out

View File

@@ -33,7 +33,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
test76.out test77.out test78.out test79.out test80.out \ test76.out test77.out test78.out test79.out test80.out \
test81.out test82.out test83.out test84.out test88.out \ test81.out test82.out test83.out test84.out test88.out \
test89.out test90.out test91.out test92.out test93.out \ test89.out test90.out test91.out test92.out test93.out \
test94.out test94.out test95.out
.SUFFIXES: .in .out .SUFFIXES: .in .out

View File

@@ -4,7 +4,7 @@
# Authors: Zoltan Arpadffy, <arpadffy@polarhome.com> # Authors: Zoltan Arpadffy, <arpadffy@polarhome.com>
# Sandor Kopanyi, <sandor.kopanyi@mailbox.hu> # Sandor Kopanyi, <sandor.kopanyi@mailbox.hu>
# #
# Last change: 2013 Apr 12 # Last change: 2013 May 18
# #
# This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64. # This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
# Edit the lines in the Configuration section below to select. # Edit the lines in the Configuration section below to select.
@@ -77,7 +77,8 @@ SCRIPT = test1.out test2.out test3.out test4.out test5.out \
test71.out test72.out test74.out test75.out test76.out \ test71.out test72.out test74.out test75.out test76.out \
test77.out test78.out test79.out test80.out test81.out \ test77.out test78.out test79.out test80.out test81.out \
test82.out test83.out test84.out test88.out test89.out \ test82.out test83.out test84.out test88.out test89.out \
test90.out test91.out test92.out test93.out test94.out test90.out test91.out test92.out test93.out test94.out \
test95.out
# Known problems: # Known problems:
# Test 30: a problem around mac format - unknown reason # Test 30: a problem around mac format - unknown reason

View File

@@ -29,7 +29,7 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
test79.out test80.out test81.out test82.out test83.out \ test79.out test80.out test81.out test82.out test83.out \
test84.out test85.out test86.out test87.out test88.out \ test84.out test85.out test86.out test87.out test88.out \
test89.out test90.out test91.out test92.out test93.out \ test89.out test90.out test91.out test92.out test93.out \
test94.out test94.out test95.out
SCRIPTS_GUI = test16.out SCRIPTS_GUI = test16.out
@@ -85,13 +85,16 @@ test1.out: test1.in
fi" fi"
# Check if the test.out file matches test.ok. # Check if the test.out file matches test.ok.
@/bin/sh -c "if test -f test.out; then\ @/bin/sh -c "if test -f test.out; then \
if diff test.out $*.ok; \ if diff test.out $*.ok; \
then mv -f test.out $*.out; \ then mv -f test.out $*.out; \
else echo $* FAILED >>test.log; mv -f test.out $*.failed; \ else echo $* FAILED >>test.log; mv -f test.out $*.failed; \
fi \ fi \
else echo $* NO OUTPUT >>test.log; \ else echo $* NO OUTPUT >>test.log; \
fi" fi"
@/bin/sh -c "if test -f valgrind; then\
mv -f valgrind valgrind.$*; \
fi"
-rm -rf X* test.ok viminfo -rm -rf X* test.ok viminfo
test49.out: test49.vim test49.out: test49.vim

View File

@@ -1,4 +1,5 @@
Test for regexp patterns. Test for regexp patterns without multi-byte support.
See test95 for multi-byte tests.
A pattern that gives the expected result produces OK, so that we know it was A pattern that gives the expected result produces OK, so that we know it was
actually tried. actually tried.
@@ -14,6 +15,11 @@ STARTTEST
:" etc. :" etc.
:" When there is no match use only the first two items. :" When there is no match use only the first two items.
:let tl = [] :let tl = []
:""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
:"""" Previously written tests """"""""""""""""""""""""""""""""
:""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
:call add(tl, ['ab', 'aab', 'ab']) :call add(tl, ['ab', 'aab', 'ab'])
:call add(tl, ['b', 'abcdef', 'b']) :call add(tl, ['b', 'abcdef', 'b'])
:call add(tl, ['bc*', 'abccccdef', 'bcccc']) :call add(tl, ['bc*', 'abccccdef', 'bcccc'])
@@ -132,6 +138,164 @@ STARTTEST
:" :"
:call add(tl, ['\v(a*)+', 'aaaa', 'aaaa', '']) :call add(tl, ['\v(a*)+', 'aaaa', 'aaaa', ''])
:call add(tl, ['x', 'abcdef']) :call add(tl, ['x', 'abcdef'])
:""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
:""""" Simple tests """""""""""""""""""""""""""""""""""""""""""
:""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
:" Search single groups
:call add(tl, ['ab', 'aab', 'ab'])
:call add(tl, ['ab', 'baced'])
:call add(tl, ['ab', ' ab ', 'ab'])
:" Search multi-modifiers
:call add(tl, ['x*', 'xcd', 'x'])
:call add(tl, ['x*', 'xxxxxxxxxxxxxxxxsofijiojgf', 'xxxxxxxxxxxxxxxx'])
:call add(tl, ['x*', 'abcdoij', '']) " empty match is good
:call add(tl, ['x\+', 'abcdoin']) " no match here
:call add(tl, ['x\+', 'abcdeoijdfxxiuhfij', 'xx'])
:call add(tl, ['x\+', 'xxxxx', 'xxxxx'])
:call add(tl, ['x\+', 'abc x siufhiush xxxxxxxxx', 'x'])
:call add(tl, ['x\=', 'x sdfoij', 'x'])
:call add(tl, ['x\=', 'abc sfoij', '']) " empty match is good
:call add(tl, ['x\=', 'xxxxxxxxx c', 'x'])
:call add(tl, ['x\?', 'x sdfoij', 'x'])
:call add(tl, ['x\?', 'abc sfoij', '']) " empty match is good
:call add(tl, ['x\?', 'xxxxxxxxxx c', 'x'])
:call add(tl, ['a\{0,0}', 'abcdfdoij', ''])
:call add(tl, ['a\{0,1}', 'asiubid axxxaaa', 'a']) " same thing as 'a?'
:call add(tl, ['a\{1,0}', 'asiubid axxxaaa', 'a']) " same thing as 'a\{0,1}'
:call add(tl, ['a\{3,6}', 'aa siofuh'])
:call add(tl, ['a\{3,6}', 'aaaaa asfoij afaa', 'aaaaa'])
:call add(tl, ['a\{3,6}', 'aaaaaaaa', 'aaaaaa'])
:call add(tl, ['a\{0}', 'asoiuj', ''])
:call add(tl, ['a\{2}', 'aaaa', 'aa'])
:call add(tl, ['a\{2}', 'iuash fiusahfliusah fiushfilushfi uhsaifuh askfj nasfvius afg aaaa sfiuhuhiushf', 'aa'])
:call add(tl, ['a\{2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
:call add(tl, ['a\{0,}', 'oij sdigfusnf', '']) " same thing as 'a*'
:call add(tl, ['a\{0,}', 'aaaaa aa', 'aaaaa'])
:call add(tl, ['a\{2,}', 'sdfiougjdsafg'])
:call add(tl, ['a\{2,}', 'aaaaasfoij ', 'aaaaa'])
:call add(tl, ['a\{,0}', 'oidfguih iuhi hiu aaaa', ''])
:call add(tl, ['a\{,5}', 'abcd', 'a'])
:call add(tl, ['a\{,5}', 'aaaaaaaaaa', 'aaaaa'])
:call add(tl, ['a\{}', 'bbbcddiuhfcd', '']) " same thing as 'a*'
:call add(tl, ['a\{}', 'aaaaioudfh coisf jda', 'aaaa'])
:call add(tl, ['a\{-0,0}', 'abcdfdoij', ''])
:call add(tl, ['a\{-0,1}', 'asiubid axxxaaa', '']) " anti-greedy version of 'a?'
:call add(tl, ['a\{-3,6}', 'aa siofuh'])
:call add(tl, ['a\{-3,6}', 'aaaaa asfoij afaa', 'aaa'])
:call add(tl, ['a\{-3,6}', 'aaaaaaaa', 'aaa'])
:call add(tl, ['a\{-0}', 'asoiuj', ''])
:call add(tl, ['a\{-2}', 'aaaa', 'aa'])
:call add(tl, ['a\{-2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
:call add(tl, ['a\{-0,}', 'oij sdigfusnf', ''])
:call add(tl, ['a\{-0,}', 'aaaaa aa', ''])
:call add(tl, ['a\{-2,}', 'sdfiougjdsafg'])
:call add(tl, ['a\{-2,}', 'aaaaasfoij ', 'aa'])
:call add(tl, ['a\{-,0}', 'oidfguih iuhi hiu aaaa', ''])
:call add(tl, ['a\{-,5}', 'abcd', ''])
:call add(tl, ['a\{-,5}', 'aaaaaaaaaa', ''])
:call add(tl, ['a\{-}', 'bbbcddiuhfcd', '']) " anti-greedy version of 'a*'
:call add(tl, ['a\{-}', 'aaaaioudfh coisf jda', ''])
:" Test groups of characters and submatches
:call add(tl, ['\(abc\)*', 'abcabcabc', 'abcabcabc', 'abc'])
:call add(tl, ['\(ab\)\+', 'abababaaaaa', 'ababab', 'ab'])
:call add(tl, ['\(abaaaaa\)*cd', 'cd', 'cd', ''])
:call add(tl, ['\(test1\)\? \(test2\)\?', 'test1 test3', 'test1 ', 'test1', ''])
:call add(tl, ['\(test1\)\= \(test2\) \(test4443\)\=', ' test2 test4443 yupiiiiiiiiiii', ' test2 test4443', '', 'test2', 'test4443'])
:call add(tl, ['\(\(sub1\) hello \(sub 2\)\)', 'asterix sub1 hello sub 2 obelix', 'sub1 hello sub 2', 'sub1 hello sub 2', 'sub1', 'sub 2'])
:call add(tl, ['\(\(\(yyxxzz\)\)\)', 'abcdddsfiusfyyzzxxyyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz', 'yyxxzz'])
:call add(tl, ['\v((ab)+|c+)+', 'abcccaba', 'abcccab', 'ab', 'ab'])
:call add(tl, ['\v((ab)|c*)+', 'abcccaba', 'abcccab', '', 'ab'])
:call add(tl, ['\v(a(c*)+b)+', 'acbababaaa', 'acbabab', 'ab', ''])
:call add(tl, ['\v(a|b*)+', 'aaaa', 'aaaa', ''])
:" Test greedy-ness and lazy-ness
:call add(tl, ['a\{-2,7}','aaaaaaaaaaaaa', 'aa'])
:call add(tl, ['a\{2,7}','aaaaaaaaaaaaaaaaaaaa', 'aaaaaaa'])
:call add(tl, ['\vx(.{-,8})yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz','ayxa','xayzxayz'])
:call add(tl, ['\vx(.*)yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz', 'ayxayzxayzxa',''])
:call add(tl, ['\v(a{1,2}){-2,3}','aaaaaaa','aaaa','aa'])
:call add(tl, ['\v(a{-1,3})+','aa','aa','a'])
:" Test Character classes
:call add(tl, ['\d\+e\d\d','test 10e23 fd','10e23'])
:" Test collections and character range []
:call add(tl, ['\v[a]', 'abcd', 'a'])
:call add(tl, ['a[bcd]', 'abcd', 'ab'])
:call add(tl, ['a[b-d]', 'acbd', 'ac'])
:call add(tl, ['[a-d][e-f][x-x]d', 'cexdxx', 'cexd'])
:call add(tl, ['\v[[:alpha:]]+', 'abcdefghijklmnopqrstuvwxyz6','abcdefghijklmnopqrstuvwxyz'])
:call add(tl, ['[[:alpha:]\+]', '6x8','x'])
:call add(tl, ['[^abc]\+','abcabcabc'])
:call add(tl, ['[^abc]','defghiasijvoinasoiunbvb','d'])
:call add(tl, ['[^abc]\+','ddddddda','ddddddd'])
:call add(tl, ['[^a-d]\+','aaaAAAZIHFNCddd','AAAZIHFNC'])
:call add(tl, ['[a-f]*','iiiiiiii',''])
:call add(tl, ['[a-f]*','abcdefgh','abcdef'])
:call add(tl, ['[^a-f]\+','abcdefgh','gh'])
:call add(tl, ['[a-c]\{-3,6}','abcabc','abc'])
:call add(tl, ['[^[:alpha:]]\+','abcccadfoij7787ysf287yrnccdu','7787'])
:call add(tl, ['[-a]', '-', '-'])
:call add(tl, ['[a-]', '-', '-'])
:call add(tl, ['[-./[:alnum:]_~]\+', 'log13.file', 'log13.file']) " filename regexp
:call add(tl, ['[\]\^\-\\]\+', '\^\\\-\---^', '\^\\\-\---^']) " special chars
:call add(tl, ['[[.a.]]\+', 'aa', 'aa']) " collation elem
:call add(tl, ['abc[0-9]*ddd', 'siuhabc ii']) " middle of regexp
:call add(tl, ['abc[0-9]*ddd', 'adf abc44482ddd oijs', 'abc44482ddd'])
:call add(tl, ['\_[0-9]\+', 'asfi9888u', '9888'])
:call add(tl, ['[0-9\n]\+', 'asfi9888u', '9888'])
:"""" Test recognition of some character classes
:call add(tl, ['[0-9]', '8', '8'])
:call add(tl, ['[^0-9]', '8'])
:call add(tl, ['[0-9a-fA-F]*', '0a7', '0a7'])
:call add(tl, ['[^0-9A-Fa-f]\+', '0a7'])
:call add(tl, ['[a-z_A-Z0-9]\+', 'aso_sfoij', 'aso_sfoij'])
:call add(tl, ['[a-z]', 'a', 'a'])
:call add(tl, ['[a-zA-Z]', 'a', 'a'])
:call add(tl, ['[A-Z]', 'a'])
:call add(tl, ['\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa'])
:"""" Tests for \z features
:call add(tl, ['xx \ze test', 'xx ']) " must match after \ze
:call add(tl, ['abc\zeend', 'oij abcend', 'abc'])
:call add(tl, ['abc\zsdd', 'ddabcddxyzt', 'dd'])
:call add(tl, ['aa \zsax', ' ax']) " must match before \zs
:call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match'])
:call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last'])
:"""" Tests for \@ features
:call add(tl, ['abc\@=', 'abc', 'ab'])
:call add(tl, ['abc\@=cd', 'abcd', 'abcd'])
:call add(tl, ['abc\@=', 'ababc', 'ab'])
:call add(tl, ['abcd\@=e', 'abcd']) " will never match, no matter the input text
:call add(tl, ['abcd\@=e', 'any text in here ... ']) " will never match
:call add(tl, ['\v(abc)@=..', 'xabcd', 'ab', 'abc'])
:call add(tl, ['\(.*John\)\@=.*Bob', 'here is John, and here is B']) " no match
:call add(tl, ['\(John.*\)\@=.*Bob', 'John is Bobs friend', 'John is Bob', 'John is Bobs friend'])
:call add(tl, ['.*John\&.*Bob', 'here is John, and here is B']) " no match
:call add(tl, ['.*John\&.*Bob', 'John is Bobs friend', 'John is Bob'])
:call add(tl, ['\v(test1)@=.*yep', 'this is a test1, yep it is', 'test1, yep', 'test1'])
:"""" Combining different tests and features
:call add(tl, ['[[:alpha:]]\{-2,6}', '787abcdiuhsasiuhb4', 'ab'])
:call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])
:call add(tl, ['', 'abcd', ''])
:call add(tl, ['\v(())', 'any possible text', ''])
:call add(tl, ['\v%(ab(xyz)c)', ' abxyzc ', 'abxyzc', 'xyz'])
:call add(tl, ['\v(test|)empty', 'tesempty', 'empty', ''])
:call add(tl, ['\v(a|aa)(a|aa)', 'aaa', 'aa', 'a', 'a'])
:"""" Run the tests
:" :"
:for t in tl :for t in tl
: let l = matchlist(t[1], t[0]) : let l = matchlist(t[1], t[0])
@@ -143,7 +307,7 @@ STARTTEST
: elseif len(t) > 2 && l[0] != t[2] : elseif len(t) > 2 && l[0] != t[2]
: $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"' : $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"'
: else : else
: $put ='OK' : $put ='OK - ' . t[0]
: endif : endif
: if len(l) > 0 : if len(l) > 0
:" check all the nine submatches :" check all the nine submatches
@@ -161,7 +325,17 @@ STARTTEST
: endif : endif
:endfor :endfor
:unlet t tl e l :unlet t tl e l
:/^Results/,$wq! test.out
:" Check that \_[0-9] matching EOL does not break a following \>
:" This only works on a buffer line, not with expression evaluation
/^Find this
/\<\(\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\.\)\{3\}\(25\_[0-5]\|2\_[0-4]\_[0-9]\|\_[01]\?\_[0-9]\_[0-9]\?\)\>
y$Gop:"
:/\%#=1^Results/,$wq! test.out
ENDTEST ENDTEST
Find this:
localnet/192.168.0.1
Results of test64: Results of test64:

View File

@@ -1,102 +1,230 @@
Results of test64: Results of test64:
OK OK - ab
OK OK - b
OK OK - bc*
OK OK - bc\{-}
OK OK - bc\{-}\(d\)
OK OK - bc*
OK OK - c*
OK OK - bc*
OK OK - c*
OK OK - bc\+
OK OK - bc\+
OK OK - a\|ab
OK OK - c\?
OK OK - bc\?
OK OK - bc\?
OK OK - \va{1}
OK OK - \va{2}
OK OK - \va{2}
OK OK - \va{2}
OK OK - \va{2}
OK OK - \va{2}
OK OK - \va{2}
OK OK - \vb{1}
OK OK - \vba{2}
OK OK - \vba{3}
OK OK - \v(ab){1}
OK OK - \v(ab){1}
OK OK - \v(ab){1}
OK OK - \v(ab){0,2}
OK OK - \v(ab){0,2}
OK OK - \v(ab){1,2}
OK OK - \v(ab){1,2}
OK OK - \v(ab){2,4}
OK OK - \v(ab){2,4}
OK OK - \v(ab){2}
OK OK - \v(ab){2}
OK OK - \v(ab){2}
OK OK - \v(ab){2}
OK OK - \v((ab){2}){2}
OK OK - \v((ab){2}){2}
OK OK - \v(a{1}){1}
OK OK - \v(a{2}){1}
OK OK - \v(a{2}){1}
OK OK - \v(a{2}){1}
OK OK - \v(a{1}){2}
OK OK - \v(a{1}){2}
OK OK - \v(a{2})+
OK OK - \v(a{2})+
OK OK - \v(a{2}){1}
OK OK - \v(a{1}){2}
OK OK - \v(a{1}){1}
OK OK - \v(a{2}){2}
OK OK - \v(a{2}){2}
OK OK - \v(a+){2}
OK OK - \v(a{3}){2}
OK OK - \v(a{1,2}){2}
OK OK - \v(a{1,3}){2}
OK OK - \v(a{1,3}){2}
OK OK - \v(a{1,3}){3}
OK OK - \v(a{1,2}){2}
OK OK - \v(a+)+
OK OK - \v(a+)+
OK OK - \v(a+){1,2}
OK OK - \v(a+)(a+)
OK OK - \v(a{3})+
OK OK - \v(a|b|c)+
OK OK - \v(a|b|c){2}
OK OK - \v(abc){2}
OK OK - \v(abc){2}
OK OK - a*
OK OK - \v(a*)+
OK OK - \v((ab)+)+
OK OK - \v(((ab)+)+)+
OK OK - \v(((ab)+)+)+
OK OK - \v(a{0,2})+
OK OK - \v(a*)+
OK OK - \v((a*)+)+
OK OK - \v((ab)*)+
OK OK - \va{1,3}
OK OK - \va{2,3}
OK OK - \v((ab)+|c*)+
OK OK - \v(a{2})|(b{3})
OK OK - \va{2}|b{2}
OK OK - \v(a)+|(c)+
OK OK - \vab{2,3}c
OK OK - \vab{2,3}c
OK OK - \vab{2,3}cd{2,3}e
OK OK - \va(bc){2}d
OK OK - \va*a{2}
OK OK - \va*a{2}
OK OK - \va*a{2}
OK OK - \va*a{2}
OK OK - \va*b*|a*c*
OK OK - \va{1}b{1}|a{1}b{1}
OK OK - \v(a)
OK OK - \v(a)(b)
OK OK - \v(ab)(b)(c)
OK OK - \v((a)(b))
OK OK - \v(a)|(b)
OK OK - \v(a*)+
OK OK - x
OK - ab
OK - ab
OK - ab
OK - x*
OK - x*
OK - x*
OK - x\+
OK - x\+
OK - x\+
OK - x\+
OK - x\=
OK - x\=
OK - x\=
OK - x\?
OK - x\?
OK - x\?
OK - a\{0,0}
OK - a\{0,1}
OK - a\{1,0}
OK - a\{3,6}
OK - a\{3,6}
OK - a\{3,6}
OK - a\{0}
OK - a\{2}
OK - a\{2}
OK - a\{2}
OK - a\{0,}
OK - a\{0,}
OK - a\{2,}
OK - a\{2,}
OK - a\{,0}
OK - a\{,5}
OK - a\{,5}
OK - a\{}
OK - a\{}
OK - a\{-0,0}
OK - a\{-0,1}
OK - a\{-3,6}
OK - a\{-3,6}
OK - a\{-3,6}
OK - a\{-0}
OK - a\{-2}
OK - a\{-2}
OK - a\{-0,}
OK - a\{-0,}
OK - a\{-2,}
OK - a\{-2,}
OK - a\{-,0}
OK - a\{-,5}
OK - a\{-,5}
OK - a\{-}
OK - a\{-}
OK - \(abc\)*
OK - \(ab\)\+
OK - \(abaaaaa\)*cd
OK - \(test1\)\? \(test2\)\?
OK - \(test1\)\= \(test2\) \(test4443\)\=
OK - \(\(sub1\) hello \(sub 2\)\)
OK - \(\(\(yyxxzz\)\)\)
OK - \v((ab)+|c+)+
OK - \v((ab)|c*)+
OK - \v(a(c*)+b)+
OK - \v(a|b*)+
OK - a\{-2,7}
OK - a\{2,7}
OK - \vx(.{-,8})yz(.*)
OK - \vx(.*)yz(.*)
OK - \v(a{1,2}){-2,3}
OK - \v(a{-1,3})+
OK - \d\+e\d\d
OK - \v[a]
OK - a[bcd]
OK - a[b-d]
OK - [a-d][e-f][x-x]d
OK - \v[[:alpha:]]+
OK - [[:alpha:]\+]
OK - [^abc]\+
OK - [^abc]
OK - [^abc]\+
OK - [^a-d]\+
OK - [a-f]*
OK - [a-f]*
OK - [^a-f]\+
OK - [a-c]\{-3,6}
OK - [^[:alpha:]]\+
OK - [-a]
OK - [a-]
OK - [-./[:alnum:]_~]\+
OK - [\]\^\-\\]\+
OK - [[.a.]]\+
OK - abc[0-9]*ddd
OK - abc[0-9]*ddd
OK - \_[0-9]\+
OK - [0-9\n]\+
OK - [0-9]
OK - [^0-9]
OK - [0-9a-fA-F]*
OK - [^0-9A-Fa-f]\+
OK - [a-z_A-Z0-9]\+
OK - [a-z]
OK - [a-zA-Z]
OK - [A-Z]
OK - \C[^A-Z]\+
OK - xx \ze test
OK - abc\zeend
OK - abc\zsdd
OK - aa \zsax
OK - abc \zsmatch\ze abc
OK - \v(a \zsif .*){2}
OK - abc\@=
OK - abc\@=cd
OK - abc\@=
OK - abcd\@=e
OK - abcd\@=e
OK - \v(abc)@=..
OK - \(.*John\)\@=.*Bob
OK - \(John.*\)\@=.*Bob
OK - .*John\&.*Bob
OK - .*John\&.*Bob
OK - \v(test1)@=.*yep
OK - [[:alpha:]]\{-2,6}
OK - [^[=a=]]\+
OK -
OK - \v(())
OK - \v%(ab(xyz)c)
OK - \v(test|)empty
OK - \v(a|aa)(a|aa)
192.168.0.1

63
src/testdir/test95.in Normal file
View File

@@ -0,0 +1,63 @@
Test for regexp patterns with multi-byte support.
See test64 for the non-multi-byte tests.
A pattern that gives the expected result produces OK, so that we know it was
actually tried.
STARTTEST
:so small.vim
:so mbyte.vim
:" tl is a List of Lists with:
:" regexp pattern
:" text to test the pattern on
:" expected match (optional)
:" expected submatch 1 (optional)
:" expected submatch 2 (optional)
:" etc.
:" When there is no match use only the first two items.
:let tl = []
:"""" Multi-byte character tests. These will fail unless vim is compiled
:"""" with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
:call add(tl, ['[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
:call add(tl, ['[[=a=]]\+', 'ddaãâbcd', 'aãâ']) " equivalence classes
:call add(tl, ['[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
:call add(tl, [' [^ ]\+', 'start มabcdม ', ' มabcdม'])
:call add(tl, ['[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])
:"""" Run the tests
:"
:" For every entry t of tl: match the text t[1] against the pattern t[0] and
:" append one line at the end of the buffer, either 'OK - {pattern}' or an
:" 'ERROR: ...' line describing what differed from the expectation.
:for t in tl
:" l is an empty list when the pattern does not match; otherwise l[0] is the
:" whole match and l[1] .. l[9] are the submatches
: let l = matchlist(t[1], t[0])
:" check the match itself
:" a two-item entry means that no match is expected
: if len(l) == 0 && len(t) > 2
: $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", did not match, expected: \"' . t[2] . '\"'
: elseif len(l) > 0 && len(t) == 2
: $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected no match'
: elseif len(t) > 2 && l[0] != t[2]
: $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", match: \"' . l[0] . '\", expected: \"' . t[2] . '\"'
: else
:" the pattern is included so that each OK line shows which test produced it
: $put ='OK - ' . t[0]
: endif
: if len(l) > 0
:" check all the nine submatches
: for i in range(1, 9)
:" submatch i is expected in t[i + 2]; when the entry has no such item the
:" submatch must be empty
: if len(t) <= i + 2
: let e = ''
: else
: let e = t[i + 2]
: endif
: if l[i] != e
: $put ='ERROR: pat: \"' . t[0] . '\", text: \"' . t[1] . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
: endif
: endfor
: unlet i
: endif
:endfor
:unlet t tl e l
:" Write the lines from the one matching ^Results up to the last line to
:" test.out and quit.  The \%#=1 prefix forces the old regexp engine for
:" this search.
:/\%#=1^Results/,$wq! test.out
ENDTEST
Results of test95:

6
src/testdir/test95.ok Normal file
View File

@@ -0,0 +1,6 @@
Results of test95:
OK - [[:alpha:][=a=]]\+
OK - [[=a=]]\+
OK - [^ม ]\+
OK -  [^ ]\+
OK - [ม[:alpha:][=a=]]\+

View File

@@ -728,6 +728,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
970,
/**/ /**/
969, 969,
/**/ /**/