mirror of
https://github.com/vim/vim.git
synced 2025-07-04 23:07:33 -04:00
updated for version 7.4.293
Problem: It is not possible to ignore composing characters at a specific point in a pattern. Solution: Add the %C item.
This commit is contained in:
parent
6082bea6ac
commit
8df5acfda9
@ -545,6 +545,7 @@ Character classes {not in Vi}: */character-classes*
|
|||||||
|/\%u| \%u \%u match specified multibyte character (eg \%u20ac)
|
|/\%u| \%u \%u match specified multibyte character (eg \%u20ac)
|
||||||
|/\%U| \%U \%U match specified large multibyte character (eg
|
|/\%U| \%U \%U match specified large multibyte character (eg
|
||||||
\%U12345678)
|
\%U12345678)
|
||||||
|
|/\%C| \%C \%C match any composing characters
|
||||||
|
|
||||||
Example matches ~
|
Example matches ~
|
||||||
\<\I\i* or
|
\<\I\i* or
|
||||||
@ -1207,12 +1208,18 @@ will probably never match.
|
|||||||
8. Composing characters *patterns-composing*
|
8. Composing characters *patterns-composing*
|
||||||
|
|
||||||
*/\Z*
|
*/\Z*
|
||||||
When "\Z" appears anywhere in the pattern, composing characters are ignored.
|
When "\Z" appears anywhere in the pattern, all composing characters are
|
||||||
Thus only the base characters need to match, the composing characters may be
|
ignored. Thus only the base characters need to match, the composing
|
||||||
different and the number of composing characters may differ. Only relevant
|
characters may be different and the number of composing characters may differ.
|
||||||
when 'encoding' is "utf-8".
|
Only relevant when 'encoding' is "utf-8".
|
||||||
Exception: If the pattern starts with one or more composing characters, these
|
Exception: If the pattern starts with one or more composing characters, these
|
||||||
must match.
|
must match.
|
||||||
|
*/\%C*
|
||||||
|
Use "\%C" to skip any composing characters. For example, the pattern "a" does
|
||||||
|
not match in "càt" (where the a has the composing character 0x0300), but
|
||||||
|
"a\%C" does. Note that this does not match "cát" (where the á is character
|
||||||
|
0xe1, it does not have a composing character). It does match "cat" (where
|
||||||
|
the a is just an a).
|
||||||
|
|
||||||
When a composing character appears at the start of the pattern or after an
|
When a composing character appears at the start of the pattern or after an
|
||||||
item that doesn't include the composing character, a match is found at any
|
item that doesn't include the composing character, a match is found at any
|
||||||
|
21
src/regexp.c
21
src/regexp.c
@ -244,6 +244,7 @@
|
|||||||
|
|
||||||
#define RE_MARK 207 /* mark cmp Match mark position */
|
#define RE_MARK 207 /* mark cmp Match mark position */
|
||||||
#define RE_VISUAL 208 /* Match Visual area */
|
#define RE_VISUAL 208 /* Match Visual area */
|
||||||
|
#define RE_COMPOSING 209 /* any composing characters */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Magic characters have a special meaning, they don't match literally.
|
* Magic characters have a special meaning, they don't match literally.
|
||||||
@ -2208,6 +2209,10 @@ regatom(flagp)
|
|||||||
ret = regnode(RE_VISUAL);
|
ret = regnode(RE_VISUAL);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'C':
|
||||||
|
ret = regnode(RE_COMPOSING);
|
||||||
|
break;
|
||||||
|
|
||||||
/* \%[abc]: Emit as a list of branches, all ending at the last
|
/* \%[abc]: Emit as a list of branches, all ending at the last
|
||||||
* branch which matches nothing. */
|
* branch which matches nothing. */
|
||||||
case '[':
|
case '[':
|
||||||
@ -4710,11 +4715,13 @@ regmatch(scan)
|
|||||||
status = RA_NOMATCH;
|
status = RA_NOMATCH;
|
||||||
}
|
}
|
||||||
#ifdef FEAT_MBYTE
|
#ifdef FEAT_MBYTE
|
||||||
/* Check for following composing character. */
|
/* Check for following composing character, unless %C
|
||||||
|
* follows (skips over all composing chars). */
|
||||||
if (status != RA_NOMATCH
|
if (status != RA_NOMATCH
|
||||||
&& enc_utf8
|
&& enc_utf8
|
||||||
&& UTF_COMPOSINGLIKE(reginput, reginput + len)
|
&& UTF_COMPOSINGLIKE(reginput, reginput + len)
|
||||||
&& !ireg_icombine)
|
&& !ireg_icombine
|
||||||
|
&& OP(next) != RE_COMPOSING)
|
||||||
{
|
{
|
||||||
/* raaron: This code makes a composing character get
|
/* raaron: This code makes a composing character get
|
||||||
* ignored, which is the correct behavior (sometimes)
|
* ignored, which is the correct behavior (sometimes)
|
||||||
@ -4791,6 +4798,16 @@ regmatch(scan)
|
|||||||
status = RA_NOMATCH;
|
status = RA_NOMATCH;
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
|
case RE_COMPOSING:
|
||||||
|
#ifdef FEAT_MBYTE
|
||||||
|
if (enc_utf8)
|
||||||
|
{
|
||||||
|
/* Skip composing characters. */
|
||||||
|
while (utf_iscomposing(utf_ptr2char(reginput)))
|
||||||
|
mb_cptr_adv(reginput);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
case NOTHING:
|
case NOTHING:
|
||||||
break;
|
break;
|
||||||
|
@ -81,6 +81,7 @@ enum
|
|||||||
NFA_COMPOSING, /* Next nodes in NFA are part of the
|
NFA_COMPOSING, /* Next nodes in NFA are part of the
|
||||||
composing multibyte char */
|
composing multibyte char */
|
||||||
NFA_END_COMPOSING, /* End of a composing char in the NFA */
|
NFA_END_COMPOSING, /* End of a composing char in the NFA */
|
||||||
|
NFA_ANY_COMPOSING, /* \%C: Any composing characters. */
|
||||||
NFA_OPT_CHARS, /* \%[abc] */
|
NFA_OPT_CHARS, /* \%[abc] */
|
||||||
|
|
||||||
/* The following are used only in the postfix form, not in the NFA */
|
/* The following are used only in the postfix form, not in the NFA */
|
||||||
@ -1418,6 +1419,10 @@ nfa_regatom()
|
|||||||
EMIT(NFA_VISUAL);
|
EMIT(NFA_VISUAL);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'C':
|
||||||
|
EMIT(NFA_ANY_COMPOSING);
|
||||||
|
break;
|
||||||
|
|
||||||
case '[':
|
case '[':
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
@ -2429,6 +2434,7 @@ nfa_set_code(c)
|
|||||||
case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break;
|
case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break;
|
||||||
case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break;
|
case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break;
|
||||||
case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break;
|
case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break;
|
||||||
|
case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break;
|
||||||
|
|
||||||
case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
|
case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
|
||||||
case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
|
case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
|
||||||
@ -2967,6 +2973,7 @@ nfa_max_width(startstate, depth)
|
|||||||
case NFA_NLOWER_IC:
|
case NFA_NLOWER_IC:
|
||||||
case NFA_UPPER_IC:
|
case NFA_UPPER_IC:
|
||||||
case NFA_NUPPER_IC:
|
case NFA_NUPPER_IC:
|
||||||
|
case NFA_ANY_COMPOSING:
|
||||||
/* possibly non-ascii */
|
/* possibly non-ascii */
|
||||||
#ifdef FEAT_MBYTE
|
#ifdef FEAT_MBYTE
|
||||||
if (has_mbyte)
|
if (has_mbyte)
|
||||||
@ -4152,6 +4159,7 @@ match_follows(startstate, depth)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
case NFA_ANY:
|
case NFA_ANY:
|
||||||
|
case NFA_ANY_COMPOSING:
|
||||||
case NFA_IDENT:
|
case NFA_IDENT:
|
||||||
case NFA_SIDENT:
|
case NFA_SIDENT:
|
||||||
case NFA_KWORD:
|
case NFA_KWORD:
|
||||||
@ -4395,7 +4403,7 @@ skip_add:
|
|||||||
switch (state->c)
|
switch (state->c)
|
||||||
{
|
{
|
||||||
case NFA_MATCH:
|
case NFA_MATCH:
|
||||||
nfa_match = TRUE;
|
// nfa_match = TRUE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NFA_SPLIT:
|
case NFA_SPLIT:
|
||||||
@ -5151,6 +5159,7 @@ failure_chance(state, depth)
|
|||||||
|
|
||||||
case NFA_MATCH:
|
case NFA_MATCH:
|
||||||
case NFA_MCLOSE:
|
case NFA_MCLOSE:
|
||||||
|
case NFA_ANY_COMPOSING:
|
||||||
/* empty match works always */
|
/* empty match works always */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -5573,6 +5582,12 @@ nfa_regmatch(prog, start, submatch, m)
|
|||||||
{
|
{
|
||||||
case NFA_MATCH:
|
case NFA_MATCH:
|
||||||
{
|
{
|
||||||
|
#ifdef FEAT_MBYTE
|
||||||
|
/* If the match ends before a composing character and
|
||||||
|
* ireg_icombine is not set, that is not really a match. */
|
||||||
|
if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc))
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
nfa_match = TRUE;
|
nfa_match = TRUE;
|
||||||
copy_sub(&submatch->norm, &t->subs.norm);
|
copy_sub(&submatch->norm, &t->subs.norm);
|
||||||
#ifdef FEAT_SYN_HL
|
#ifdef FEAT_SYN_HL
|
||||||
@ -6120,6 +6135,23 @@ nfa_regmatch(prog, start, submatch, m)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case NFA_ANY_COMPOSING:
|
||||||
|
/* On a composing character skip over it. Otherwise do
|
||||||
|
* nothing. Always matches. */
|
||||||
|
#ifdef FEAT_MBYTE
|
||||||
|
if (enc_utf8 && utf_iscomposing(curc))
|
||||||
|
{
|
||||||
|
add_off = clen;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
add_here = TRUE;
|
||||||
|
add_off = 0;
|
||||||
|
}
|
||||||
|
add_state = t->state->out;
|
||||||
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Character classes like \a for alpha, \d for digit etc.
|
* Character classes like \a for alpha, \d for digit etc.
|
||||||
*/
|
*/
|
||||||
@ -6484,12 +6516,10 @@ nfa_regmatch(prog, start, submatch, m)
|
|||||||
if (!result && ireg_ic)
|
if (!result && ireg_ic)
|
||||||
result = MB_TOLOWER(c) == MB_TOLOWER(curc);
|
result = MB_TOLOWER(c) == MB_TOLOWER(curc);
|
||||||
#ifdef FEAT_MBYTE
|
#ifdef FEAT_MBYTE
|
||||||
/* If there is a composing character which is not being
|
/* If ireg_icombine is not set only skip over the character
|
||||||
* ignored there can be no match. Match with composing
|
* itself. When it is set skip over composing characters. */
|
||||||
* character uses NFA_COMPOSING above. */
|
if (result && enc_utf8 && !ireg_icombine)
|
||||||
if (result && enc_utf8 && !ireg_icombine
|
clen = utf_char2len(curc);
|
||||||
&& clen != utf_char2len(curc))
|
|
||||||
result = FALSE;
|
|
||||||
#endif
|
#endif
|
||||||
ADD_STATE_IF_MATCH(t->state);
|
ADD_STATE_IF_MATCH(t->state);
|
||||||
break;
|
break;
|
||||||
|
@ -734,6 +734,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
293,
|
||||||
/**/
|
/**/
|
||||||
292,
|
292,
|
||||||
/**/
|
/**/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user