mirror of
https://github.com/vim/vim.git
synced 2025-09-24 03:44:06 -04:00
updated for version 7.3.253
Problem: "echo 'abc' > ''" returns 0 or 1, depending on 'ignorecase'. Checks in mb_strnicmp() for illegal and truncated bytes are wrong. Should not assume that byte length is equal before case folding. Solution: Add utf_safe_read_char_adv() and utf_strnicmp(). Add a test for this. (Ivan Krasilnikov)
This commit is contained in:
182
src/mbyte.c
182
src/mbyte.c
@@ -132,6 +132,7 @@ static int utf_ptr2cells_len __ARGS((char_u *p, int size));
|
|||||||
static int dbcs_char2cells __ARGS((int c));
|
static int dbcs_char2cells __ARGS((int c));
|
||||||
static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
|
static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
|
||||||
static int dbcs_ptr2char __ARGS((char_u *p));
|
static int dbcs_ptr2char __ARGS((char_u *p));
|
||||||
|
static int utf_safe_read_char_adv __ARGS((char_u **s, size_t *n));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lookup table to quickly get the length in bytes of a UTF-8 character from
|
* Lookup table to quickly get the length in bytes of a UTF-8 character from
|
||||||
@@ -1700,6 +1701,66 @@ utf_ptr2char(p)
|
|||||||
return p[0];
|
return p[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert a UTF-8 byte sequence to a wide character.
|
||||||
|
* String is assumed to be terminated by NUL or after "n" bytes, whichever
|
||||||
|
* comes first.
|
||||||
|
* The function is safe in the sense that it never accesses memory beyond the
|
||||||
|
* first "n" bytes of "s".
|
||||||
|
*
|
||||||
|
* On success, returns decoded codepoint, advances "s" to the beginning of
|
||||||
|
* next character and decreases "n" accordingly.
|
||||||
|
*
|
||||||
|
* If end of string was reached, returns 0 and, if "n" > 0, advances "s" past
|
||||||
|
* NUL byte.
|
||||||
|
*
|
||||||
|
* If byte sequence is illegal or incomplete, returns -1 and does not advance
|
||||||
|
* "s".
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
utf_safe_read_char_adv(s, n)
|
||||||
|
char_u **s;
|
||||||
|
size_t *n;
|
||||||
|
{
|
||||||
|
int c, k;
|
||||||
|
|
||||||
|
if (*n == 0) /* end of buffer */
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
k = utf8len_tab_zero[**s];
|
||||||
|
|
||||||
|
if (k == 1)
|
||||||
|
{
|
||||||
|
/* ASCII character or NUL */
|
||||||
|
(*n)--;
|
||||||
|
return *(*s)++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((size_t)k <= *n)
|
||||||
|
{
|
||||||
|
/* We have a multibyte sequence and it isn't truncated by buffer
|
||||||
|
* limits so utf_ptr2char() is safe to use. Or the first byte is
|
||||||
|
* illegal (k=0), and it's also safe to use utf_ptr2char(). */
|
||||||
|
c = utf_ptr2char(*s);
|
||||||
|
|
||||||
|
/* On failure, utf_ptr2char() returns the first byte, so here we
|
||||||
|
* check equality with the first byte. The only non-ASCII character
|
||||||
|
* which equals the first byte of its own UTF-8 representation is
|
||||||
|
* U+00C3 (UTF-8: 0xC3 0x83), so need to check that special case too.
|
||||||
|
* It's safe even if n=1, else we would have k=2 > n. */
|
||||||
|
if (c != (int)(**s) || (c == 0xC3 && (*s)[1] == 0x83))
|
||||||
|
{
|
||||||
|
/* byte sequence was successfully decoded */
|
||||||
|
*s += k;
|
||||||
|
*n -= k;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* byte sequence is incomplete or illegal */
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get character at **pp and advance *pp to the next character.
|
* Get character at **pp and advance *pp to the next character.
|
||||||
* Note: composing characters are skipped!
|
* Note: composing characters are skipped!
|
||||||
@@ -2667,7 +2728,8 @@ static convertStruct foldCase[] =
|
|||||||
{0x10400,0x10427,1,40}
|
{0x10400,0x10427,1,40}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int utf_convert(int a, convertStruct table[], int tableSize);
|
static int utf_convert __ARGS((int a, convertStruct table[], int tableSize));
|
||||||
|
static int utf_strnicmp __ARGS((char_u *s1, char_u *s2, size_t n1, size_t n2));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generic conversion function for case operations.
|
* Generic conversion function for case operations.
|
||||||
@@ -3079,6 +3141,80 @@ utf_isupper(a)
|
|||||||
return (utf_tolower(a) != a);
|
return (utf_tolower(a) != a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
utf_strnicmp(s1, s2, n1, n2)
|
||||||
|
char_u *s1, *s2;
|
||||||
|
size_t n1, n2;
|
||||||
|
{
|
||||||
|
int c1, c2, cdiff;
|
||||||
|
char_u buffer[6];
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
c1 = utf_safe_read_char_adv(&s1, &n1);
|
||||||
|
c2 = utf_safe_read_char_adv(&s2, &n2);
|
||||||
|
|
||||||
|
if (c1 <= 0 || c2 <= 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (c1 == c2)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
cdiff = utf_fold(c1) - utf_fold(c2);
|
||||||
|
if (cdiff != 0)
|
||||||
|
return cdiff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* some string ended or has an incomplete/illegal character sequence */
|
||||||
|
|
||||||
|
if (c1 == 0 || c2 == 0)
|
||||||
|
{
|
||||||
|
/* some string ended. shorter string is smaller */
|
||||||
|
if (c1 == 0 && c2 == 0)
|
||||||
|
return 0;
|
||||||
|
return c1 == 0 ? -1 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Continue with bytewise comparison to produce some result that
|
||||||
|
* would make comparison operations involving this function transitive.
|
||||||
|
*
|
||||||
|
* If only one string had an error, comparison should be made with
|
||||||
|
* folded version of the other string. In this case it is enough
|
||||||
|
* to fold just one character to determine the result of comparison. */
|
||||||
|
|
||||||
|
if (c1 != -1 && c2 == -1)
|
||||||
|
{
|
||||||
|
n1 = utf_char2bytes(utf_fold(c1), buffer);
|
||||||
|
s1 = buffer;
|
||||||
|
}
|
||||||
|
else if (c2 != -1 && c1 == -1)
|
||||||
|
{
|
||||||
|
n2 = utf_char2bytes(utf_fold(c2), buffer);
|
||||||
|
s2 = buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (n1 > 0 && n2 > 0 && *s1 != NUL && *s2 != NUL)
|
||||||
|
{
|
||||||
|
cdiff = (int)(*s1) - (int)(*s2);
|
||||||
|
if (cdiff != 0)
|
||||||
|
return cdiff;
|
||||||
|
|
||||||
|
s1++;
|
||||||
|
s2++;
|
||||||
|
n1--;
|
||||||
|
n2--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n1 > 0 && *s1 == NUL)
|
||||||
|
n1 = 0;
|
||||||
|
if (n2 > 0 && *s2 == NUL)
|
||||||
|
n2 = 0;
|
||||||
|
|
||||||
|
if (n1 == 0 && n2 == 0)
|
||||||
|
return 0;
|
||||||
|
return n1 == 0 ? -1 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Version of strnicmp() that handles multi-byte characters.
|
* Version of strnicmp() that handles multi-byte characters.
|
||||||
* Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can
|
* Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can
|
||||||
@@ -3092,49 +3228,21 @@ mb_strnicmp(s1, s2, nn)
|
|||||||
char_u *s1, *s2;
|
char_u *s1, *s2;
|
||||||
size_t nn;
|
size_t nn;
|
||||||
{
|
{
|
||||||
int i, j, l;
|
int i, l;
|
||||||
int cdiff;
|
int cdiff;
|
||||||
int incomplete = FALSE;
|
|
||||||
int n = (int)nn;
|
int n = (int)nn;
|
||||||
|
|
||||||
|
if (enc_utf8)
|
||||||
|
{
|
||||||
|
return utf_strnicmp(s1, s2, nn, nn);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
for (i = 0; i < n; i += l)
|
for (i = 0; i < n; i += l)
|
||||||
{
|
{
|
||||||
if (s1[i] == NUL && s2[i] == NUL) /* both strings end */
|
if (s1[i] == NUL && s2[i] == NUL) /* both strings end */
|
||||||
return 0;
|
return 0;
|
||||||
if (enc_utf8)
|
|
||||||
{
|
|
||||||
l = utf_byte2len(s1[i]);
|
|
||||||
if (l > n - i)
|
|
||||||
{
|
|
||||||
l = n - i; /* incomplete character */
|
|
||||||
incomplete = TRUE;
|
|
||||||
}
|
|
||||||
/* Check directly first, it's faster. */
|
|
||||||
for (j = 0; j < l; ++j)
|
|
||||||
{
|
|
||||||
if (s1[i + j] != s2[i + j])
|
|
||||||
break;
|
|
||||||
if (s1[i + j] == 0)
|
|
||||||
/* Both stings have the same bytes but are incomplete or
|
|
||||||
* have illegal bytes, accept them as equal. */
|
|
||||||
l = j;
|
|
||||||
}
|
|
||||||
if (j < l)
|
|
||||||
{
|
|
||||||
/* If one of the two characters is incomplete return -1. */
|
|
||||||
if (incomplete || i + utf_byte2len(s2[i]) > n)
|
|
||||||
return -1;
|
|
||||||
/* Don't case-fold illegal bytes or truncated characters. */
|
|
||||||
if (utf_ptr2len(s1 + i) < l || utf_ptr2len(s2 + i) < l)
|
|
||||||
return -1;
|
|
||||||
cdiff = utf_fold(utf_ptr2char(s1 + i))
|
|
||||||
- utf_fold(utf_ptr2char(s2 + i));
|
|
||||||
if (cdiff != 0)
|
|
||||||
return cdiff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
l = (*mb_ptr2len)(s1 + i);
|
l = (*mb_ptr2len)(s1 + i);
|
||||||
if (l <= 1)
|
if (l <= 1)
|
||||||
{
|
{
|
||||||
|
@@ -29,7 +29,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
|
|||||||
test66.out test67.out test68.out test69.out test70.out \
|
test66.out test67.out test68.out test69.out test70.out \
|
||||||
test71.out test72.out test73.out test74.out test75.out \
|
test71.out test72.out test73.out test74.out test75.out \
|
||||||
test76.out test77.out test78.out test79.out test80.out \
|
test76.out test77.out test78.out test79.out test80.out \
|
||||||
test81.out
|
test81.out test82.out
|
||||||
|
|
||||||
.SUFFIXES: .in .out
|
.SUFFIXES: .in .out
|
||||||
|
|
||||||
@@ -130,3 +130,4 @@ test78.out: test78.in
|
|||||||
test79.out: test79.in
|
test79.out: test79.in
|
||||||
test80.out: test80.in
|
test80.out: test80.in
|
||||||
test81.out: test81.in
|
test81.out: test81.in
|
||||||
|
test82.out: test82.in
|
||||||
|
@@ -29,7 +29,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
|
|||||||
test42.out test52.out test65.out test66.out test67.out \
|
test42.out test52.out test65.out test66.out test67.out \
|
||||||
test68.out test69.out test71.out test72.out test73.out \
|
test68.out test69.out test71.out test72.out test73.out \
|
||||||
test74.out test75.out test76.out test77.out test78.out \
|
test74.out test75.out test76.out test77.out test78.out \
|
||||||
test79.out test80.out test81.out
|
test79.out test80.out test81.out test82.out
|
||||||
|
|
||||||
SCRIPTS32 = test50.out test70.out
|
SCRIPTS32 = test50.out test70.out
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
|
|||||||
test42.out test52.out test65.out test66.out test67.out \
|
test42.out test52.out test65.out test66.out test67.out \
|
||||||
test68.out test69.out test71.out test72.out test73.out \
|
test68.out test69.out test71.out test72.out test73.out \
|
||||||
test74.out test75.out test76.out test77.out test78.out \
|
test74.out test75.out test76.out test77.out test78.out \
|
||||||
test79.out test80.out test81.out
|
test79.out test80.out test81.out test82.out
|
||||||
|
|
||||||
SCRIPTS32 = test50.out test70.out
|
SCRIPTS32 = test50.out test70.out
|
||||||
|
|
||||||
|
@@ -29,7 +29,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
|
|||||||
test66.out test67.out test68.out test69.out test70.out \
|
test66.out test67.out test68.out test69.out test70.out \
|
||||||
test71.out test72.out test73.out test74.out test75.out \
|
test71.out test72.out test73.out test74.out test75.out \
|
||||||
test76.out test77.out test78.out test79.out test80.out \
|
test76.out test77.out test78.out test79.out test80.out \
|
||||||
test81.out
|
test81.out test82.out
|
||||||
|
|
||||||
.SUFFIXES: .in .out
|
.SUFFIXES: .in .out
|
||||||
|
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
# Authors: Zoltan Arpadffy, <arpadffy@polarhome.com>
|
# Authors: Zoltan Arpadffy, <arpadffy@polarhome.com>
|
||||||
# Sandor Kopanyi, <sandor.kopanyi@mailbox.hu>
|
# Sandor Kopanyi, <sandor.kopanyi@mailbox.hu>
|
||||||
#
|
#
|
||||||
# Last change: 2011 Jun 26
|
# Last change: 2011 Jul 15
|
||||||
#
|
#
|
||||||
# This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
|
# This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
|
||||||
# Edit the lines in the Configuration section below to select.
|
# Edit the lines in the Configuration section below to select.
|
||||||
@@ -75,7 +75,8 @@ SCRIPT = test1.out test2.out test3.out test4.out test5.out \
|
|||||||
test61.out test62.out test63.out test64.out test65.out \
|
test61.out test62.out test63.out test64.out test65.out \
|
||||||
test66.out test67.out test68.out test69.out \
|
test66.out test67.out test68.out test69.out \
|
||||||
test71.out test72.out test74.out test75.out test76.out \
|
test71.out test72.out test74.out test75.out test76.out \
|
||||||
test77.out test78.out test79.out test80.out test81.out
|
test77.out test78.out test79.out test80.out test81.out \
|
||||||
|
test82.out
|
||||||
|
|
||||||
# Known problems:
|
# Known problems:
|
||||||
# Test 30: a problem around mac format - unknown reason
|
# Test 30: a problem around mac format - unknown reason
|
||||||
|
@@ -26,7 +26,7 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
|
|||||||
test64.out test65.out test66.out test67.out test68.out \
|
test64.out test65.out test66.out test67.out test68.out \
|
||||||
test69.out test70.out test71.out test72.out test73.out \
|
test69.out test70.out test71.out test72.out test73.out \
|
||||||
test74.out test75.out test76.out test77.out test78.out \
|
test74.out test75.out test76.out test77.out test78.out \
|
||||||
test79.out test80.out test81.out
|
test79.out test80.out test81.out test82.out
|
||||||
|
|
||||||
SCRIPTS_GUI = test16.out
|
SCRIPTS_GUI = test16.out
|
||||||
|
|
||||||
|
@@ -709,6 +709,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
253,
|
||||||
/**/
|
/**/
|
||||||
252,
|
252,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user