1
0
forked from aniani/vim

patch 9.0.1485: no functions for converting from/to UTF-16 index

Problem:    no functions for converting from/to UTF-16 index.
Solution:   Add UTF-16 flag to existing functions and add strutf16len() and
            utf16idx(). (Yegappan Lakshmanan, closes #12216)
This commit is contained in:
Christian Brabandt
2023-04-24 21:09:54 +01:00
committed by Bram Moolenaar
parent e1b4822137
commit 67672ef097
8 changed files with 677 additions and 56 deletions

View File

@@ -1751,9 +1751,9 @@ static funcentry_T global_functions[] =
ret_number, f_bufwinnr},
{"byte2line", 1, 1, FEARG_1, arg1_number,
ret_number, f_byte2line},
{"byteidx", 2, 2, FEARG_1, arg2_string_number,
{"byteidx", 2, 3, FEARG_1, arg3_string_number_bool,
ret_number, f_byteidx},
{"byteidxcomp", 2, 2, FEARG_1, arg2_string_number,
{"byteidxcomp", 2, 3, FEARG_1, arg3_string_number_bool,
ret_number, f_byteidxcomp},
{"call", 2, 3, FEARG_1, arg3_any_list_dict,
ret_any, f_call},
@@ -1803,7 +1803,7 @@ static funcentry_T global_functions[] =
ret_number, f_charclass},
{"charcol", 1, 2, FEARG_1, arg2_string_or_list_number,
ret_number, f_charcol},
{"charidx", 2, 3, FEARG_1, arg3_string_number_bool,
{"charidx", 2, 4, FEARG_1, arg3_string_number_bool,
ret_number, f_charidx},
{"chdir", 1, 1, FEARG_1, arg1_string,
ret_string, f_chdir},
@@ -2601,6 +2601,8 @@ static funcentry_T global_functions[] =
ret_number, f_strridx},
{"strtrans", 1, 1, FEARG_1, arg1_string,
ret_string, f_strtrans},
{"strutf16len", 1, 2, FEARG_1, arg2_string_bool,
ret_number, f_strutf16len},
{"strwidth", 1, 1, FEARG_1, arg1_string,
ret_number, f_strwidth},
{"submatch", 1, 2, FEARG_1, arg2_number_bool,
@@ -2785,6 +2787,8 @@ static funcentry_T global_functions[] =
ret_dict_any, f_undotree},
{"uniq", 1, 3, FEARG_1, arg13_sortuniq,
ret_first_arg, f_uniq},
{"utf16idx", 2, 4, FEARG_1, arg3_string_number_bool,
ret_number, f_utf16idx},
{"values", 1, 1, FEARG_1, arg1_dict_any,
ret_list_member, f_values},
{"virtcol", 1, 2, FEARG_1, arg2_string_or_list_bool,

View File

@@ -36,12 +36,14 @@ void f_string(typval_T *argvars, typval_T *rettv);
void f_strlen(typval_T *argvars, typval_T *rettv);
void f_strcharlen(typval_T *argvars, typval_T *rettv);
void f_strchars(typval_T *argvars, typval_T *rettv);
void f_strutf16len(typval_T *argvars, typval_T *rettv);
void f_strdisplaywidth(typval_T *argvars, typval_T *rettv);
void f_strwidth(typval_T *argvars, typval_T *rettv);
void f_strcharpart(typval_T *argvars, typval_T *rettv);
void f_strpart(typval_T *argvars, typval_T *rettv);
void f_strridx(typval_T *argvars, typval_T *rettv);
void f_strtrans(typval_T *argvars, typval_T *rettv);
void f_utf16idx(typval_T *argvars, typval_T *rettv);
void f_tolower(typval_T *argvars, typval_T *rettv);
void f_toupper(typval_T *argvars, typval_T *rettv);
void f_tr(typval_T *argvars, typval_T *rettv);

View File

@@ -1006,10 +1006,6 @@ string_reduce(
static void
byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
{
char_u *t;
char_u *str;
varnumber_T idx;
rettv->vval.v_number = -1;
if (in_vim9script()
@@ -1017,20 +1013,42 @@ byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
|| check_for_number_arg(argvars, 1) == FAIL))
return;
str = tv_get_string_chk(&argvars[0]);
idx = tv_get_number_chk(&argvars[1], NULL);
char_u *str = tv_get_string_chk(&argvars[0]);
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
if (str == NULL || idx < 0)
return;
t = str;
varnumber_T utf16idx = FALSE;
if (argvars[2].v_type != VAR_UNKNOWN)
{
utf16idx = tv_get_bool(&argvars[2]);
if (utf16idx < 0 || utf16idx > 1)
{
semsg(_(e_using_number_as_bool_nr), utf16idx);
return;
}
}
int (*ptr2len)(char_u *);
if (enc_utf8 && comp)
ptr2len = utf_ptr2len;
else
ptr2len = mb_ptr2len;
char_u *t = str;
for ( ; idx > 0; idx--)
{
if (*t == NUL) // EOL reached
return;
if (enc_utf8 && comp)
t += utf_ptr2len(t);
else
t += (*mb_ptr2len)(t);
if (utf16idx)
{
int clen = ptr2len(t);
int c = (clen > 1) ? utf_ptr2char(t) : *t;
if (c > 0xFFFF)
idx--;
}
if (idx > 0)
t += ptr2len(t);
}
rettv->vval.v_number = (varnumber_T)(t - str);
}
@@ -1059,42 +1077,49 @@ f_byteidxcomp(typval_T *argvars, typval_T *rettv)
void
f_charidx(typval_T *argvars, typval_T *rettv)
{
char_u *str;
varnumber_T idx;
varnumber_T countcc = FALSE;
char_u *p;
int len;
int (*ptr2len)(char_u *);
rettv->vval.v_number = -1;
if ((check_for_string_arg(argvars, 0) == FAIL
if (check_for_string_arg(argvars, 0) == FAIL
|| check_for_number_arg(argvars, 1) == FAIL
|| check_for_opt_bool_arg(argvars, 2) == FAIL))
|| check_for_opt_bool_arg(argvars, 2) == FAIL
|| (argvars[2].v_type != VAR_UNKNOWN
&& check_for_opt_bool_arg(argvars, 3) == FAIL))
return;
str = tv_get_string_chk(&argvars[0]);
idx = tv_get_number_chk(&argvars[1], NULL);
char_u *str = tv_get_string_chk(&argvars[0]);
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
if (str == NULL || idx < 0)
return;
varnumber_T countcc = FALSE;
varnumber_T utf16idx = FALSE;
if (argvars[2].v_type != VAR_UNKNOWN)
countcc = tv_get_bool(&argvars[2]);
if (countcc < 0 || countcc > 1)
{
semsg(_(e_using_number_as_bool_nr), countcc);
return;
countcc = tv_get_bool(&argvars[2]);
if (argvars[3].v_type != VAR_UNKNOWN)
utf16idx = tv_get_bool(&argvars[3]);
}
int (*ptr2len)(char_u *);
if (enc_utf8 && countcc)
ptr2len = utf_ptr2len;
else
ptr2len = mb_ptr2len;
for (p = str, len = 0; p <= str + idx; len++)
char_u *p;
int len;
for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
{
if (*p == NUL)
return;
if (utf16idx)
{
idx--;
int clen = ptr2len(p);
int c = (clen > 1) ? utf_ptr2char(p) : *p;
if (c > 0xFFFF)
idx--;
}
p += ptr2len(p);
}
@@ -1358,6 +1383,38 @@ f_strchars(typval_T *argvars, typval_T *rettv)
strchar_common(argvars, rettv, skipcc);
}
/*
 * "strutf16len()" function
 *
 * Sets the number in "rettv" to the count of UTF-16 code units in the string
 * argvars[0]: every character adds one, a character above 0xFFFF adds two.
 * When the optional argvars[1] is true the string is walked with
 * mb_cptr2char_adv() instead of mb_ptr2char_adv().
 * The number stays -1 when one of the argument checks fails.
 */
void
f_strutf16len(typval_T *argvars, typval_T *rettv)
{
    // Reported when the argument checks below fail.
    rettv->vval.v_number = -1;

    if (check_for_string_arg(argvars, 0) == FAIL
            || check_for_opt_bool_arg(argvars, 1) == FAIL)
        return;

    // Pick the function that fetches one character and advances the pointer.
    int (*next_char)(char_u **pp) = mb_ptr2char_adv;
    if (argvars[1].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[1]))
        next_char = mb_cptr2char_adv;

    varnumber_T units = 0;
    for (char_u *p = tv_get_string(&argvars[0]); *p != NUL; )
        // A character that does not fit in 16 bits takes two code units.
        units += next_char(&p) > 0xFFFF ? 2 : 1;

    rettv->vval.v_number = units;
}
/*
* "strdisplaywidth()" function
*/
@@ -1619,6 +1676,61 @@ f_strtrans(typval_T *argvars, typval_T *rettv)
rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
}
/*
 * "utf16idx()" function
 *
 * Converts an index into the string argvars[0] to a UTF-16 code unit index
 * and stores it in "rettv".
 * argvars[1] is the index: a byte index by default, a character index when
 * the optional argvars[3] is true.
 * When the optional argvars[2] is true and the encoding is UTF-8 the string
 * is walked with utf_ptr2len() instead of mb_ptr2len().
 * The result stays -1 when an argument check fails, the index is negative or
 * the end of the string is reached before the index.
 */
void
f_utf16idx(typval_T *argvars, typval_T *rettv)
{
    rettv->vval.v_number = -1;

    // The index is a mandatory argument, thus it is not checked as optional.
    if (check_for_string_arg(argvars, 0) == FAIL
            || check_for_number_arg(argvars, 1) == FAIL
            || check_for_opt_bool_arg(argvars, 2) == FAIL
            || (argvars[2].v_type != VAR_UNKNOWN
                && check_for_opt_bool_arg(argvars, 3) == FAIL))
        return;

    char_u *str = tv_get_string_chk(&argvars[0]);
    varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
    if (str == NULL || idx < 0)
        return;

    varnumber_T countcc = FALSE;
    varnumber_T charidx = FALSE;
    if (argvars[2].v_type != VAR_UNKNOWN)
    {
        countcc = tv_get_bool(&argvars[2]);
        // The fourth argument can only be present when the third one is.
        if (argvars[3].v_type != VAR_UNKNOWN)
            charidx = tv_get_bool(&argvars[3]);
    }

    int (*ptr2len)(char_u *);
    if (enc_utf8 && countcc)
        ptr2len = utf_ptr2len;
    else
        ptr2len = mb_ptr2len;

    char_u *p;
    int len;
    // With a character index "idx" counts down once per character, with a
    // byte index the loop stops once "p" has moved past the indexed byte.
    // "len" counts the UTF-16 code units of the characters walked over.
    for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
    {
        if (*p == NUL)
            return;                 // index is beyond the string, result is -1

        int clen = ptr2len(p);
        int c = (clen > 1) ? utf_ptr2char(p) : *p;
        if (c > 0xFFFF)
            len++;                  // takes two UTF-16 code units
        p += clen;
        if (charidx)
            idx--;
    }

    // "len" is a count, the index is one less.
    rettv->vval.v_number = len > 0 ? len - 1 : 0;
}
/*
* "tolower(string)" function
*/

View File

@@ -1192,19 +1192,14 @@ func Test_byte2line_line2byte()
bw!
endfunc
" Test for byteidx() and byteidxcomp() functions
" Test for byteidx() using a character index
func Test_byteidx()
let a = '.é.' " one char of two bytes
call assert_equal(0, byteidx(a, 0))
call assert_equal(0, byteidxcomp(a, 0))
call assert_equal(1, byteidx(a, 1))
call assert_equal(1, byteidxcomp(a, 1))
call assert_equal(3, byteidx(a, 2))
call assert_equal(3, byteidxcomp(a, 2))
call assert_equal(4, byteidx(a, 3))
call assert_equal(4, byteidxcomp(a, 3))
call assert_equal(-1, byteidx(a, 4))
call assert_equal(-1, byteidxcomp(a, 4))
let b = '.é.' " normal e with composing char
call assert_equal(0, b->byteidx(0))
@@ -1212,18 +1207,184 @@ func Test_byteidx()
call assert_equal(4, b->byteidx(2))
call assert_equal(5, b->byteidx(3))
call assert_equal(-1, b->byteidx(4))
call assert_fails("call byteidx([], 0)", 'E730:')
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, byteidx(str, 0))
call assert_equal(1, byteidx(str, 1))
call assert_equal(6, byteidx(str, 2))
call assert_equal(7, byteidx(str, 3))
call assert_equal(12, byteidx(str, 4))
call assert_equal(-1, byteidx(str, 5))
" empty string
call assert_equal(0, byteidx('', 0))
call assert_equal(-1, byteidx('', 1))
" error cases
call assert_fails("call byteidx([], 0)", 'E730:')
call assert_fails("call byteidx('abc', [])", 'E745:')
endfunc
" Test for byteidxcomp() using a character index.
" The expected values show that every composing character gets its own index.
func Test_byteidxcomp()
let a = '.é.' " one char of two bytes
call assert_equal(0, byteidxcomp(a, 0))
call assert_equal(1, byteidxcomp(a, 1))
call assert_equal(3, byteidxcomp(a, 2))
" an index equal to the number of characters gives the length in bytes
call assert_equal(4, byteidxcomp(a, 3))
" a larger index gives -1
call assert_equal(-1, byteidxcomp(a, 4))
let b = '.é.' " normal e with composing char
call assert_equal(0, b->byteidxcomp(0))
call assert_equal(1, b->byteidxcomp(1))
call assert_equal(2, b->byteidxcomp(2))
call assert_equal(4, b->byteidxcomp(3))
call assert_equal(5, b->byteidxcomp(4))
call assert_equal(-1, b->byteidxcomp(5))
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, byteidxcomp(str, 0))
call assert_equal(1, byteidxcomp(str, 1))
call assert_equal(2, byteidxcomp(str, 2))
call assert_equal(4, byteidxcomp(str, 3))
call assert_equal(6, byteidxcomp(str, 4))
call assert_equal(7, byteidxcomp(str, 5))
call assert_equal(8, byteidxcomp(str, 6))
call assert_equal(10, byteidxcomp(str, 7))
call assert_equal(12, byteidxcomp(str, 8))
call assert_equal(-1, byteidxcomp(str, 9))
" empty string
call assert_equal(0, byteidxcomp('', 0))
call assert_equal(-1, byteidxcomp('', 1))
" error cases
call assert_fails("call byteidxcomp([], 0)", 'E730:')
call assert_fails("call byteidxcomp('abc', [])", 'E745:')
endfunc
" Test for charidx()
" Test for byteidx() using a UTF-16 index.
" Passing v:true as the third argument makes the second argument a UTF-16
" index instead of a character index.
func Test_byteidx_from_utf16_index()
" string with single byte characters
let str = "abc"
for i in range(3)
call assert_equal(i, byteidx(str, i, v:true))
endfor
" an index equal to the length gives the length, a larger index gives -1
call assert_equal(3, byteidx(str, 3, v:true))
call assert_equal(-1, byteidx(str, 4, v:true))
" string with two byte characters
let str = "a©©b"
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(3, byteidx(str, 2, v:true))
call assert_equal(5, byteidx(str, 3, v:true))
call assert_equal(6, byteidx(str, 4, v:true))
call assert_equal(-1, byteidx(str, 5, v:true))
" string with four byte characters: both UTF-16 indexes of such a character
" give the byte index where the character starts
let str = "a😊😊b"
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(1, byteidx(str, 2, v:true))
call assert_equal(5, byteidx(str, 3, v:true))
call assert_equal(5, byteidx(str, 4, v:true))
call assert_equal(9, byteidx(str, 5, v:true))
call assert_equal(10, byteidx(str, 6, v:true))
call assert_equal(-1, byteidx(str, 7, v:true))
" string with composing characters
let str = '-á-b́'
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(4, byteidx(str, 2, v:true))
call assert_equal(5, byteidx(str, 3, v:true))
call assert_equal(8, byteidx(str, 4, v:true))
call assert_equal(-1, byteidx(str, 5, v:true))
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(6, byteidx(str, 2, v:true))
call assert_equal(7, byteidx(str, 3, v:true))
call assert_equal(12, byteidx(str, 4, v:true))
call assert_equal(-1, byteidx(str, 5, v:true))
" empty string
call assert_equal(0, byteidx('', 0, v:true))
call assert_equal(-1, byteidx('', 1, v:true))
" error cases
call assert_fails('call byteidx(str, 0, [])', 'E745:')
endfunc
" Test for byteidxcomp() using a UTF-16 index.
" Passing v:true as the third argument makes the second argument a UTF-16
" index; the expected values give every composing character its own index.
func Test_byteidxcomp_from_utf16_index()
" string with single byte characters
let str = "abc"
for i in range(3)
call assert_equal(i, byteidxcomp(str, i, v:true))
endfor
" an index equal to the length gives the length, a larger index gives -1
call assert_equal(3, byteidxcomp(str, 3, v:true))
call assert_equal(-1, byteidxcomp(str, 4, v:true))
" string with two byte characters
let str = "a©©b"
call assert_equal(0, byteidxcomp(str, 0, v:true))
call assert_equal(1, byteidxcomp(str, 1, v:true))
call assert_equal(3, byteidxcomp(str, 2, v:true))
call assert_equal(5, byteidxcomp(str, 3, v:true))
call assert_equal(6, byteidxcomp(str, 4, v:true))
call assert_equal(-1, byteidxcomp(str, 5, v:true))
" string with four byte characters: both UTF-16 indexes of such a character
" give the byte index where the character starts
let str = "a😊😊b"
call assert_equal(0, byteidxcomp(str, 0, v:true))
call assert_equal(1, byteidxcomp(str, 1, v:true))
call assert_equal(1, byteidxcomp(str, 2, v:true))
call assert_equal(5, byteidxcomp(str, 3, v:true))
call assert_equal(5, byteidxcomp(str, 4, v:true))
call assert_equal(9, byteidxcomp(str, 5, v:true))
call assert_equal(10, byteidxcomp(str, 6, v:true))
call assert_equal(-1, byteidxcomp(str, 7, v:true))
" string with composing characters
let str = '-á-b́'
call assert_equal(0, byteidxcomp(str, 0, v:true))
call assert_equal(1, byteidxcomp(str, 1, v:true))
call assert_equal(2, byteidxcomp(str, 2, v:true))
call assert_equal(4, byteidxcomp(str, 3, v:true))
call assert_equal(5, byteidxcomp(str, 4, v:true))
call assert_equal(6, byteidxcomp(str, 5, v:true))
call assert_equal(8, byteidxcomp(str, 6, v:true))
call assert_equal(-1, byteidxcomp(str, 7, v:true))
" NOTE(review): this check is repeated under "error cases" at the end of this
" function
call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, byteidxcomp(str, 0, v:true))
call assert_equal(1, byteidxcomp(str, 1, v:true))
call assert_equal(2, byteidxcomp(str, 2, v:true))
call assert_equal(4, byteidxcomp(str, 3, v:true))
call assert_equal(6, byteidxcomp(str, 4, v:true))
call assert_equal(7, byteidxcomp(str, 5, v:true))
call assert_equal(8, byteidxcomp(str, 6, v:true))
call assert_equal(10, byteidxcomp(str, 7, v:true))
call assert_equal(12, byteidxcomp(str, 8, v:true))
call assert_equal(-1, byteidxcomp(str, 9, v:true))
" empty string
call assert_equal(0, byteidxcomp('', 0, v:true))
call assert_equal(-1, byteidxcomp('', 1, v:true))
" error cases
call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
endfunc
" Test for charidx() using a byte index
func Test_charidx()
let a = 'xáb́y'
call assert_equal(0, charidx(a, 0))
@@ -1232,17 +1393,20 @@ func Test_charidx()
call assert_equal(3, charidx(a, 7))
call assert_equal(-1, charidx(a, 8))
call assert_equal(-1, charidx(a, -1))
call assert_equal(-1, charidx('', 0))
call assert_equal(-1, charidx(test_null_string(), 0))
" count composing characters
call assert_equal(0, charidx(a, 0, 1))
call assert_equal(2, charidx(a, 2, 1))
call assert_equal(3, charidx(a, 4, 1))
call assert_equal(5, charidx(a, 7, 1))
call assert_equal(-1, charidx(a, 8, 1))
call assert_equal(0, a->charidx(0, 1))
call assert_equal(2, a->charidx(2, 1))
call assert_equal(3, a->charidx(4, 1))
call assert_equal(5, a->charidx(7, 1))
call assert_equal(-1, a->charidx(8, 1))
" empty string
call assert_equal(-1, charidx('', 0))
call assert_equal(-1, charidx('', 0, 1))
" error cases
call assert_equal(-1, charidx(test_null_string(), 0))
call assert_fails('let x = charidx([], 1)', 'E1174:')
call assert_fails('let x = charidx("abc", [])', 'E1210:')
call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
@@ -1250,6 +1414,237 @@ func Test_charidx()
call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
endfunc
" Test for charidx() using a UTF-16 index.
" Passing v:true as the fourth argument makes the second argument a UTF-16
" index instead of a byte index; the third argument is the "countcc" flag.
func Test_charidx_from_utf16_index()
" string with single byte characters
let str = "abc"
for i in range(3)
call assert_equal(i, charidx(str, i, v:false, v:true))
endfor
" an index equal to the length of the string gives -1
call assert_equal(-1, charidx(str, 3, v:false, v:true))
" string with two byte characters
let str = "a©©b"
call assert_equal(0, charidx(str, 0, v:false, v:true))
call assert_equal(1, charidx(str, 1, v:false, v:true))
call assert_equal(2, charidx(str, 2, v:false, v:true))
call assert_equal(3, charidx(str, 3, v:false, v:true))
call assert_equal(-1, charidx(str, 4, v:false, v:true))
" string with four byte characters: both UTF-16 indexes of such a character
" give the same character index
let str = "a😊😊b"
call assert_equal(0, charidx(str, 0, v:false, v:true))
call assert_equal(1, charidx(str, 1, v:false, v:true))
call assert_equal(1, charidx(str, 2, v:false, v:true))
call assert_equal(2, charidx(str, 3, v:false, v:true))
call assert_equal(2, charidx(str, 4, v:false, v:true))
call assert_equal(3, charidx(str, 5, v:false, v:true))
call assert_equal(-1, charidx(str, 6, v:false, v:true))
" string with composing characters
let str = '-á-b́'
for i in str->strcharlen()->range()
call assert_equal(i, charidx(str, i, v:false, v:true))
endfor
call assert_equal(-1, charidx(str, 4, v:false, v:true))
" with "countcc" set every composing character has its own index
for i in str->strchars()->range()
call assert_equal(i, charidx(str, i, v:true, v:true))
endfor
call assert_equal(-1, charidx(str, 6, v:true, v:true))
" string with multiple composing characters
let str = '-ą́-ą́'
for i in str->strcharlen()->range()
call assert_equal(i, charidx(str, i, v:false, v:true))
endfor
call assert_equal(-1, charidx(str, 4, v:false, v:true))
for i in str->strchars()->range()
call assert_equal(i, charidx(str, i, v:true, v:true))
endfor
call assert_equal(-1, charidx(str, 8, v:true, v:true))
" empty string
call assert_equal(-1, charidx('', 0, v:false, v:true))
call assert_equal(-1, charidx('', 0, v:true, v:true))
" error cases
" NOTE(review): the next two lines repeat the empty string checks above
call assert_equal(-1, charidx('', 0, v:false, v:true))
call assert_equal(-1, charidx('', 0, v:true, v:true))
call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true))
call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:')
call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:')
endfunc
" Test for utf16idx() using a byte index.
" The optional third argument is the "countcc" flag; with v:true every
" composing character gets its own UTF-16 index in the expected values.
func Test_utf16idx_from_byteidx()
" UTF-16 index of a string with single byte characters
let str = "abc"
for i in range(3)
call assert_equal(i, utf16idx(str, i))
endfor
" a byte index equal to the length of the string gives -1
call assert_equal(-1, utf16idx(str, 3))
" UTF-16 index of a string with two byte characters: both bytes of a
" character give the same index
let str = 'a©©b'
call assert_equal(0, str->utf16idx(0))
call assert_equal(1, str->utf16idx(1))
call assert_equal(1, str->utf16idx(2))
call assert_equal(2, str->utf16idx(3))
call assert_equal(2, str->utf16idx(4))
call assert_equal(3, str->utf16idx(5))
call assert_equal(-1, str->utf16idx(6))
" UTF-16 index of a string with four byte characters: all four bytes of a
" character give the same index
let str = 'a😊😊b'
call assert_equal(0, utf16idx(str, 0))
call assert_equal(2, utf16idx(str, 1))
call assert_equal(2, utf16idx(str, 2))
call assert_equal(2, utf16idx(str, 3))
call assert_equal(2, utf16idx(str, 4))
call assert_equal(4, utf16idx(str, 5))
call assert_equal(4, utf16idx(str, 6))
call assert_equal(4, utf16idx(str, 7))
call assert_equal(4, utf16idx(str, 8))
call assert_equal(5, utf16idx(str, 9))
call assert_equal(-1, utf16idx(str, 10))
" UTF-16 index of a string with composing characters
let str = '-á-b́'
call assert_equal(0, utf16idx(str, 0))
call assert_equal(1, utf16idx(str, 1))
call assert_equal(1, utf16idx(str, 2))
call assert_equal(1, utf16idx(str, 3))
call assert_equal(2, utf16idx(str, 4))
call assert_equal(3, utf16idx(str, 5))
call assert_equal(3, utf16idx(str, 6))
call assert_equal(3, utf16idx(str, 7))
call assert_equal(-1, utf16idx(str, 8))
" same string with composing characters counted separately
call assert_equal(0, utf16idx(str, 0, v:true))
call assert_equal(1, utf16idx(str, 1, v:true))
call assert_equal(2, utf16idx(str, 2, v:true))
call assert_equal(2, utf16idx(str, 3, v:true))
call assert_equal(3, utf16idx(str, 4, v:true))
call assert_equal(4, utf16idx(str, 5, v:true))
call assert_equal(5, utf16idx(str, 6, v:true))
call assert_equal(5, utf16idx(str, 7, v:true))
call assert_equal(-1, utf16idx(str, 8, v:true))
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, utf16idx(str, 0))
call assert_equal(1, utf16idx(str, 1))
call assert_equal(1, utf16idx(str, 2))
call assert_equal(1, utf16idx(str, 3))
call assert_equal(1, utf16idx(str, 4))
call assert_equal(1, utf16idx(str, 5))
call assert_equal(2, utf16idx(str, 6))
call assert_equal(3, utf16idx(str, 7))
call assert_equal(3, utf16idx(str, 8))
call assert_equal(3, utf16idx(str, 9))
call assert_equal(3, utf16idx(str, 10))
call assert_equal(3, utf16idx(str, 11))
call assert_equal(-1, utf16idx(str, 12))
" same string with composing characters counted separately
call assert_equal(0, utf16idx(str, 0, v:true))
call assert_equal(1, utf16idx(str, 1, v:true))
call assert_equal(2, utf16idx(str, 2, v:true))
call assert_equal(2, utf16idx(str, 3, v:true))
call assert_equal(3, utf16idx(str, 4, v:true))
call assert_equal(3, utf16idx(str, 5, v:true))
call assert_equal(4, utf16idx(str, 6, v:true))
call assert_equal(5, utf16idx(str, 7, v:true))
call assert_equal(6, utf16idx(str, 8, v:true))
call assert_equal(6, utf16idx(str, 9, v:true))
call assert_equal(7, utf16idx(str, 10, v:true))
call assert_equal(7, utf16idx(str, 11, v:true))
call assert_equal(-1, utf16idx(str, 12, v:true))
" empty string
call assert_equal(-1, utf16idx('', 0))
call assert_equal(-1, utf16idx('', 0, v:true))
" error cases
" NOTE(review): the next line repeats the first empty string check above
call assert_equal(-1, utf16idx("", 0))
call assert_equal(-1, utf16idx("abc", -1))
call assert_equal(-1, utf16idx(test_null_string(), 0))
call assert_fails('let l = utf16idx([], 0)', 'E1174:')
call assert_fails('let l = utf16idx("ab", [])', 'E1210:')
call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:')
endfunc
" Test for utf16idx() using a character index.
" Passing v:true as the fourth argument makes the second argument a character
" index instead of a byte index; the third argument is the "countcc" flag.
func Test_utf16idx_from_charidx()
" UTF-16 index of a string with single byte characters
let str = "abc"
for i in str->strcharlen()->range()
call assert_equal(i, utf16idx(str, i, v:false, v:true))
endfor
" an index equal to the number of characters gives -1
call assert_equal(-1, utf16idx(str, 3, v:false, v:true))
" UTF-16 index of a string with two byte characters
let str = "a©©b"
for i in str->strcharlen()->range()
call assert_equal(i, utf16idx(str, i, v:false, v:true))
endfor
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
" UTF-16 index of a string with four byte characters
let str = "a😊😊b"
call assert_equal(0, utf16idx(str, 0, v:false, v:true))
call assert_equal(2, utf16idx(str, 1, v:false, v:true))
call assert_equal(4, utf16idx(str, 2, v:false, v:true))
call assert_equal(5, utf16idx(str, 3, v:false, v:true))
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
" UTF-16 index of a string with composing characters
let str = '-á-b́'
for i in str->strcharlen()->range()
call assert_equal(i, utf16idx(str, i, v:false, v:true))
endfor
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
" with "countcc" set every composing character has its own index
for i in str->strchars()->range()
call assert_equal(i, utf16idx(str, i, v:true, v:true))
endfor
call assert_equal(-1, utf16idx(str, 6, v:true, v:true))
" string with multiple composing characters
let str = '-ą́-ą́'
for i in str->strcharlen()->range()
call assert_equal(i, utf16idx(str, i, v:false, v:true))
endfor
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
for i in str->strchars()->range()
call assert_equal(i, utf16idx(str, i, v:true, v:true))
endfor
call assert_equal(-1, utf16idx(str, 8, v:true, v:true))
" empty string
call assert_equal(-1, utf16idx('', 0, v:false, v:true))
call assert_equal(-1, utf16idx('', 0, v:true, v:true))
" error cases
call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true))
call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:')
endfunc
" Test for strutf16len().
" Each string is checked without and with the optional second argument; the
" results only differ for the strings with composing characters.
func Test_strutf16len()
call assert_equal(3, strutf16len('abc'))
call assert_equal(3, 'abc'->strutf16len(v:true))
" two byte characters take one UTF-16 code unit each
call assert_equal(4, strutf16len('a©©b'))
call assert_equal(4, strutf16len('a©©b', v:true))
" four byte characters take two UTF-16 code units each
call assert_equal(6, strutf16len('a😊😊b'))
call assert_equal(6, strutf16len('a😊😊b', v:true))
" composing characters are only counted when the second argument is v:true
call assert_equal(4, strutf16len('-á-b́'))
call assert_equal(6, strutf16len('-á-b́', v:true))
call assert_equal(4, strutf16len('-ą́-ą́'))
call assert_equal(8, strutf16len('-ą́-ą́', v:true))
" empty string
call assert_equal(0, strutf16len(''))
" error cases
call assert_fails('let l = strutf16len([])', 'E1174:')
call assert_fails('let l = strutf16len("a", [])', 'E1212:')
call assert_equal(0, strutf16len(test_null_string()))
endfunc
func Test_count()
let l = ['a', 'a', 'A', 'b']
call assert_equal(2, count(l, 'a'))
@@ -3074,5 +3469,4 @@ func Test_delfunc_while_listing()
call StopVimInTerminal(buf)
endfunc
" vim: shiftwidth=2 sts=2 expandtab

View File

@@ -695,6 +695,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
1485,
/**/
1484,
/**/