0
0
mirror of https://github.com/vim/vim.git synced 2025-10-04 05:25:06 -04:00

patch 8.2.2233: cannot convert a byte index into a character index

Problem:    Cannot convert a byte index into a character index.
Solution:   Add charidx(). (Yegappan Lakshmanan, closes #7561)
This commit is contained in:
Bram Moolenaar
2020-12-28 12:56:58 +01:00
parent 9618a25b9c
commit 17793ef23a
5 changed files with 109 additions and 0 deletions

View File

@@ -2475,6 +2475,8 @@ ch_status({handle} [, {options}])
changenr() Number current change number changenr() Number current change number
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr} char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
charclass({string}) Number character class of {string} charclass({string}) Number character class of {string}
charidx({string}, {idx} [, {countcc}])
Number char index of byte {idx} in {string}
chdir({dir}) String change current working directory chdir({dir}) String change current working directory
cindent({lnum}) Number C indent for line {lnum} cindent({lnum}) Number C indent for line {lnum}
clearmatches([{win}]) none clear all matches clearmatches([{win}]) none clear all matches
@@ -3588,6 +3590,31 @@ charclass({string}) *charclass()*
other specific Unicode class other specific Unicode class
The class is used in patterns and word motions. The class is used in patterns and word motions.
*charidx()*
charidx({string}, {idx} [, {countcc}])
Return the character index of the byte at {idx} in {string}.
The index of the first character is zero.
If there are no multibyte characters the returned value is
equal to {idx}.
When {countcc} is omitted or zero, composing characters are
not counted separately; their byte length is added to the
preceding base character.
When {countcc} is set to 1, then composing characters are
counted as separate characters.
Returns -1 if the arguments are invalid or if {idx} is greater
than the index of the last byte in {string}. An error is
given if the first argument is not a string, the second
argument is not a number or when the third argument is present
and is not zero or one.
See |byteidx()| and |byteidxcomp()| for getting the byte index
from the character index.
Examples: >
echo charidx('áb́ć', 3) returns 1
echo charidx('áb́ć', 6, 1) returns 4
echo charidx('áb́ć', 16) returns -1
<
Can also be used as a |method|: >
GetName()->charidx(idx)
chdir({dir}) *chdir()* chdir({dir}) *chdir()*
Change the current working directory to {dir}. The scope of Change the current working directory to {dir}. The scope of

View File

@@ -625,6 +625,7 @@ String manipulation: *string-functions*
iconv() convert text from one encoding to another iconv() convert text from one encoding to another
byteidx() byte index of a character in a string byteidx() byte index of a character in a string
byteidxcomp() like byteidx() but count composing characters byteidxcomp() like byteidx() but count composing characters
charidx() character index of a byte in a string
repeat() repeat a string multiple times repeat() repeat a string multiple times
eval() evaluate a string expression eval() evaluate a string expression
execute() execute an Ex command and get the output execute() execute an Ex command and get the output

View File

@@ -47,6 +47,7 @@ static void f_ceil(typval_T *argvars, typval_T *rettv);
#endif #endif
static void f_changenr(typval_T *argvars, typval_T *rettv); static void f_changenr(typval_T *argvars, typval_T *rettv);
static void f_char2nr(typval_T *argvars, typval_T *rettv); static void f_char2nr(typval_T *argvars, typval_T *rettv);
static void f_charidx(typval_T *argvars, typval_T *rettv);
static void f_col(typval_T *argvars, typval_T *rettv); static void f_col(typval_T *argvars, typval_T *rettv);
static void f_confirm(typval_T *argvars, typval_T *rettv); static void f_confirm(typval_T *argvars, typval_T *rettv);
static void f_copy(typval_T *argvars, typval_T *rettv); static void f_copy(typval_T *argvars, typval_T *rettv);
@@ -789,6 +790,8 @@ static funcentry_T global_functions[] =
ret_number, f_char2nr}, ret_number, f_char2nr},
{"charclass", 1, 1, FEARG_1, NULL, {"charclass", 1, 1, FEARG_1, NULL,
ret_number, f_charclass}, ret_number, f_charclass},
{"charidx", 2, 3, FEARG_1, NULL,
ret_number, f_charidx},
{"chdir", 1, 1, FEARG_1, NULL, {"chdir", 1, 1, FEARG_1, NULL,
ret_string, f_chdir}, ret_string, f_chdir},
{"cindent", 1, 1, FEARG_1, NULL, {"cindent", 1, 1, FEARG_1, NULL,
@@ -2420,6 +2423,57 @@ f_char2nr(typval_T *argvars, typval_T *rettv)
rettv->vval.v_number = tv_get_string(&argvars[0])[0]; rettv->vval.v_number = tv_get_string(&argvars[0])[0];
} }
/*
 * "charidx()" function
 *
 * charidx({string}, {idx} [, {countcc}])
 *
 * Stores in "rettv" the character index of the byte at offset {idx} in
 * {string}.  The result is -1 when:
 * - an argument has the wrong type (an "invalid argument" error is given),
 * - {countcc} is not zero or one (an error is given),
 * - {idx} is negative, or
 * - {idx} is beyond the last byte of {string} (this includes an empty
 *   {string}).
 *
 * When {countcc} is non-zero and the encoding is UTF-8, composing characters
 * are counted as separate characters.  Otherwise they are counted together
 * with the preceding base character.
 */
static void
f_charidx(typval_T *argvars, typval_T *rettv)
{
    char_u      *str;
    varnumber_T idx;
    int         countcc = FALSE;
    char_u      *p;
    int         len;
    int         (*ptr2len)(char_u *);

    // Every early return below reports failure through this value.
    rettv->vval.v_number = -1;

    if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
            || (argvars[2].v_type != VAR_UNKNOWN
                && argvars[2].v_type != VAR_NUMBER))
    {
        emsg(_(e_invarg));
        return;
    }

    str = tv_get_string_chk(&argvars[0]);
    idx = tv_get_number_chk(&argvars[1], NULL);
    if (str == NULL || idx < 0)
        return;

    if (argvars[2].v_type != VAR_UNKNOWN)
        countcc = (int)tv_get_bool(&argvars[2]);
    if (countcc < 0 || countcc > 1)
    {
        semsg(_(e_using_number_as_bool_nr), countcc);
        return;
    }

    // Pick the function that tells how many bytes to skip for one counted
    // character: composing characters are only stepped over on their own
    // when that was requested and the encoding is UTF-8.
    if (enc_utf8 && countcc)
        ptr2len = utf_ptr2len;
    else
        ptr2len = mb_ptr2len;

    // Walk the string one character at a time until the character that
    // contains byte "idx" has been passed.  Compare byte offsets instead of
    // computing "str + idx": "idx" comes from the caller and may be far larger
    // than the string, in which case "str + idx" would be an out-of-bounds
    // pointer (undefined behavior, and it may wrap around when pointers are
    // narrower than varnumber_T).
    for (p = str, len = 0; (varnumber_T)(p - str) <= idx; len++)
    {
        // Reached the end of the string before byte "idx": result stays -1.
        if (*p == NUL)
            return;
        p += ptr2len(p);
    }

    // "len" counted one past the character holding byte "idx".
    rettv->vval.v_number = len > 0 ? len - 1 : 0;
}
win_T * win_T *
get_optional_window(typval_T *argvars, int idx) get_optional_window(typval_T *argvars, int idx)
{ {

View File

@@ -1132,6 +1132,31 @@ func Test_byteidx()
call assert_fails("call byteidxcomp([], 0)", 'E730:') call assert_fails("call byteidxcomp([], 0)", 'E730:')
endfunc endfunc
" Test for charidx()
func Test_charidx()
  " Build the test string with explicit escapes so that the byte layout does
  " not depend on how this script file is stored or normalized.
  " Layout (assumes 'encoding' is utf-8, as the byte offsets below do):
  "   byte 0    : x
  "   bytes 1-3 : a + U+0301 (combining acute accent)
  "   bytes 4-6 : b + U+0301 (combining acute accent)
  "   byte 7    : y
  let a = "xa\u0301b\u0301y"

  " composing characters belong to the preceding base character
  call assert_equal(0, charidx(a, 0))
  call assert_equal(1, charidx(a, 3))
  call assert_equal(2, charidx(a, 4))
  call assert_equal(3, charidx(a, 7))
  call assert_equal(-1, charidx(a, 8))
  call assert_equal(-1, charidx('', 0))

  " count composing characters
  call assert_equal(0, charidx(a, 0, 1))
  call assert_equal(2, charidx(a, 2, 1))
  call assert_equal(3, charidx(a, 4, 1))
  call assert_equal(5, charidx(a, 7, 1))
  call assert_equal(-1, charidx(a, 8, 1))
  call assert_equal(-1, charidx('', 0, 1))

  " a negative byte index is not an error, the result is -1
  call assert_equal(-1, charidx(a, -1))
  call assert_equal(-1, charidx(a, -1, 1))

  " arguments of the wrong type
  call assert_fails('let x = charidx([], 1)', 'E474:')
  call assert_fails('let x = charidx("abc", [])', 'E474:')
  call assert_fails('let x = charidx("abc", 1, [])', 'E474:')

  " {countcc} must be zero or one
  call assert_fails('let x = charidx("abc", 1, -1)', 'E1023:')
  call assert_fails('let x = charidx("abc", 1, 2)', 'E1023:')
endfunc
func Test_count() func Test_count()
let l = ['a', 'a', 'A', 'b'] let l = ['a', 'a', 'A', 'b']
call assert_equal(2, count(l, 'a')) call assert_equal(2, count(l, 'a'))

View File

@@ -750,6 +750,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
2233,
/**/ /**/
2232, 2232,
/**/ /**/