0
0
mirror of https://github.com/vim/vim.git synced 2025-09-26 04:04:07 -04:00

patch 8.2.2605: Vim9: string index and slice does not include composing chars

Problem:    Vim9: string index and slice does not include composing chars.
Solution:   Include composing characters. (issue #6563)
This commit is contained in:
Bram Moolenaar
2021-03-14 18:40:19 +01:00
parent 240309c9bf
commit 0289a093a4
4 changed files with 54 additions and 16 deletions

View File

@@ -96,8 +96,8 @@ script and `:def` functions; details are below:
def CallMe(count: number, message: string): bool def CallMe(count: number, message: string): bool
- Call functions without `:call`: > - Call functions without `:call`: >
writefile(['done'], 'file.txt') writefile(['done'], 'file.txt')
- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert` or - You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert`, `:open`
curly-braces names. or curly-braces names.
- A range before a command must be prefixed with a colon: > - A range before a command must be prefixed with a colon: >
:%s/this/that :%s/this/that
- Unless mentioned specifically, the highest |scriptversion| is used. - Unless mentioned specifically, the highest |scriptversion| is used.
@@ -341,7 +341,8 @@ Functions can be called without `:call`: >
Using `:call` is still possible, but this is discouraged. Using `:call` is still possible, but this is discouraged.
A method call without `eval` is possible, so long as the start is an A method call without `eval` is possible, so long as the start is an
identifier or can't be an Ex command. Examples: > identifier or can't be an Ex command. For a function either "(" or "->" must
follow, without a line break. Examples: >
myList->add(123) myList->add(123)
g:myList->add(123) g:myList->add(123)
[1, 2, 3]->Process() [1, 2, 3]->Process()
@@ -696,8 +697,9 @@ for v:null. When converting a boolean to a string "false" and "true" are
used, not "v:false" and "v:true" like in legacy script. "v:none" is not used, not "v:false" and "v:true" like in legacy script. "v:none" is not
changed, it is only used in JSON and has no equivalent in other languages. changed, it is only used in JSON and has no equivalent in other languages.
Indexing a string with [idx] or [idx : idx] uses character indexes instead of Indexing a string with [idx] or taking a slice with [idx : idx] uses character
byte indexes. Example: > indexes instead of byte indexes. Composing characters are included.
Example: >
echo 'bár'[1] echo 'bár'[1]
In legacy script this results in the character 0xc3 (an illegal byte), in Vim9 In legacy script this results in the character 0xc3 (an illegal byte), in Vim9
script this results in the string 'á'. script this results in the string 'á'.
@@ -845,6 +847,8 @@ THIS IS STILL UNDER DEVELOPMENT - ANYTHING CAN BREAK - ANYTHING CAN CHANGE
:enddef End of a function defined with `:def`. It should be on :enddef End of a function defined with `:def`. It should be on
a line by its own. a line by its own.
You may also find this wiki useful. It was written by an early adopter of
Vim9 script: https://github.com/lacygoill/wiki/blob/master/vim/vim9.md
If the script the function is defined in is Vim9 script, then script-local If the script the function is defined in is Vim9 script, then script-local
variables can be accessed without the "s:" prefix. They must be defined variables can be accessed without the "s:" prefix. They must be defined

View File

@@ -2367,6 +2367,35 @@ def Test_expr7_any_index_slice()
assert_equal('abcd', g:teststring[: -3]) assert_equal('abcd', g:teststring[: -3])
assert_equal('', g:teststring[: -9]) assert_equal('', g:teststring[: -9])
# composing characters are included
g:teststring = 'àéû'
assert_equal('à', g:teststring[0])
assert_equal('é', g:teststring[1])
assert_equal('û', g:teststring[2])
assert_equal('', g:teststring[3])
assert_equal('', g:teststring[4])
assert_equal('û', g:teststring[-1])
assert_equal('é', g:teststring[-2])
assert_equal('à', g:teststring[-3])
assert_equal('', g:teststring[-4])
assert_equal('', g:teststring[-5])
assert_equal('à', g:teststring[0 : 0])
assert_equal('é', g:teststring[1 : 1])
assert_equal('àé', g:teststring[0 : 1])
assert_equal('àéû', g:teststring[0 : -1])
assert_equal('àé', g:teststring[0 : -2])
assert_equal('à', g:teststring[0 : -3])
assert_equal('', g:teststring[0 : -4])
assert_equal('', g:teststring[0 : -5])
assert_equal('àéû', g:teststring[ : ])
assert_equal('àéû', g:teststring[0 : ])
assert_equal('éû', g:teststring[1 : ])
assert_equal('û', g:teststring[2 : ])
assert_equal('', g:teststring[3 : ])
assert_equal('', g:teststring[4 : ])
# blob index cannot be out of range # blob index cannot be out of range
g:testblob = 0z01ab g:testblob = 0z01ab
assert_equal(0x01, g:testblob[0]) assert_equal(0x01, g:testblob[0])

View File

@@ -750,6 +750,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
2605,
/**/ /**/
2604, 2604,
/**/ /**/

View File

@@ -985,8 +985,9 @@ allocate_if_null(typval_T *tv)
} }
/* /*
* Return the character "str[index]" where "index" is the character index. If * Return the character "str[index]" where "index" is the character index,
* "index" is out of range NULL is returned. * including composing characters.
* If "index" is out of range NULL is returned.
*/ */
char_u * char_u *
char_from_string(char_u *str, varnumber_T index) char_from_string(char_u *str, varnumber_T index)
@@ -1005,7 +1006,7 @@ char_from_string(char_u *str, varnumber_T index)
int clen = 0; int clen = 0;
for (nbyte = 0; nbyte < slen; ++clen) for (nbyte = 0; nbyte < slen; ++clen)
nbyte += MB_CPTR2LEN(str + nbyte); nbyte += mb_ptr2len(str + nbyte);
nchar = clen + index; nchar = clen + index;
if (nchar < 0) if (nchar < 0)
// unlike list: index out of range results in empty string // unlike list: index out of range results in empty string
@@ -1013,15 +1014,15 @@ char_from_string(char_u *str, varnumber_T index)
} }
for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar) for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar)
nbyte += MB_CPTR2LEN(str + nbyte); nbyte += mb_ptr2len(str + nbyte);
if (nbyte >= slen) if (nbyte >= slen)
return NULL; return NULL;
return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); return vim_strnsave(str + nbyte, mb_ptr2len(str + nbyte));
} }
/* /*
* Get the byte index for character index "idx" in string "str" with length * Get the byte index for character index "idx" in string "str" with length
* "str_len". * "str_len". Composing characters are included.
* If going over the end return "str_len". * If going over the end return "str_len".
* If "idx" is negative count from the end, -1 is the last character. * If "idx" is negative count from the end, -1 is the last character.
* When going over the start return -1. * When going over the start return -1.
@@ -1036,7 +1037,7 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx)
{ {
while (nchar > 0 && nbyte < str_len) while (nchar > 0 && nbyte < str_len)
{ {
nbyte += MB_CPTR2LEN(str + nbyte); nbyte += mb_ptr2len(str + nbyte);
--nchar; --nchar;
} }
} }
@@ -1056,7 +1057,8 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx)
} }
/* /*
* Return the slice "str[first:last]" using character indexes. * Return the slice "str[first : last]" using character indexes. Composing
* characters are included.
* "exclusive" is TRUE for slice(). * "exclusive" is TRUE for slice().
* Return NULL when the result is empty. * Return NULL when the result is empty.
*/ */
@@ -1079,7 +1081,7 @@ string_slice(char_u *str, varnumber_T first, varnumber_T last, int exclusive)
end_byte = char_idx2byte(str, slen, last); end_byte = char_idx2byte(str, slen, last);
if (!exclusive && end_byte >= 0 && end_byte < (long)slen) if (!exclusive && end_byte >= 0 && end_byte < (long)slen)
// end index is inclusive // end index is inclusive
end_byte += MB_CPTR2LEN(str + end_byte); end_byte += mb_ptr2len(str + end_byte);
} }
if (start_byte >= (long)slen || end_byte <= start_byte) if (start_byte >= (long)slen || end_byte <= start_byte)
@@ -3249,8 +3251,9 @@ call_def_function(
res = string_slice(tv->vval.v_string, n1, n2, FALSE); res = string_slice(tv->vval.v_string, n1, n2, FALSE);
else else
// Index: The resulting variable is a string of a // Index: The resulting variable is a string of a
// single character. If the index is too big or // single character (including composing characters).
// negative the result is empty. // If the index is too big, or a negative index goes
// past the start of the string, the result is empty.
res = char_from_string(tv->vval.v_string, n2); res = char_from_string(tv->vval.v_string, n2);
vim_free(tv->vval.v_string); vim_free(tv->vval.v_string);
tv->vval.v_string = res; tv->vval.v_string = res;