0
0
mirror of https://github.com/vim/vim.git synced 2025-09-28 04:24:06 -04:00

patch 9.0.1485: no functions for converting from/to UTF-16 index

Problem:    no functions for converting from/to UTF-16 index.
Solution:   Add UTF-16 flag to existing functions and add strutf16len() and
            utf16idx(). (Yegappan Lakshmanan, closes #12216)
This commit is contained in:
Christian Brabandt
2023-04-24 21:09:54 +01:00
committed by Bram Moolenaar
parent e1b4822137
commit 67672ef097
8 changed files with 677 additions and 56 deletions

View File

@@ -1192,19 +1192,14 @@ func Test_byte2line_line2byte()
bw!
endfunc
" Test for byteidx() and byteidxcomp() functions
" Test for byteidx() using a character index
func Test_byteidx()
let a = '.é.' " one char of two bytes
call assert_equal(0, byteidx(a, 0))
call assert_equal(0, byteidxcomp(a, 0))
call assert_equal(1, byteidx(a, 1))
call assert_equal(1, byteidxcomp(a, 1))
call assert_equal(3, byteidx(a, 2))
call assert_equal(3, byteidxcomp(a, 2))
call assert_equal(4, byteidx(a, 3))
call assert_equal(4, byteidxcomp(a, 3))
call assert_equal(-1, byteidx(a, 4))
call assert_equal(-1, byteidxcomp(a, 4))
let b = '.é.' " normal e with composing char
call assert_equal(0, b->byteidx(0))
@@ -1212,18 +1207,184 @@ func Test_byteidx()
call assert_equal(4, b->byteidx(2))
call assert_equal(5, b->byteidx(3))
call assert_equal(-1, b->byteidx(4))
call assert_fails("call byteidx([], 0)", 'E730:')
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, byteidx(str, 0))
call assert_equal(1, byteidx(str, 1))
call assert_equal(6, byteidx(str, 2))
call assert_equal(7, byteidx(str, 3))
call assert_equal(12, byteidx(str, 4))
call assert_equal(-1, byteidx(str, 5))
" empty string
call assert_equal(0, byteidx('', 0))
call assert_equal(-1, byteidx('', 1))
" error cases
call assert_fails("call byteidx([], 0)", 'E730:')
call assert_fails("call byteidx('abc', [])", 'E745:')
endfunc
" Test for byteidxcomp() using a character index
func Test_byteidxcomp()
  " byteidxcomp() maps a character index to a byte index; the expected values
  " below show composing characters being counted as separate characters.

  " precomposed character: one char of two bytes
  let a = '.é.'
  for [nr, want] in [[0, 0], [1, 1], [2, 3], [3, 4], [4, -1]]
    call assert_equal(want, byteidxcomp(a, nr))
  endfor

  " normal e with composing char (exercised through the method-call syntax)
  let b = '.é.'
  for [nr, want] in [[0, 0], [1, 1], [2, 2], [3, 4], [4, 5], [5, -1]]
    call assert_equal(want, b->byteidxcomp(nr))
  endfor

  " string with multiple composing characters
  let str = '-ą́-ą́'
  let expected = [0, 1, 2, 4, 6, 7, 8, 10, 12, -1]
  for nr in range(len(expected))
    call assert_equal(expected[nr], byteidxcomp(str, nr))
  endfor

  " empty string: index 0 is valid, anything beyond is not
  call assert_equal(0, byteidxcomp('', 0))
  call assert_equal(-1, byteidxcomp('', 1))

  " error cases
  call assert_fails("call byteidxcomp([], 0)", 'E730:')
  call assert_fails("call byteidxcomp('abc', [])", 'E745:')
endfunc
" Test for charidx()
" Test for byteidx() using a UTF-16 index
func Test_byteidx_from_utf16_index()
" Checks byteidx() when the third argument is v:true, i.e. when the index
" passed in is a UTF-16 index instead of a character index.
" string with single byte characters
let str = "abc"
for i in range(3)
call assert_equal(i, byteidx(str, i, v:true))
endfor
" an index equal to the length gives the length; one past that gives -1
call assert_equal(3, byteidx(str, 3, v:true))
call assert_equal(-1, byteidx(str, 4, v:true))
" string with two byte characters
let str = "a©©b"
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(3, byteidx(str, 2, v:true))
call assert_equal(5, byteidx(str, 3, v:true))
call assert_equal(6, byteidx(str, 4, v:true))
call assert_equal(-1, byteidx(str, 5, v:true))
" string with four byte characters
" each emoji takes two UTF-16 indexes; both are expected to give the byte
" index where that emoji starts
let str = "a😊😊b"
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(1, byteidx(str, 2, v:true))
call assert_equal(5, byteidx(str, 3, v:true))
call assert_equal(5, byteidx(str, 4, v:true))
call assert_equal(9, byteidx(str, 5, v:true))
call assert_equal(10, byteidx(str, 6, v:true))
call assert_equal(-1, byteidx(str, 7, v:true))
" string with composing characters
" byteidx() does not count a composing character separately, so the index
" steps over the base character together with its composing character
let str = '-á-b́'
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(4, byteidx(str, 2, v:true))
call assert_equal(5, byteidx(str, 3, v:true))
call assert_equal(8, byteidx(str, 4, v:true))
call assert_equal(-1, byteidx(str, 5, v:true))
" string with multiple composing characters
let str = '-ą́-ą́'
call assert_equal(0, byteidx(str, 0, v:true))
call assert_equal(1, byteidx(str, 1, v:true))
call assert_equal(6, byteidx(str, 2, v:true))
call assert_equal(7, byteidx(str, 3, v:true))
call assert_equal(12, byteidx(str, 4, v:true))
call assert_equal(-1, byteidx(str, 5, v:true))
" empty string
call assert_equal(0, byteidx('', 0, v:true))
call assert_equal(-1, byteidx('', 1, v:true))
" error cases
" a List is not accepted as the UTF-16 flag
call assert_fails('call byteidx(str, 0, [])', 'E745:')
endfunc
" Test for byteidxcomp() using a UTF-16 index
func Test_byteidxcomp_from_utf16_index()
  " Checks byteidxcomp() when the third argument is v:true, i.e. when the
  " index passed in is a UTF-16 index.  Unlike byteidx(), the expected values
  " show each composing character getting an index of its own.

  " string with single byte characters
  let str = "abc"
  for i in range(3)
    call assert_equal(i, byteidxcomp(str, i, v:true))
  endfor
  " an index equal to the length gives the length; one past that gives -1
  call assert_equal(3, byteidxcomp(str, 3, v:true))
  call assert_equal(-1, byteidxcomp(str, 4, v:true))

  " string with two byte characters
  let str = "a©©b"
  call assert_equal(0, byteidxcomp(str, 0, v:true))
  call assert_equal(1, byteidxcomp(str, 1, v:true))
  call assert_equal(3, byteidxcomp(str, 2, v:true))
  call assert_equal(5, byteidxcomp(str, 3, v:true))
  call assert_equal(6, byteidxcomp(str, 4, v:true))
  call assert_equal(-1, byteidxcomp(str, 5, v:true))

  " string with four byte characters
  " each emoji takes two UTF-16 indexes; both are expected to give the byte
  " index where that emoji starts
  let str = "a😊😊b"
  call assert_equal(0, byteidxcomp(str, 0, v:true))
  call assert_equal(1, byteidxcomp(str, 1, v:true))
  call assert_equal(1, byteidxcomp(str, 2, v:true))
  call assert_equal(5, byteidxcomp(str, 3, v:true))
  call assert_equal(5, byteidxcomp(str, 4, v:true))
  call assert_equal(9, byteidxcomp(str, 5, v:true))
  call assert_equal(10, byteidxcomp(str, 6, v:true))
  call assert_equal(-1, byteidxcomp(str, 7, v:true))

  " string with composing characters
  let str = '-á-b́'
  call assert_equal(0, byteidxcomp(str, 0, v:true))
  call assert_equal(1, byteidxcomp(str, 1, v:true))
  call assert_equal(2, byteidxcomp(str, 2, v:true))
  call assert_equal(4, byteidxcomp(str, 3, v:true))
  call assert_equal(5, byteidxcomp(str, 4, v:true))
  call assert_equal(6, byteidxcomp(str, 5, v:true))
  call assert_equal(8, byteidxcomp(str, 6, v:true))
  call assert_equal(-1, byteidxcomp(str, 7, v:true))

  " string with multiple composing characters
  let str = '-ą́-ą́'
  call assert_equal(0, byteidxcomp(str, 0, v:true))
  call assert_equal(1, byteidxcomp(str, 1, v:true))
  call assert_equal(2, byteidxcomp(str, 2, v:true))
  call assert_equal(4, byteidxcomp(str, 3, v:true))
  call assert_equal(6, byteidxcomp(str, 4, v:true))
  call assert_equal(7, byteidxcomp(str, 5, v:true))
  call assert_equal(8, byteidxcomp(str, 6, v:true))
  call assert_equal(10, byteidxcomp(str, 7, v:true))
  call assert_equal(12, byteidxcomp(str, 8, v:true))
  call assert_equal(-1, byteidxcomp(str, 9, v:true))

  " empty string
  call assert_equal(0, byteidxcomp('', 0, v:true))
  call assert_equal(-1, byteidxcomp('', 1, v:true))

  " error cases
  " a List is not accepted as the UTF-16 flag
  call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
endfunc
" Test for charidx() using a byte index
func Test_charidx()
let a = 'xáb́y'
call assert_equal(0, charidx(a, 0))
@@ -1232,17 +1393,20 @@ func Test_charidx()
call assert_equal(3, charidx(a, 7))
call assert_equal(-1, charidx(a, 8))
call assert_equal(-1, charidx(a, -1))
call assert_equal(-1, charidx('', 0))
call assert_equal(-1, charidx(test_null_string(), 0))
" count composing characters
call assert_equal(0, charidx(a, 0, 1))
call assert_equal(2, charidx(a, 2, 1))
call assert_equal(3, charidx(a, 4, 1))
call assert_equal(5, charidx(a, 7, 1))
call assert_equal(-1, charidx(a, 8, 1))
call assert_equal(0, a->charidx(0, 1))
call assert_equal(2, a->charidx(2, 1))
call assert_equal(3, a->charidx(4, 1))
call assert_equal(5, a->charidx(7, 1))
call assert_equal(-1, a->charidx(8, 1))
" empty string
call assert_equal(-1, charidx('', 0))
call assert_equal(-1, charidx('', 0, 1))
" error cases
call assert_equal(-1, charidx(test_null_string(), 0))
call assert_fails('let x = charidx([], 1)', 'E1174:')
call assert_fails('let x = charidx("abc", [])', 'E1210:')
call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
@@ -1250,6 +1414,237 @@ func Test_charidx()
call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
endfunc
" Test for charidx() using a UTF-16 index
func Test_charidx_from_utf16_index()
  " Checks charidx() when the fourth argument is v:true, i.e. when the index
  " passed in is a UTF-16 index instead of a byte index.  The third argument
  " selects whether composing characters are counted separately.

  " string with single byte characters
  let str = "abc"
  for i in range(3)
    call assert_equal(i, charidx(str, i, v:false, v:true))
  endfor
  " an index equal to the number of UTF-16 code units is out of range
  call assert_equal(-1, charidx(str, 3, v:false, v:true))

  " string with two byte characters
  let str = "a©©b"
  call assert_equal(0, charidx(str, 0, v:false, v:true))
  call assert_equal(1, charidx(str, 1, v:false, v:true))
  call assert_equal(2, charidx(str, 2, v:false, v:true))
  call assert_equal(3, charidx(str, 3, v:false, v:true))
  call assert_equal(-1, charidx(str, 4, v:false, v:true))

  " string with four byte characters
  " both UTF-16 indexes of an emoji are expected to give the same character
  let str = "a😊😊b"
  call assert_equal(0, charidx(str, 0, v:false, v:true))
  call assert_equal(1, charidx(str, 1, v:false, v:true))
  call assert_equal(1, charidx(str, 2, v:false, v:true))
  call assert_equal(2, charidx(str, 3, v:false, v:true))
  call assert_equal(2, charidx(str, 4, v:false, v:true))
  call assert_equal(3, charidx(str, 5, v:false, v:true))
  call assert_equal(-1, charidx(str, 6, v:false, v:true))

  " string with composing characters
  let str = '-á-b́'
  for i in str->strcharlen()->range()
    call assert_equal(i, charidx(str, i, v:false, v:true))
  endfor
  call assert_equal(-1, charidx(str, 4, v:false, v:true))
  " same string, now counting composing characters separately
  for i in str->strchars()->range()
    call assert_equal(i, charidx(str, i, v:true, v:true))
  endfor
  call assert_equal(-1, charidx(str, 6, v:true, v:true))

  " string with multiple composing characters
  let str = '-ą́-ą́'
  for i in str->strcharlen()->range()
    call assert_equal(i, charidx(str, i, v:false, v:true))
  endfor
  call assert_equal(-1, charidx(str, 4, v:false, v:true))
  for i in str->strchars()->range()
    call assert_equal(i, charidx(str, i, v:true, v:true))
  endfor
  call assert_equal(-1, charidx(str, 8, v:true, v:true))

  " empty string
  call assert_equal(-1, charidx('', 0, v:false, v:true))
  call assert_equal(-1, charidx('', 0, v:true, v:true))

  " error cases
  call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true))
  call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:')
  call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:')
endfunc
" Test for utf16idx() using a byte index
func Test_utf16idx_from_byteidx()
  " Checks utf16idx() with a byte index as the second argument.  The optional
  " third argument selects whether composing characters are counted
  " separately.

  " UTF-16 index of a string with single byte characters
  let str = "abc"
  for i in range(3)
    call assert_equal(i, utf16idx(str, i))
  endfor
  " a byte index equal to the string length is out of range
  call assert_equal(-1, utf16idx(str, 3))

  " UTF-16 index of a string with two byte characters
  " both bytes of a character are expected to give the same UTF-16 index
  let str = 'a©©b'
  call assert_equal(0, str->utf16idx(0))
  call assert_equal(1, str->utf16idx(1))
  call assert_equal(1, str->utf16idx(2))
  call assert_equal(2, str->utf16idx(3))
  call assert_equal(2, str->utf16idx(4))
  call assert_equal(3, str->utf16idx(5))
  call assert_equal(-1, str->utf16idx(6))

  " UTF-16 index of a string with four byte characters
  " every byte of an emoji is expected to give the index of the second of its
  " two UTF-16 code units (2 for the first emoji, 4 for the second)
  let str = 'a😊😊b'
  call assert_equal(0, utf16idx(str, 0))
  call assert_equal(2, utf16idx(str, 1))
  call assert_equal(2, utf16idx(str, 2))
  call assert_equal(2, utf16idx(str, 3))
  call assert_equal(2, utf16idx(str, 4))
  call assert_equal(4, utf16idx(str, 5))
  call assert_equal(4, utf16idx(str, 6))
  call assert_equal(4, utf16idx(str, 7))
  call assert_equal(4, utf16idx(str, 8))
  call assert_equal(5, utf16idx(str, 9))
  call assert_equal(-1, utf16idx(str, 10))

  " UTF-16 index of a string with composing characters
  let str = '-á-b́'
  call assert_equal(0, utf16idx(str, 0))
  call assert_equal(1, utf16idx(str, 1))
  call assert_equal(1, utf16idx(str, 2))
  call assert_equal(1, utf16idx(str, 3))
  call assert_equal(2, utf16idx(str, 4))
  call assert_equal(3, utf16idx(str, 5))
  call assert_equal(3, utf16idx(str, 6))
  call assert_equal(3, utf16idx(str, 7))
  call assert_equal(-1, utf16idx(str, 8))
  " same string, now counting composing characters separately
  call assert_equal(0, utf16idx(str, 0, v:true))
  call assert_equal(1, utf16idx(str, 1, v:true))
  call assert_equal(2, utf16idx(str, 2, v:true))
  call assert_equal(2, utf16idx(str, 3, v:true))
  call assert_equal(3, utf16idx(str, 4, v:true))
  call assert_equal(4, utf16idx(str, 5, v:true))
  call assert_equal(5, utf16idx(str, 6, v:true))
  call assert_equal(5, utf16idx(str, 7, v:true))
  call assert_equal(-1, utf16idx(str, 8, v:true))

  " string with multiple composing characters
  let str = '-ą́-ą́'
  call assert_equal(0, utf16idx(str, 0))
  call assert_equal(1, utf16idx(str, 1))
  call assert_equal(1, utf16idx(str, 2))
  call assert_equal(1, utf16idx(str, 3))
  call assert_equal(1, utf16idx(str, 4))
  call assert_equal(1, utf16idx(str, 5))
  call assert_equal(2, utf16idx(str, 6))
  call assert_equal(3, utf16idx(str, 7))
  call assert_equal(3, utf16idx(str, 8))
  call assert_equal(3, utf16idx(str, 9))
  call assert_equal(3, utf16idx(str, 10))
  call assert_equal(3, utf16idx(str, 11))
  call assert_equal(-1, utf16idx(str, 12))
  " same string, now counting composing characters separately
  call assert_equal(0, utf16idx(str, 0, v:true))
  call assert_equal(1, utf16idx(str, 1, v:true))
  call assert_equal(2, utf16idx(str, 2, v:true))
  call assert_equal(2, utf16idx(str, 3, v:true))
  call assert_equal(3, utf16idx(str, 4, v:true))
  call assert_equal(3, utf16idx(str, 5, v:true))
  call assert_equal(4, utf16idx(str, 6, v:true))
  call assert_equal(5, utf16idx(str, 7, v:true))
  call assert_equal(6, utf16idx(str, 8, v:true))
  call assert_equal(6, utf16idx(str, 9, v:true))
  call assert_equal(7, utf16idx(str, 10, v:true))
  call assert_equal(7, utf16idx(str, 11, v:true))
  call assert_equal(-1, utf16idx(str, 12, v:true))

  " empty string
  call assert_equal(-1, utf16idx('', 0))
  call assert_equal(-1, utf16idx('', 0, v:true))

  " error cases
  call assert_equal(-1, utf16idx("abc", -1))
  call assert_equal(-1, utf16idx(test_null_string(), 0))
  call assert_fails('let l = utf16idx([], 0)', 'E1174:')
  call assert_fails('let l = utf16idx("ab", [])', 'E1210:')
  call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:')
endfunc
" Test for utf16idx() using a character index
func Test_utf16idx_from_charidx()
  " Checks utf16idx() when the fourth argument is v:true, i.e. when the index
  " passed in is a character index instead of a byte index.

  " Strings with single byte and with two byte characters: the UTF-16 index
  " equals the character index, and the index just past the last character
  " gives -1.
  for [str, past_end] in [["abc", 3], ["a©©b", 4]]
    for nr in range(strcharlen(str))
      call assert_equal(nr, utf16idx(str, nr, v:false, v:true))
    endfor
    call assert_equal(-1, utf16idx(str, past_end, v:false, v:true))
  endfor

  " UTF-16 index of a string with four byte characters
  let str = "a😊😊b"
  for [nr, want] in [[0, 0], [1, 2], [2, 4], [3, 5], [4, -1]]
    call assert_equal(want, utf16idx(str, nr, v:false, v:true))
  endfor

  " Strings with one and with multiple composing characters per base
  " character: checked first without, then with composing characters counted
  " separately.
  for [str, nchars, ncodepoints] in [['-á-b́', 4, 6], ['-ą́-ą́', 4, 8]]
    for nr in range(strcharlen(str))
      call assert_equal(nr, utf16idx(str, nr, v:false, v:true))
    endfor
    call assert_equal(-1, utf16idx(str, nchars, v:false, v:true))
    for nr in range(strchars(str))
      call assert_equal(nr, utf16idx(str, nr, v:true, v:true))
    endfor
    call assert_equal(-1, utf16idx(str, ncodepoints, v:true, v:true))
  endfor

  " empty string
  call assert_equal(-1, utf16idx('', 0, v:false, v:true))
  call assert_equal(-1, utf16idx('', 0, v:true, v:true))

  " error cases
  call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true))
  call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:')
endfunc
" Test for strutf16len()
func Test_strutf16len()
  " Each entry: [string, expected length without the flag,
  "              expected length with the flag set to v:true]
  let cases = []
  call add(cases, ['abc', 3, 3])
  call add(cases, ['a©©b', 4, 4])
  call add(cases, ['a😊😊b', 6, 6])
  call add(cases, ['-á-b́', 4, 6])
  call add(cases, ['-ą́-ą́', 4, 8])

  for [text, plain, with_flag] in cases
    call assert_equal(plain, strutf16len(text))
    " method-call syntax is used for the two-argument form
    call assert_equal(with_flag, text->strutf16len(v:true))
  endfor

  " empty string
  call assert_equal(0, strutf16len(''))

  " error cases
  call assert_fails('let l = strutf16len([])', 'E1174:')
  call assert_fails('let l = strutf16len("a", [])', 'E1212:')
  call assert_equal(0, strutf16len(test_null_string()))
endfunc
func Test_count()
let l = ['a', 'a', 'A', 'b']
call assert_equal(2, count(l, 'a'))
@@ -3074,5 +3469,4 @@ func Test_delfunc_while_listing()
call StopVimInTerminal(buf)
endfunc
" vim: shiftwidth=2 sts=2 expandtab