mirror of
https://github.com/vim/vim.git
synced 2025-09-25 03:54:15 -04:00
patch 8.2.1536: cannot get the class of a character; emoji widths are wrong
Problem: Cannot get the class of a character; emoji widths are wrong in some environments.
Solution: Add charclass(). Update some emoji widths. Add a script to check emoji widths.
This commit is contained in:
1
Filelist
1
Filelist
@@ -197,6 +197,7 @@ SRC_ALL = \
|
|||||||
src/testdir/samples/*.txt \
|
src/testdir/samples/*.txt \
|
||||||
src/testdir/samples/test000 \
|
src/testdir/samples/test000 \
|
||||||
src/testdir/color_ramp.vim \
|
src/testdir/color_ramp.vim \
|
||||||
|
src/testdir/emoji_list.vim \
|
||||||
src/testdir/silent.wav \
|
src/testdir/silent.wav \
|
||||||
src/testdir/popupbounce.vim \
|
src/testdir/popupbounce.vim \
|
||||||
src/proto.h \
|
src/proto.h \
|
||||||
|
@@ -2425,6 +2425,7 @@ ch_status({handle} [, {options}])
|
|||||||
String status of channel {handle}
|
String status of channel {handle}
|
||||||
changenr() Number current change number
|
changenr() Number current change number
|
||||||
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
|
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
|
||||||
|
charclass({string}) Number character class of {string}
|
||||||
chdir({dir}) String change current working directory
|
chdir({dir}) String change current working directory
|
||||||
cindent({lnum}) Number C indent for line {lnum}
|
cindent({lnum}) Number C indent for line {lnum}
|
||||||
clearmatches([{win}]) none clear all matches
|
clearmatches([{win}]) none clear all matches
|
||||||
@@ -3520,6 +3521,18 @@ char2nr({expr} [, {utf8}]) *char2nr()*
|
|||||||
Can also be used as a |method|: >
|
Can also be used as a |method|: >
|
||||||
GetChar()->char2nr()
|
GetChar()->char2nr()
|
||||||
|
|
||||||
|
|
||||||
|
charclass({string}) *charclass()*
|
||||||
|
Return the character class of the first character in {string}.
|
||||||
|
The character class is one of:
|
||||||
|
0 blank
|
||||||
|
1 punctuation
|
||||||
|
2 word character
|
||||||
|
3 emoji
|
||||||
|
other: a specific Unicode class
|
||||||
|
The class is used in patterns and word motions.
|
||||||
|
|
||||||
|
|
||||||
chdir({dir}) *chdir()*
|
chdir({dir}) *chdir()*
|
||||||
Change the current working directory to {dir}. The scope of
|
Change the current working directory to {dir}. The scope of
|
||||||
the directory change depends on the directory of the current
|
the directory change depends on the directory of the current
|
||||||
|
@@ -600,6 +600,7 @@ String manipulation: *string-functions*
|
|||||||
strtrans() translate a string to make it printable
|
strtrans() translate a string to make it printable
|
||||||
tolower() turn a string to lowercase
|
tolower() turn a string to lowercase
|
||||||
toupper() turn a string to uppercase
|
toupper() turn a string to uppercase
|
||||||
|
charclass() class of a character
|
||||||
match() position where a pattern matches in a string
|
match() position where a pattern matches in a string
|
||||||
matchend() position where a pattern match ends in a string
|
matchend() position where a pattern match ends in a string
|
||||||
matchstr() match of a pattern in a string
|
matchstr() match of a pattern in a string
|
||||||
|
@@ -564,6 +564,7 @@ static funcentry_T global_functions[] =
|
|||||||
{"ch_status", 1, 2, FEARG_1, ret_string, JOB_FUNC(f_ch_status)},
|
{"ch_status", 1, 2, FEARG_1, ret_string, JOB_FUNC(f_ch_status)},
|
||||||
{"changenr", 0, 0, 0, ret_number, f_changenr},
|
{"changenr", 0, 0, 0, ret_number, f_changenr},
|
||||||
{"char2nr", 1, 2, FEARG_1, ret_number, f_char2nr},
|
{"char2nr", 1, 2, FEARG_1, ret_number, f_char2nr},
|
||||||
|
{"charclass", 1, 1, FEARG_1, ret_number, f_charclass},
|
||||||
{"chdir", 1, 1, FEARG_1, ret_string, f_chdir},
|
{"chdir", 1, 1, FEARG_1, ret_string, f_chdir},
|
||||||
{"cindent", 1, 1, FEARG_1, ret_number, f_cindent},
|
{"cindent", 1, 1, FEARG_1, ret_number, f_cindent},
|
||||||
{"clearmatches", 0, 1, FEARG_1, ret_void, f_clearmatches},
|
{"clearmatches", 0, 1, FEARG_1, ret_void, f_clearmatches},
|
||||||
|
51
src/mbyte.c
51
src/mbyte.c
@@ -132,7 +132,9 @@ static int dbcs_char2cells(int c);
|
|||||||
static int dbcs_ptr2cells_len(char_u *p, int size);
|
static int dbcs_ptr2cells_len(char_u *p, int size);
|
||||||
static int dbcs_ptr2char(char_u *p);
|
static int dbcs_ptr2char(char_u *p);
|
||||||
static int dbcs_head_off(char_u *base, char_u *p);
|
static int dbcs_head_off(char_u *base, char_u *p);
|
||||||
|
#ifdef FEAT_EVAL
|
||||||
static int cw_value(int c);
|
static int cw_value(int c);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lookup table to quickly get the length in bytes of a UTF-8 character from
|
* Lookup table to quickly get the length in bytes of a UTF-8 character from
|
||||||
@@ -1388,8 +1390,7 @@ utf_char2cells(int c)
|
|||||||
{0x26ce, 0x26ce},
|
{0x26ce, 0x26ce},
|
||||||
{0x26d4, 0x26d4},
|
{0x26d4, 0x26d4},
|
||||||
{0x26ea, 0x26ea},
|
{0x26ea, 0x26ea},
|
||||||
{0x26f2, 0x26f3},
|
{0x26f2, 0x26f5},
|
||||||
{0x26f5, 0x26f5},
|
|
||||||
{0x26fa, 0x26fa},
|
{0x26fa, 0x26fa},
|
||||||
{0x26fd, 0x26fd},
|
{0x26fd, 0x26fd},
|
||||||
{0x2705, 0x2705},
|
{0x2705, 0x2705},
|
||||||
@@ -1490,6 +1491,21 @@ utf_char2cells(int c)
|
|||||||
// based on http://unicode.org/emoji/charts/emoji-list.html
|
// based on http://unicode.org/emoji/charts/emoji-list.html
|
||||||
static struct interval emoji_wide[] =
|
static struct interval emoji_wide[] =
|
||||||
{
|
{
|
||||||
|
{0x23ed, 0x23ef},
|
||||||
|
{0x23f1, 0x23f2},
|
||||||
|
{0x23f8, 0x23fa},
|
||||||
|
{0x24c2, 0x24c2},
|
||||||
|
{0x261d, 0x261d},
|
||||||
|
{0x26c8, 0x26c8},
|
||||||
|
{0x26cf, 0x26cf},
|
||||||
|
{0x26d1, 0x26d1},
|
||||||
|
{0x26d3, 0x26d3},
|
||||||
|
{0x26e9, 0x26e9},
|
||||||
|
{0x26f0, 0x26f1},
|
||||||
|
{0x26f7, 0x26f9},
|
||||||
|
{0x270c, 0x270d},
|
||||||
|
{0x2934, 0x2935},
|
||||||
|
{0x1f170, 0x1f189},
|
||||||
{0x1f1e6, 0x1f1ff},
|
{0x1f1e6, 0x1f1ff},
|
||||||
{0x1f321, 0x1f321},
|
{0x1f321, 0x1f321},
|
||||||
{0x1f324, 0x1f32c},
|
{0x1f324, 0x1f32c},
|
||||||
@@ -1533,11 +1549,15 @@ utf_char2cells(int c)
|
|||||||
|
|
||||||
if (c >= 0x100)
|
if (c >= 0x100)
|
||||||
{
|
{
|
||||||
|
#if defined(FEAT_EVAL) || defined(USE_WCHAR_FUNCTIONS)
|
||||||
int n;
|
int n;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef FEAT_EVAL
|
||||||
n = cw_value(c);
|
n = cw_value(c);
|
||||||
if (n != 0)
|
if (n != 0)
|
||||||
return n;
|
return n;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef USE_WCHAR_FUNCTIONS
|
#ifdef USE_WCHAR_FUNCTIONS
|
||||||
/*
|
/*
|
||||||
@@ -2667,8 +2687,7 @@ static struct interval emoji_all[] =
|
|||||||
{0x3299, 0x3299},
|
{0x3299, 0x3299},
|
||||||
{0x1f004, 0x1f004},
|
{0x1f004, 0x1f004},
|
||||||
{0x1f0cf, 0x1f0cf},
|
{0x1f0cf, 0x1f0cf},
|
||||||
{0x1f170, 0x1f171},
|
{0x1f170, 0x1f189},
|
||||||
{0x1f17e, 0x1f17f},
|
|
||||||
{0x1f18e, 0x1f18e},
|
{0x1f18e, 0x1f18e},
|
||||||
{0x1f191, 0x1f19a},
|
{0x1f191, 0x1f19a},
|
||||||
{0x1f1e6, 0x1f1ff},
|
{0x1f1e6, 0x1f1ff},
|
||||||
@@ -2835,6 +2854,10 @@ utf_class_buf(int c, buf_T *buf)
|
|||||||
return 1; // punctuation
|
return 1; // punctuation
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// emoji
|
||||||
|
if (intable(emoji_all, sizeof(emoji_all), c))
|
||||||
|
return 3;
|
||||||
|
|
||||||
// binary search in table
|
// binary search in table
|
||||||
while (top >= bot)
|
while (top >= bot)
|
||||||
{
|
{
|
||||||
@@ -2847,10 +2870,6 @@ utf_class_buf(int c, buf_T *buf)
|
|||||||
return (int)classes[mid].class;
|
return (int)classes[mid].class;
|
||||||
}
|
}
|
||||||
|
|
||||||
// emoji
|
|
||||||
if (intable(emoji_all, sizeof(emoji_all), c))
|
|
||||||
return 3;
|
|
||||||
|
|
||||||
// most other characters are "word" characters
|
// most other characters are "word" characters
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
@@ -5352,6 +5371,8 @@ string_convert_ext(
|
|||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(FEAT_EVAL) || defined(PROTO)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Table set by setcellwidths().
|
* Table set by setcellwidths().
|
||||||
*/
|
*/
|
||||||
@@ -5525,3 +5546,17 @@ f_setcellwidths(typval_T *argvars, typval_T *rettv UNUSED)
|
|||||||
cw_table = table;
|
cw_table = table;
|
||||||
cw_table_size = l->lv_len;
|
cw_table_size = l->lv_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * "charclass(string)" function
 *
 * Stores in "rettv" the value that mb_get_class() returns for the string
 * passed as the first argument.
 * When the argument is not a string, is NULL or is empty, the e_stringreq
 * error is given and the function returns without assigning a result.
 *
 * NOTE(review): "rettv" is marked UNUSED but is assigned below - confirm
 * whether the marker should stay.
 */
void
|
||||||
|
f_charclass(typval_T *argvars, typval_T *rettv UNUSED)
|
||||||
|
{
|
||||||
|
// Reject anything that is not a non-empty string.
if (argvars[0].v_type != VAR_STRING
|
||||||
|
|| argvars[0].vval.v_string == NULL
|
||||||
|
|| *argvars[0].vval.v_string == NUL)
|
||||||
|
{
|
||||||
|
emsg(_(e_stringreq));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// The class lookup itself is delegated to mb_get_class().
rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@@ -85,4 +85,5 @@ int convert_input_safe(char_u *ptr, int len, int maxlen, char_u **restp, int *re
|
|||||||
char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp);
|
char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp);
|
||||||
char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp);
|
char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp);
|
||||||
void f_setcellwidths(typval_T *argvars, typval_T *rettv);
|
void f_setcellwidths(typval_T *argvars, typval_T *rettv);
|
||||||
|
void f_charclass(typval_T *argvars, typval_T *rettv);
|
||||||
/* vim: set ft=c : */
|
/* vim: set ft=c : */
|
||||||
|
22
src/testdir/emoji_list.vim
Normal file
22
src/testdir/emoji_list.vim
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
" Script to fill the window with emoji characters, one per line.
|
||||||
|
|
||||||
|
if &modified
|
||||||
|
new
|
||||||
|
else
|
||||||
|
enew
|
||||||
|
endif
|
||||||
|
|
||||||
|
" Use a compiled Vim9 function for speed
|
||||||
|
" DoIt() walks the character numbers given by range(0x100, 0x1ffff).
" For every character for which charclass() returns 3 it writes one buffer
" line of the form '|<char>| <strwidth of char>', starting at line 1 and
" advancing one line per match.
def DoIt()
|
||||||
|
let lnum = 1
|
||||||
|
for c in range(0x100, 0x1ffff)
|
||||||
|
let cs = nr2char(c)
|
||||||
|
if charclass(cs) == 3
|
||||||
|
setline(lnum, '|' .. cs .. '| ' .. strwidth(cs))
|
||||||
|
lnum += 1
|
||||||
|
endif
|
||||||
|
endfor
|
||||||
|
enddef
|
||||||
|
|
||||||
|
call DoIt()
|
||||||
|
set nomodified
|
@@ -2077,6 +2077,13 @@ func Test_char2nr()
|
|||||||
set encoding=utf-8
|
set encoding=utf-8
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
" Test that charclass() returns 0 for a space, 1 for '.', 2 for 'x' and 3 for
" the character U+203C.
func Test_charclass()
|
||||||
|
call assert_equal(0, charclass(' '))
|
||||||
|
call assert_equal(1, charclass('.'))
|
||||||
|
call assert_equal(2, charclass('x'))
|
||||||
|
call assert_equal(3, charclass("\u203c"))
|
||||||
|
endfunc
|
||||||
|
|
||||||
func Test_eventhandler()
|
func Test_eventhandler()
|
||||||
call assert_equal(0, eventhandler())
|
call assert_equal(0, eventhandler())
|
||||||
endfunc
|
endfunc
|
||||||
|
@@ -754,6 +754,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
1536,
|
||||||
/**/
|
/**/
|
||||||
1535,
|
1535,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user