1
0
forked from aniani/vim

patch 8.1.2027: MS-Windows: problem with ambiwidth characters

Problem:    MS-Windows: problem with ambiwidth characters.
Solution:   handle ambiguous width characters in ConPTY on Windows 10 (1903).
            (Nobuhiro Takasaki, closes #4411)
This commit is contained in:
Bram Moolenaar
2019-09-13 22:30:11 +02:00
parent dbec74907e
commit 57da698168
12 changed files with 156 additions and 10 deletions

View File

@@ -1192,7 +1192,8 @@ $(OUTDIR)/pathdef.o: $(PATHDEF_SRC) $(INCL)
CCCTERM = $(CC) -c $(CFLAGS) -Ilibvterm/include -DINLINE="" \ CCCTERM = $(CC) -c $(CFLAGS) -Ilibvterm/include -DINLINE="" \
-DVSNPRINTF=vim_vsnprintf \ -DVSNPRINTF=vim_vsnprintf \
-DIS_COMBINING_FUNCTION=utf_iscomposing_uint \ -DIS_COMBINING_FUNCTION=utf_iscomposing_uint \
-DWCWIDTH_FUNCTION=utf_uint2cells -DWCWIDTH_FUNCTION=utf_uint2cells \
-DGET_SPECIAL_PTY_TYPE_FUNCTION=get_special_pty_type
$(OUTDIR)/%.o : libvterm/src/%.c $(TERM_DEPS) $(OUTDIR)/%.o : libvterm/src/%.c $(TERM_DEPS)
$(CCCTERM) $< -o $@ $(CCCTERM) $< -o $@

View File

@@ -1716,6 +1716,7 @@ CCCTERM = $(CC) $(CFLAGS) -Ilibvterm/include -DINLINE="" \
-DVSNPRINTF=vim_vsnprintf \ -DVSNPRINTF=vim_vsnprintf \
-DIS_COMBINING_FUNCTION=utf_iscomposing_uint \ -DIS_COMBINING_FUNCTION=utf_iscomposing_uint \
-DWCWIDTH_FUNCTION=utf_uint2cells \ -DWCWIDTH_FUNCTION=utf_uint2cells \
-DGET_SPECIAL_PTY_TYPE_FUNCTION=get_special_pty_type \
-D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_WARNINGS
# Create a default rule for libvterm. # Create a default rule for libvterm.

View File

@@ -127,6 +127,9 @@ size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len)
size_t pos = 0; size_t pos = 0;
const char *string_start = NULL; // init to avoid gcc warning const char *string_start = NULL; // init to avoid gcc warning
vt->in_backspace = 0; // Count down with BS key and activate when
// it reaches 1
switch(vt->parser.state) { switch(vt->parser.state) {
case NORMAL: case NORMAL:
case CSI_LEADER: case CSI_LEADER:
@@ -172,6 +175,13 @@ size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len)
// fallthrough // fallthrough
} }
else if(c < 0x20) { // other C0 else if(c < 0x20) { // other C0
if(vterm_get_special_pty_type() == 2) {
if(c == 0x08) // BS
// Set the trick for BS output after a sequence, to delay backspace
// activation
if(pos + 2 < len && bytes[pos + 1] == 0x20 && bytes[pos + 2] == 0x08)
vt->in_backspace = 2; // Trigger when count down to 1
}
if(vt->parser.state >= STRING) if(vt->parser.state >= STRING)
more_string(vt, string_start, bytes + pos - string_start); more_string(vt, string_start, bytes + pos - string_start);
do_control(vt, c); do_control(vt, c);

View File

@@ -336,6 +336,11 @@ static int on_text(const char bytes[], size_t len, void *user)
for( ; i < glyph_ends; i++) { for( ; i < glyph_ends; i++) {
int this_width; int this_width;
if(vterm_get_special_pty_type() == 2) {
state->vt->in_backspace -= (state->vt->in_backspace > 0) ? 1 : 0;
if(state->vt->in_backspace == 1)
codepoints[i] = 0; // codepoints under this condition must be 0
}
chars[i - glyph_starts] = codepoints[i]; chars[i - glyph_starts] = codepoints[i];
this_width = vterm_unicode_width(codepoints[i]); this_width = vterm_unicode_width(codepoints[i]);
#ifdef DEBUG #ifdef DEBUG
@@ -425,6 +430,12 @@ static int on_control(unsigned char control, void *user)
VTermPos oldpos = state->pos; VTermPos oldpos = state->pos;
VTermScreenCell cell;
// Preparing to see the leading byte
VTermPos leadpos = state->pos;
leadpos.col -= (leadpos.col >= 2 ? 2 : 0);
switch(control) { switch(control) {
case 0x07: // BEL - ECMA-48 8.3.3 case 0x07: // BEL - ECMA-48 8.3.3
if(state->callbacks && state->callbacks->bell) if(state->callbacks && state->callbacks->bell)
@@ -434,6 +445,12 @@ static int on_control(unsigned char control, void *user)
case 0x08: // BS - ECMA-48 8.3.5 case 0x08: // BS - ECMA-48 8.3.5
if(state->pos.col > 0) if(state->pos.col > 0)
state->pos.col--; state->pos.col--;
if(vterm_get_special_pty_type() == 2) {
// In 2 cell letters, go back 2 cells
vterm_screen_get_cell(state->vt->screen, leadpos, &cell);
if(vterm_unicode_width(cell.chars[0]) == 2)
state->pos.col--;
}
break; break;
case 0x09: // HT - ECMA-48 8.3.60 case 0x09: // HT - ECMA-48 8.3.60
@@ -1019,6 +1036,26 @@ static int on_csi(const char *leader, const long args[], int argcount, const cha
row = CSI_ARG_OR(args[0], 1); row = CSI_ARG_OR(args[0], 1);
col = argcount < 2 || CSI_ARG_IS_MISSING(args[1]) ? 1 : CSI_ARG(args[1]); col = argcount < 2 || CSI_ARG_IS_MISSING(args[1]) ? 1 : CSI_ARG(args[1]);
// zero-based // zero-based
if(vterm_get_special_pty_type() == 2) {
// Fix a sequence that is not correct right now
if(state->pos.row == row - 1) {
int cnt, ptr = 0;
for(cnt = 0; cnt < col - 1; ++cnt) {
VTermPos p;
VTermScreenCell c0, c1;
p.row = row - 1;
p.col = ptr;
vterm_screen_get_cell(state->vt->screen, p, &c0);
p.col++;
vterm_screen_get_cell(state->vt->screen, p, &c1);
ptr += (c1.chars[0] == (uint32_t)-1) // double cell?
? (vterm_unicode_is_ambiguous(c0.chars[0])) // is ambiguous?
? vterm_unicode_width(0x00a1) : 1 // &ambiwidth
: 1; // not ambiguous
}
col = ptr + 1;
}
}
state->pos.row = row-1; state->pos.row = row-1;
state->pos.col = col-1; state->pos.col = col-1;
if(state->mode.origin) { if(state->mode.origin) {

View File

@@ -770,11 +770,28 @@ int vterm_screen_get_cell(const VTermScreen *screen, VTermPos pos, VTermScreenCe
cell->fg = intcell->pen.fg; cell->fg = intcell->pen.fg;
cell->bg = intcell->pen.bg; cell->bg = intcell->pen.bg;
if(pos.col < (screen->cols - 1) && if(vterm_get_special_pty_type() == 2) {
getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) /* Get correct cell width from cell information contained in line buffer */
cell->width = 2; if(pos.col < (screen->cols - 1) &&
else getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) {
cell->width = 1; if(getcell(screen, pos.row, pos.col)->chars[0] == 0x20) {
getcell(screen, pos.row, pos.col)->chars[0] = 0;
cell->width = 2;
} else if(getcell(screen, pos.row, pos.col)->chars[0] == 0) {
getcell(screen, pos.row, pos.col + 1)->chars[0] = 0;
cell->width = 1;
} else {
cell->width = 2;
}
} else
cell->width = 1;
} else {
if(pos.col < (screen->cols - 1) &&
getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1)
cell->width = 2;
else
cell->width = 1;
}
return 1; return 1;
} }

View File

@@ -68,12 +68,13 @@
* Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
*/ */
#if !defined(IS_COMBINING_FUNCTION) || !defined(WCWIDTH_FUNCTION)
struct interval { struct interval {
int first; int first;
int last; int last;
}; };
#if !defined(WCWIDTH_FUNCTION) || !defined(IS_COMBINING_FUNCTION)
// sorted list of non-overlapping intervals of non-spacing characters // sorted list of non-overlapping intervals of non-spacing characters
// generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" // generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
// Replaced by the combining table from Vim. // Replaced by the combining table from Vim.
@@ -359,6 +360,7 @@ static const struct interval combining[] = {
{0X1E944, 0X1E94A}, {0X1E944, 0X1E94A},
{0XE0100, 0XE01EF} {0XE0100, 0XE01EF}
}; };
#endif
// auxiliary function for binary search in interval table // auxiliary function for binary search in interval table
static int bisearch(uint32_t ucs, const struct interval *table, int max) { static int bisearch(uint32_t ucs, const struct interval *table, int max) {
@@ -379,8 +381,6 @@ static int bisearch(uint32_t ucs, const struct interval *table, int max) {
return 0; return 0;
} }
#endif
/* The following two functions define the column width of an ISO 10646 /* The following two functions define the column width of an ISO 10646
* character as follows: * character as follows:
@@ -478,6 +478,7 @@ static int mk_wcswidth(const uint32_t *pwcs, size_t n)
*/ */
static int mk_wcwidth_cjk(uint32_t ucs) static int mk_wcwidth_cjk(uint32_t ucs)
{ {
#endif
/* sorted list of non-overlapping intervals of East Asian Ambiguous /* sorted list of non-overlapping intervals of East Asian Ambiguous
* characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
static const struct interval ambiguous[] = { static const struct interval ambiguous[] = {
@@ -534,6 +535,7 @@ static int mk_wcwidth_cjk(uint32_t ucs)
{ 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
{ 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
}; };
#if 0
// binary search in table of non-spacing characters // binary search in table of non-spacing characters
if (bisearch(ucs, ambiguous, if (bisearch(ucs, ambiguous,
@@ -557,6 +559,12 @@ static int mk_wcswidth_cjk(const uint32_t *pwcs, size_t n)
} }
#endif #endif
// Return 1 when "codepoint" falls inside one of the intervals of the
// "ambiguous" table (East Asian Ambiguous characters), 0 otherwise.
INTERNAL int vterm_unicode_is_ambiguous(uint32_t codepoint)
{
  // bisearch() takes the index of the last table entry, not the entry count.
  const int last_index = sizeof(ambiguous) / sizeof(ambiguous[0]) - 1;

  if(bisearch(codepoint, ambiguous, last_index))
    return 1;
  return 0;
}
#ifdef IS_COMBINING_FUNCTION #ifdef IS_COMBINING_FUNCTION
// Use a provided is_combining() function. // Use a provided is_combining() function.
int IS_COMBINING_FUNCTION(uint32_t codepoint); int IS_COMBINING_FUNCTION(uint32_t codepoint);
@@ -569,6 +577,17 @@ vterm_is_combining(uint32_t codepoint)
} }
#endif #endif
#ifdef GET_SPECIAL_PTY_TYPE_FUNCTION
// Use a provided get_special_pty_type() function.
int GET_SPECIAL_PTY_TYPE_FUNCTION(void);
#else
// No function was provided at build time: fall back to a local placeholder
// that always reports type 0.
# define GET_SPECIAL_PTY_TYPE_FUNCTION vterm_get_special_pty_type_placeholder
static int
vterm_get_special_pty_type_placeholder(void)
{
return 0;
}
#endif
// ################################ // ################################
// ### The rest added by Paul Evans // ### The rest added by Paul Evans
@@ -581,3 +600,8 @@ INTERNAL int vterm_unicode_is_combining(uint32_t codepoint)
{ {
return IS_COMBINING_FUNCTION(codepoint); return IS_COMBINING_FUNCTION(codepoint);
} }
// Return the special PTY type by calling GET_SPECIAL_PTY_TYPE_FUNCTION, which
// is either a function supplied at build time or the placeholder returning 0.
INTERNAL int vterm_get_special_pty_type(void)
{
return GET_SPECIAL_PTY_TYPE_FUNCTION();
}

View File

@@ -212,6 +212,8 @@ struct VTerm
VTermState *state; VTermState *state;
VTermScreen *screen; VTermScreen *screen;
int in_backspace;
}; };
struct VTermEncoding { struct VTermEncoding {
@@ -259,5 +261,7 @@ VTermEncoding *vterm_lookup_encoding(VTermEncodingType type, char designation);
int vterm_unicode_width(uint32_t codepoint); int vterm_unicode_width(uint32_t codepoint);
int vterm_unicode_is_combining(uint32_t codepoint); int vterm_unicode_is_combining(uint32_t codepoint);
int vterm_unicode_is_ambiguous(uint32_t codepoint);
int vterm_get_special_pty_type(void);
#endif #endif

View File

@@ -4601,3 +4601,22 @@ build_argv_from_list(list_T *l, char ***argv, int *argc)
} }
# endif # endif
#endif #endif
/*
 * Return the kind of pty in use, which changes the behavior of vterm:
 * 0: As usual, nothing special.
 * 1: Windows 10 version 1809.
 *    A bug causes unstable handling of ambiguous width characters.
 * 2: Windows 10 version 1903.
 *    Use the wrong result because each result is different.
 * 3: Windows 10 insider preview (current latest logic).
 * On MS-Windows the value comes from get_conpty_type(), on all other
 * systems it is always 0.
 */
    int
get_special_pty_type(void)
{
    int pty_type = 0;

#ifdef MSWIN
    pty_type = get_conpty_type();
#endif
    return pty_type;
}

View File

@@ -186,6 +186,7 @@ static int win32_setattrs(char_u *name, int attrs);
static int win32_set_archive(char_u *name); static int win32_set_archive(char_u *name);
static int conpty_working = 0; static int conpty_working = 0;
static int conpty_type = 0;
static int conpty_stable = 0; static int conpty_stable = 0;
static void vtp_flag_init(); static void vtp_flag_init();
@@ -7249,9 +7250,25 @@ mch_setenv(char *var, char *value, int x)
/* /*
* Support for pseudo-console (ConPTY) was added in windows 10 * Support for pseudo-console (ConPTY) was added in windows 10
* version 1809 (October 2018 update). However, that version is unstable. * version 1809 (October 2018 update).
*/ */
#define CONPTY_FIRST_SUPPORT_BUILD MAKE_VER(10, 0, 17763) #define CONPTY_FIRST_SUPPORT_BUILD MAKE_VER(10, 0, 17763)
/*
 * ConPTY behaves differently between Windows versions, so different logic is
 * needed.  This is the build of version 1903 (May 2019 update).
 */
#define CONPTY_1903_BUILD MAKE_VER(10, 0, 18362)
/*
 * The behavior has been confirmed up to this version (an insider preview).
 * The logic changes here as well.
 */
#define CONPTY_INSIDER_BUILD MAKE_VER(10, 0, 18898)
/*
* Not stable now.
*/
#define CONPTY_STABLE_BUILD MAKE_VER(10, 0, 32767) // T.B.D. #define CONPTY_STABLE_BUILD MAKE_VER(10, 0, 32767) // T.B.D.
static void static void
@@ -7281,6 +7298,12 @@ vtp_flag_init(void)
if (ver >= CONPTY_STABLE_BUILD) if (ver >= CONPTY_STABLE_BUILD)
conpty_stable = 1; conpty_stable = 1;
if (ver <= CONPTY_INSIDER_BUILD)
conpty_type = 3;
if (ver <= CONPTY_1903_BUILD)
conpty_type = 2;
if (ver < CONPTY_FIRST_SUPPORT_BUILD)
conpty_type = 1;
} }
#if !defined(FEAT_GUI_MSWIN) || defined(VIMDLL) || defined(PROTO) #if !defined(FEAT_GUI_MSWIN) || defined(VIMDLL) || defined(PROTO)
@@ -7502,6 +7525,12 @@ has_conpty_working(void)
return conpty_working; return conpty_working;
} }
/*
 * Return the current value of the file-local "conpty_type" variable.
 */
int
get_conpty_type(void)
{
return conpty_type;
}
int int
is_conpty_stable(void) is_conpty_stable(void)
{ {

View File

@@ -106,4 +106,5 @@ void parse_queued_messages(void);
int mch_parse_cmd(char_u *cmd, int use_shcf, char ***argv, int *argc); int mch_parse_cmd(char_u *cmd, int use_shcf, char ***argv, int *argc);
int build_argv_from_string(char_u *cmd, char ***argv, int *argc); int build_argv_from_string(char_u *cmd, char ***argv, int *argc);
int build_argv_from_list(list_T *l, char ***argv, int *argc); int build_argv_from_list(list_T *l, char ***argv, int *argc);
int get_special_pty_type(void);
/* vim: set ft=c : */ /* vim: set ft=c : */

View File

@@ -76,6 +76,7 @@ int use_vtp(void);
int is_term_win32(void); int is_term_win32(void);
int has_vtp_working(void); int has_vtp_working(void);
int has_conpty_working(void); int has_conpty_working(void);
int get_conpty_type(void);
int is_conpty_stable(void); int is_conpty_stable(void);
void resize_console_buf(void); void resize_console_buf(void);
/* vim: set ft=c : */ /* vim: set ft=c : */

View File

@@ -757,6 +757,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
2027,
/**/ /**/
2026, 2026,
/**/ /**/