0
0
mirror of https://github.com/vim/vim.git synced 2025-09-24 03:44:06 -04:00

patch 7.4.1629

Problem:    Handling emoji characters as full width has problems with
            backwards compatibility.
Solution:   Remove ambiguous and double width characters from the emoji table.
            Use a separate table for the character class.
            (partly by Yasuhiro Matsumoto)
This commit is contained in:
Bram Moolenaar
2016-03-21 22:09:44 +01:00
parent 3f3fbd3fdb
commit b86f10ee10
3 changed files with 335 additions and 210 deletions

View File

@@ -229,6 +229,11 @@ func! BuildWidthTable(pattern, tableName)
if start >= 0
" produce previous range
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
if a:pattern == 'A'
call add(s:ambitable, [start, end])
else
call add(s:doubletable, [start, end])
endif
endif
let start = n
endif
@@ -238,6 +243,11 @@ func! BuildWidthTable(pattern, tableName)
endfor
if start >= 0
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
if a:pattern == 'A'
call add(s:ambitable, [start, end])
else
call add(s:doubletable, [start, end])
endif
endif
" New buffer to put the result in.
@@ -253,21 +263,72 @@ endfunc
" Build the emoji width table in a new buffer.
func! BuildEmojiTable(pattern, tableName)
let ranges = []
for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
let alltokens = []
let widthtokens = []
let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
for n in range(len(lines))
let line = lines[n]
let token = split(line, '\.\.')
let first = ('0x' . token[0]) + 0
if len(token) == 1
call add(token, token[0])
let last = first
else
let last = ('0x' . token[1]) + 0
endif
let token = [first, last]
if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1])
let alltokens[-1][1] = token[1]
else
call add(alltokens, token)
endif
" exclude characters that are in the "ambiguous" or "doublewidth" table
for ambi in s:ambitable
if first >= ambi[0] && first <= ambi[1]
let first = ambi[1] + 1
endif
if last >= ambi[0] && last <= ambi[1]
let last = ambi[0] - 1
endif
call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1]))
endfor
for double in s:doubletable
if first >= double[0] && first <= double[1]
let first = double[1] + 1
endif
if last >= double[0] && last <= double[1]
let last = double[0] - 1
endif
endfor
if first <= last
let token = [first, last]
if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1])
let widthtokens[-1][1] = token[1]
else
call add(widthtokens, token)
endif
endif
endfor
let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
" New buffer to put the result in.
new
exe "file " . a:tableName
call setline(1, " static struct interval " . a:tableName . "[] =")
exe "file " . a:tableName . '_all'
call setline(1, " static struct interval " . a:tableName . "_all[] =")
call setline(2, " {")
call append('$', ranges)
call append('$', allranges)
call setline('$', getline('$')[:-2]) " remove last comma
call setline(line('$') + 1, " };")
wincmd p
" New buffer to put the result in.
new
exe "file " . a:tableName . '_width'
call setline(1, " static struct interval " . a:tableName . "_width[] =")
call setline(2, " {")
call append('$', widthranges)
call setline('$', getline('$')[:-2]) " remove last comma
call setline(line('$') + 1, " };")
wincmd p
@@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
call ParseWidthProps()
" Build the double width table.
let s:doubletable = []
call BuildWidthTable('[WF]', 'doublewidth')
" Build the ambiguous width table.
let s:ambitable = []
call BuildWidthTable('A', 'ambiguous')
" Edit the emoji text file. Requires the netrw plugin.
edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
" Build the emoji table. Ver. 1.0 - 6.0
" Must come after the "doublewidth" and "ambiguous" tables, because
" BuildEmojiTable() reads s:doubletable and s:ambitable.
call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji')

View File

@@ -1210,148 +1210,6 @@ intable(struct interval *table, size_t size, int c)
return FALSE;
}
/* Sorted list of non-overlapping intervals of Emoji characters,
* based on http://unicode.org/emoji/charts/emoji-list.html
*
* Each entry is a {first, last} pair of code points; a single character is
* stored with first == last, so both ends are part of the interval.
* The table is handed to intable() together with sizeof(emoji_tab).
* Some neighbouring entries are directly adjacent (for example 0x26ce and
* 0x26cf, or the run from 0x1f5fb up to 0x1f640); they touch but do not
* overlap.
* NOTE(review): intable() is not shown here; presumably it relies on the
* entries being in ascending order, so keep the table sorted - confirm. */
static struct interval emoji_tab[] =
{
{0x203c, 0x203c},
{0x2049, 0x2049},
{0x2122, 0x2122},
{0x2139, 0x2139},
{0x2194, 0x2199},
{0x21a9, 0x21aa},
{0x231a, 0x231b},
{0x2328, 0x2328},
{0x23cf, 0x23cf},
{0x23e9, 0x23f3},
{0x24c2, 0x24c2},
{0x25aa, 0x25ab},
{0x25b6, 0x25b6},
{0x25c0, 0x25c0},
{0x25fb, 0x25fe},
{0x2600, 0x2604},
{0x260e, 0x260e},
{0x2611, 0x2611},
{0x2614, 0x2615},
{0x2618, 0x2618},
{0x261d, 0x261d},
{0x2620, 0x2620},
{0x2622, 0x2623},
{0x2626, 0x2626},
{0x262a, 0x262a},
{0x262e, 0x262f},
{0x2638, 0x263a},
{0x2648, 0x2653},
{0x2660, 0x2660},
{0x2663, 0x2663},
{0x2665, 0x2666},
{0x2668, 0x2668},
{0x267b, 0x267b},
{0x267f, 0x267f},
{0x2692, 0x2694},
{0x2696, 0x2697},
{0x2699, 0x2699},
{0x269b, 0x269c},
{0x26a0, 0x26a1},
{0x26aa, 0x26ab},
{0x26b0, 0x26b1},
{0x26bd, 0x26be},
{0x26c4, 0x26c5},
{0x26c8, 0x26c8},
{0x26ce, 0x26ce},
{0x26cf, 0x26cf},
{0x26d1, 0x26d1},
{0x26d3, 0x26d4},
{0x26e9, 0x26ea},
{0x26f0, 0x26f5},
{0x26f7, 0x26fa},
{0x26fd, 0x26fd},
{0x2702, 0x2702},
{0x2705, 0x2705},
{0x2708, 0x2709},
{0x270a, 0x270b},
{0x270c, 0x270d},
{0x270f, 0x270f},
{0x2712, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271d, 0x271d},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274c, 0x274c},
{0x274e, 0x274e},
{0x2753, 0x2755},
{0x2757, 0x2757},
{0x2763, 0x2764},
{0x2795, 0x2797},
{0x27a1, 0x27a1},
{0x27b0, 0x27b0},
{0x27bf, 0x27bf},
{0x2934, 0x2935},
{0x2b05, 0x2b07},
{0x2b1b, 0x2b1c},
{0x2b50, 0x2b50},
{0x2b55, 0x2b55},
{0x3030, 0x3030},
{0x303d, 0x303d},
{0x3297, 0x3297},
{0x3299, 0x3299},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
{0x1f170, 0x1f171},
{0x1f17e, 0x1f17e},
{0x1f17f, 0x1f17f},
{0x1f18e, 0x1f18e},
{0x1f191, 0x1f19a},
{0x1f1e6, 0x1f1ff},
{0x1f201, 0x1f202},
{0x1f21a, 0x1f21a},
{0x1f22f, 0x1f22f},
{0x1f232, 0x1f23a},
{0x1f250, 0x1f251},
{0x1f300, 0x1f320},
{0x1f330, 0x1f335},
{0x1f337, 0x1f37c},
{0x1f380, 0x1f393},
{0x1f3a0, 0x1f3c4},
{0x1f3c6, 0x1f3ca},
{0x1f3e0, 0x1f3f0},
{0x1f400, 0x1f43e},
{0x1f440, 0x1f440},
{0x1f442, 0x1f4f7},
{0x1f4f9, 0x1f4fc},
{0x1f500, 0x1f53d},
{0x1f550, 0x1f567},
{0x1f5fb, 0x1f5ff},
{0x1f600, 0x1f600},
{0x1f601, 0x1f610},
{0x1f611, 0x1f611},
{0x1f612, 0x1f614},
{0x1f615, 0x1f615},
{0x1f616, 0x1f616},
{0x1f617, 0x1f617},
{0x1f618, 0x1f618},
{0x1f619, 0x1f619},
{0x1f61a, 0x1f61a},
{0x1f61b, 0x1f61b},
{0x1f61c, 0x1f61e},
{0x1f61f, 0x1f61f},
{0x1f620, 0x1f625},
{0x1f626, 0x1f627},
{0x1f628, 0x1f62b},
{0x1f62c, 0x1f62c},
{0x1f62d, 0x1f62d},
{0x1f62e, 0x1f62f},
{0x1f630, 0x1f633},
{0x1f634, 0x1f634},
{0x1f635, 0x1f640},
{0x1f645, 0x1f64f},
{0x1f680, 0x1f6c5}
};
/*
* For UTF-8 character "c" return 2 for a double-width character, 1 for others.
* Returns 4 or 6 for an unprintable character.
@@ -1577,6 +1435,90 @@ utf_char2cells(int c)
{0x100000, 0x10fffd}
};
/* Sorted list of non-overlapping intervals of Emoji characters that don't
* have ambiguous or double width,
* based on http://unicode.org/emoji/charts/emoji-list.html
*
* Each entry is a {first, last} pair of code points; a single character is
* stored with first == last, so both ends are part of the interval.
* utf_char2cells() consults this table only when p_emoji is set; a
* character found here is reported as occupying 2 cells.
* Emoji that are already covered by the "ambiguous" or "doublewidth"
* tables are deliberately left out, so their width keeps being decided by
* those tables.
* NOTE(review): presumably generated as the "emoji_width" buffer by
* BuildEmojiTable(); regenerate instead of editing by hand - confirm. */
static struct interval emoji_width[] =
{
{0x203c, 0x203c},
{0x2049, 0x2049},
{0x2139, 0x2139},
{0x21a9, 0x21aa},
{0x231a, 0x231b},
{0x2328, 0x2328},
{0x23cf, 0x23cf},
{0x23e9, 0x23f3},
{0x25aa, 0x25ab},
{0x25fb, 0x25fe},
{0x2600, 0x2604},
{0x2611, 0x2611},
{0x2618, 0x2618},
{0x261d, 0x261d},
{0x2620, 0x2620},
{0x2622, 0x2623},
{0x2626, 0x2626},
{0x262a, 0x262a},
{0x262e, 0x262f},
{0x2638, 0x263a},
{0x2648, 0x2653},
{0x2666, 0x2666},
{0x267b, 0x267b},
{0x267f, 0x267f},
{0x2692, 0x2694},
{0x2696, 0x2697},
{0x2699, 0x2699},
{0x269b, 0x269c},
{0x26a0, 0x26a1},
{0x26aa, 0x26ab},
{0x26b0, 0x26b1},
{0x26bd, 0x26bd},
{0x26ce, 0x26ce},
{0x2702, 0x2702},
{0x2705, 0x2705},
{0x2708, 0x270d},
{0x270f, 0x270f},
{0x2712, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271d, 0x271d},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274c, 0x274c},
{0x274e, 0x274e},
{0x2753, 0x2755},
{0x2763, 0x2764},
{0x2795, 0x2797},
{0x27a1, 0x27a1},
{0x27b0, 0x27b0},
{0x27bf, 0x27bf},
{0x2934, 0x2935},
{0x2b05, 0x2b07},
{0x2b1b, 0x2b1c},
{0x2b50, 0x2b50},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
{0x1f1e6, 0x1f1ff},
{0x1f300, 0x1f320},
{0x1f330, 0x1f335},
{0x1f337, 0x1f37c},
{0x1f380, 0x1f393},
{0x1f3a0, 0x1f3c4},
{0x1f3c6, 0x1f3ca},
{0x1f3e0, 0x1f3f0},
{0x1f400, 0x1f43e},
{0x1f440, 0x1f440},
{0x1f442, 0x1f4f7},
{0x1f4f9, 0x1f4fc},
{0x1f500, 0x1f53d},
{0x1f550, 0x1f567},
{0x1f5fb, 0x1f640},
{0x1f645, 0x1f64f},
{0x1f680, 0x1f6c5}
};
if (c >= 0x100)
{
#ifdef USE_WCHAR_FUNCTIONS
@@ -1596,7 +1538,7 @@ utf_char2cells(int c)
if (intable(doublewidth, sizeof(doublewidth), c))
return 2;
#endif
if (p_emoji && intable(emoji_tab, sizeof(emoji_tab), c))
if (p_emoji && intable(emoji_width, sizeof(emoji_width), c))
return 2;
}
@@ -2674,6 +2616,123 @@ utf_class(int c)
{0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */
{0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */
};
/* Sorted list of non-overlapping intervals of all Emoji characters,
* based on http://unicode.org/emoji/charts/emoji-list.html
*
* Each entry is a {first, last} pair of code points; a single character is
* stored with first == last, so both ends are part of the interval.
* utf_class() uses this table to recognize emoji: a character found here
* gets class 3.  Unlike the emoji_width table, emoji with ambiguous or
* double width are included, because this table is about the character
* class and not about the display width.
* NOTE(review): presumably generated as the "emoji_all" buffer by
* BuildEmojiTable(); regenerate instead of editing by hand - confirm. */
static struct interval emoji_all[] =
{
{0x203c, 0x203c},
{0x2049, 0x2049},
{0x2122, 0x2122},
{0x2139, 0x2139},
{0x2194, 0x2199},
{0x21a9, 0x21aa},
{0x231a, 0x231b},
{0x2328, 0x2328},
{0x23cf, 0x23cf},
{0x23e9, 0x23f3},
{0x24c2, 0x24c2},
{0x25aa, 0x25ab},
{0x25b6, 0x25b6},
{0x25c0, 0x25c0},
{0x25fb, 0x25fe},
{0x2600, 0x2604},
{0x260e, 0x260e},
{0x2611, 0x2611},
{0x2614, 0x2615},
{0x2618, 0x2618},
{0x261d, 0x261d},
{0x2620, 0x2620},
{0x2622, 0x2623},
{0x2626, 0x2626},
{0x262a, 0x262a},
{0x262e, 0x262f},
{0x2638, 0x263a},
{0x2648, 0x2653},
{0x2660, 0x2660},
{0x2663, 0x2663},
{0x2665, 0x2666},
{0x2668, 0x2668},
{0x267b, 0x267b},
{0x267f, 0x267f},
{0x2692, 0x2694},
{0x2696, 0x2697},
{0x2699, 0x2699},
{0x269b, 0x269c},
{0x26a0, 0x26a1},
{0x26aa, 0x26ab},
{0x26b0, 0x26b1},
{0x26bd, 0x26be},
{0x26c4, 0x26c5},
{0x26c8, 0x26c8},
{0x26ce, 0x26cf},
{0x26d1, 0x26d1},
{0x26d3, 0x26d4},
{0x26e9, 0x26ea},
{0x26f0, 0x26f5},
{0x26f7, 0x26fa},
{0x26fd, 0x26fd},
{0x2702, 0x2702},
{0x2705, 0x2705},
{0x2708, 0x270d},
{0x270f, 0x270f},
{0x2712, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271d, 0x271d},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274c, 0x274c},
{0x274e, 0x274e},
{0x2753, 0x2755},
{0x2757, 0x2757},
{0x2763, 0x2764},
{0x2795, 0x2797},
{0x27a1, 0x27a1},
{0x27b0, 0x27b0},
{0x27bf, 0x27bf},
{0x2934, 0x2935},
{0x2b05, 0x2b07},
{0x2b1b, 0x2b1c},
{0x2b50, 0x2b50},
{0x2b55, 0x2b55},
{0x3030, 0x3030},
{0x303d, 0x303d},
{0x3297, 0x3297},
{0x3299, 0x3299},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
{0x1f170, 0x1f171},
{0x1f17e, 0x1f17f},
{0x1f18e, 0x1f18e},
{0x1f191, 0x1f19a},
{0x1f1e6, 0x1f1ff},
{0x1f201, 0x1f202},
{0x1f21a, 0x1f21a},
{0x1f22f, 0x1f22f},
{0x1f232, 0x1f23a},
{0x1f250, 0x1f251},
{0x1f300, 0x1f320},
{0x1f330, 0x1f335},
{0x1f337, 0x1f37c},
{0x1f380, 0x1f393},
{0x1f3a0, 0x1f3c4},
{0x1f3c6, 0x1f3ca},
{0x1f3e0, 0x1f3f0},
{0x1f400, 0x1f43e},
{0x1f440, 0x1f440},
{0x1f442, 0x1f4f7},
{0x1f4f9, 0x1f4fc},
{0x1f500, 0x1f53d},
{0x1f550, 0x1f567},
{0x1f5fb, 0x1f640},
{0x1f645, 0x1f64f},
{0x1f680, 0x1f6c5}
};
int bot = 0;
int top = sizeof(classes) / sizeof(struct clinterval) - 1;
int mid;
@@ -2701,7 +2760,7 @@ utf_class(int c)
}
/* emoji */
if (intable(emoji_tab, sizeof(emoji_tab), c))
if (intable(emoji_all, sizeof(emoji_all), c))
return 3;
/* most other characters are "word" characters */

View File

@@ -748,6 +748,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
1629,
/**/
1628,
/**/