mirror of
https://github.com/vim/vim.git
synced 2025-09-24 03:44:06 -04:00
patch 8.2.1933: cannot sort using locale ordering
Problem: Cannot sort using locale ordering. Solution: Add a flag for :sort and sort() to use the locale. (Dominique Pellé, closes #7237)
This commit is contained in:
@@ -1801,7 +1801,7 @@ Vim has a sorting function and a sorting command. The sorting function can be
|
||||
found here: |sort()|, |uniq()|.
|
||||
|
||||
*:sor* *:sort*
|
||||
:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
|
||||
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
|
||||
Sort lines in [range]. When no range is given all
|
||||
lines are sorted.
|
||||
|
||||
@@ -1809,6 +1809,14 @@ found here: |sort()|, |uniq()|.
|
||||
|
||||
With [i] case is ignored.
|
||||
|
||||
With [l] sort uses the current locale. See
|
||||
`:language collate` to check or set the locale used
|
||||
for ordering. For example, with "en_US.UTF8",
|
||||
Ö will be ordered after O and before P,
|
||||
whereas with the Swedish locale "sv_SE.UTF8",
|
||||
it will be after Z.
|
||||
Case is typically ignored by the locale.
|
||||
|
||||
Options [n][f][x][o][b] are mutually exclusive.
|
||||
|
||||
With [n] sorting is done on the first decimal number
|
||||
@@ -1875,8 +1883,7 @@ found here: |sort()|, |uniq()|.
|
||||
Note that using `:sort` with `:global` doesn't sort the matching lines; it's
|
||||
quite useless.
|
||||
|
||||
The details about sorting depend on the library function used. There is no
|
||||
guarantee that sorting obeys the current locale. You will have to try it out.
|
||||
`:sort` does not use the current locale unless the [l] flag is used.
|
||||
Vim does do a "stable" sort.
|
||||
|
||||
The sorting can be interrupted, but if you interrupt it too late in the
|
||||
|
@@ -9700,6 +9700,13 @@ sort({list} [, {func} [, {dict}]]) *sort()* *E702*
|
||||
When {func} is given and it is '1' or 'i' then case is
|
||||
ignored.
|
||||
|
||||
When {func} is given and it is 'l' then the current locale
|
||||
is used for ordering. See `:language collate` to check or set
|
||||
the locale used for ordering. For example, with "en_US.UTF8",
|
||||
Ö will be ordered after O and before P, whereas with the
|
||||
Swedish locale "sv_SE.UTF8", it will be after Z.
|
||||
Case is typically ignored by the locale.
|
||||
|
||||
When {func} is given and it is 'n' then all items will be
|
||||
sorted numerically (Implementation detail: this uses the
|
||||
strtod() function to parse numbers, Strings, Lists, Dicts and
|
||||
|
@@ -277,6 +277,7 @@ linelen(int *has_tab)
|
||||
static char_u *sortbuf1;
|
||||
static char_u *sortbuf2;
|
||||
|
||||
static int sort_lc; // sort using locale
|
||||
static int sort_ic; // ignore case
|
||||
static int sort_nr; // sort on number
|
||||
static int sort_rx; // sort on regex instead of skipping it
|
||||
@@ -307,7 +308,13 @@ typedef struct
|
||||
} st_u;
|
||||
} sorti_T;
|
||||
|
||||
static int sort_compare(const void *s1, const void *s2);
|
||||
/*
 * Compare two strings for the sort code, returning a value that is negative,
 * zero or positive in the manner of strcmp().
 * When "sort_lc" is set the comparison is delegated to strcoll(), so the
 * current collation locale decides the order and "sort_ic" is not consulted.
 * Otherwise the comparison is case-insensitive when "sort_ic" is set and
 * case-sensitive when it is not.
 */
static int
|
||||
string_compare(const void *s1, const void *s2)
|
||||
{
|
||||
// Locale ordering takes precedence over the ignore-case flag.
if (sort_lc)
|
||||
return strcoll((char *)s1, (char *)s2);
|
||||
return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);
|
||||
}
|
||||
|
||||
static int
|
||||
sort_compare(const void *s1, const void *s2)
|
||||
@@ -350,8 +357,7 @@ sort_compare(const void *s1, const void *s2)
|
||||
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
|
||||
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0;
|
||||
|
||||
result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
|
||||
: STRCMP(sortbuf1, sortbuf2);
|
||||
result = string_compare(sortbuf1, sortbuf2);
|
||||
}
|
||||
|
||||
// If two lines have the same value, preserve the original line order.
|
||||
@@ -398,7 +404,7 @@ ex_sort(exarg_T *eap)
|
||||
if (nrs == NULL)
|
||||
goto sortend;
|
||||
|
||||
sort_abort = sort_ic = sort_rx = sort_nr = 0;
|
||||
sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
|
||||
#ifdef FEAT_FLOAT
|
||||
sort_flt = 0;
|
||||
#endif
|
||||
@@ -409,6 +415,8 @@ ex_sort(exarg_T *eap)
|
||||
;
|
||||
else if (*p == 'i')
|
||||
sort_ic = TRUE;
|
||||
else if (*p == 'l')
|
||||
sort_lc = TRUE;
|
||||
else if (*p == 'r')
|
||||
sort_rx = TRUE;
|
||||
else if (*p == 'n')
|
||||
@@ -614,8 +622,7 @@ ex_sort(exarg_T *eap)
|
||||
change_occurred = TRUE;
|
||||
|
||||
s = ml_get(get_lnum);
|
||||
if (!unique || i == 0
|
||||
|| (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0)
|
||||
if (!unique || i == 0 || string_compare(s, sortbuf1) != 0)
|
||||
{
|
||||
// Copy the line into a buffer, it may become invalid in
|
||||
// ml_append(). And it's needed for "unique".
|
||||
|
13
src/list.c
13
src/list.c
@@ -1516,6 +1516,7 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
int item_compare_ic;
|
||||
int item_compare_lc;
|
||||
int item_compare_numeric;
|
||||
int item_compare_numbers;
|
||||
#ifdef FEAT_FLOAT
|
||||
@@ -1594,10 +1595,10 @@ item_compare(const void *s1, const void *s2)
|
||||
p2 = (char_u *)"";
|
||||
if (!sortinfo->item_compare_numeric)
|
||||
{
|
||||
if (sortinfo->item_compare_ic)
|
||||
res = STRICMP(p1, p2);
|
||||
if (sortinfo->item_compare_lc)
|
||||
res = strcoll((char *)p1, (char *)p2);
|
||||
else
|
||||
res = STRCMP(p1, p2);
|
||||
res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1706,6 +1707,7 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
|
||||
goto theend; // short list sorts pretty quickly
|
||||
|
||||
info.item_compare_ic = FALSE;
|
||||
info.item_compare_lc = FALSE;
|
||||
info.item_compare_numeric = FALSE;
|
||||
info.item_compare_numbers = FALSE;
|
||||
#ifdef FEAT_FLOAT
|
||||
@@ -1773,6 +1775,11 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
|
||||
info.item_compare_func = NULL;
|
||||
info.item_compare_ic = TRUE;
|
||||
}
|
||||
else if (STRCMP(info.item_compare_func, "l") == 0)
|
||||
{
|
||||
info.item_compare_func = NULL;
|
||||
info.item_compare_lc = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -15,6 +15,25 @@ func Test_sort_strings()
|
||||
" numbers compared as strings
|
||||
call assert_equal([1, 2, 3], sort([3, 2, 1]))
|
||||
call assert_equal([13, 28, 3], sort([3, 28, 13]))
|
||||
|
||||
call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
|
||||
\ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
|
||||
|
||||
call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
|
||||
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
|
||||
|
||||
let lc = execute('language collate')
|
||||
" With the following locales, the accented letters are ordered
|
||||
" similarly to the non-accentuated letters...
|
||||
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
|
||||
call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
|
||||
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
|
||||
" ... whereas with a Swedish locale, the accented letters are ordered
|
||||
" after Z.
|
||||
elseif lc =~? '"sv.*utf-\?8"'
|
||||
call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
|
||||
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
|
||||
endif
|
||||
endfunc
|
||||
|
||||
func Test_sort_numeric()
|
||||
@@ -1204,6 +1223,57 @@ func Test_sort_cmd()
|
||||
\ },
|
||||
\ ]
|
||||
|
||||
" With the following locales, the accented letters are ordered
|
||||
" similarly to the non-accentuated letters...
|
||||
let lc = execute('language collate')
|
||||
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
|
||||
let tests += [
|
||||
\ {
|
||||
\ 'name' : 'sort with locale',
|
||||
\ 'cmd' : '%sort l',
|
||||
\ 'input' : [
|
||||
\ 'A',
|
||||
\ 'E',
|
||||
\ 'O',
|
||||
\ 'À',
|
||||
\ 'È',
|
||||
\ 'É',
|
||||
\ 'Ô',
|
||||
\ 'Œ',
|
||||
\ 'Z',
|
||||
\ 'a',
|
||||
\ 'e',
|
||||
\ 'o',
|
||||
\ 'à',
|
||||
\ 'è',
|
||||
\ 'é',
|
||||
\ 'ô',
|
||||
\ 'œ',
|
||||
\ 'z'
|
||||
\ ],
|
||||
\ 'expected' : [
|
||||
\ 'a',
|
||||
\ 'A',
|
||||
\ 'à',
|
||||
\ 'À',
|
||||
\ 'e',
|
||||
\ 'E',
|
||||
\ 'é',
|
||||
\ 'É',
|
||||
\ 'è',
|
||||
\ 'È',
|
||||
\ 'o',
|
||||
\ 'O',
|
||||
\ 'ô',
|
||||
\ 'Ô',
|
||||
\ 'œ',
|
||||
\ 'Œ',
|
||||
\ 'z',
|
||||
\ 'Z'
|
||||
\ ]
|
||||
\ },
|
||||
\ ]
|
||||
endif
|
||||
if has('float')
|
||||
let tests += [
|
||||
\ {
|
||||
|
@@ -750,6 +750,8 @@ static char *(features[]) =
|
||||
|
||||
static int included_patches[] =
|
||||
{ /* Add new patch number below this line */
|
||||
/**/
|
||||
1933,
|
||||
/**/
|
||||
1932,
|
||||
/**/
|
||||
|
Reference in New Issue
Block a user