0
0
mirror of https://github.com/vim/vim.git synced 2025-09-25 03:54:15 -04:00

patch 8.2.1933: cannot sort using locale ordering

Problem:    Cannot sort using locale ordering.
Solution:   Add a flag for :sort and sort() to use the locale. (Dominique
            Pellé, closes #7237)
This commit is contained in:
Bram Moolenaar
2020-11-01 13:57:44 +01:00
parent 963734e316
commit 55e29611d2
6 changed files with 112 additions and 12 deletions

View File

@@ -1801,7 +1801,7 @@ Vim has a sorting function and a sorting command. The sorting function can be
found here: |sort()|, |uniq()|.
*:sor* *:sort*
:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
Sort lines in [range]. When no range is given all
lines are sorted.
@@ -1809,6 +1809,14 @@ found here: |sort()|, |uniq()|.
With [i] case is ignored.
With [l] sort uses the current collation locale. See
`:language collate` to check or set the locale used
for ordering. For example, with "en_US.UTF8",
Ö will be ordered after O and before P,
whereas with the Swedish locale "sv_SE.UTF8",
it will be after Z.
Case is typically ignored by the locale.
Options [n][f][x][o][b] are mutually exclusive.
With [n] sorting is done on the first decimal number
@@ -1875,8 +1883,7 @@ found here: |sort()|, |uniq()|.
Note that using `:sort` with `:global` doesn't sort the matching lines, it's
quite useless.
The details about sorting depend on the library function used. There is no
guarantee that sorting obeys the current locale. You will have to try it out.
`:sort` does not use the current locale unless the [l] flag is used.
Vim does do a "stable" sort.
The sorting can be interrupted, but if you interrupt it too late in the

View File

@@ -9700,6 +9700,13 @@ sort({list} [, {func} [, {dict}]]) *sort()* *E702*
When {func} is given and it is '1' or 'i' then case is
ignored.
When {func} is given and it is 'l' then the current collation
locale is used for ordering. See `:language collate` to check or set
the locale used for ordering. For example, with "en_US.UTF8",
Ö will be ordered after O and before P, whereas with the
Swedish locale "sv_SE.UTF8", it will be after Z.
Case is typically ignored by the locale.
When {func} is given and it is 'n' then all items will be
sorted numerical (Implementation detail: This uses the
strtod() function to parse numbers, Strings, Lists, Dicts and

View File

@@ -277,6 +277,7 @@ linelen(int *has_tab)
static char_u *sortbuf1;
static char_u *sortbuf2;
static int sort_lc; // sort using locale
static int sort_ic; // ignore case
static int sort_nr; // sort on number
static int sort_rx; // sort on regex instead of skipping it
@@ -307,7 +308,13 @@ typedef struct
} st_u;
} sorti_T;
static int sort_compare(const void *s1, const void *s2);
/*
 * Compare two strings for sorting, honouring the current sort flags.
 * When "sort_lc" is set the comparison is delegated to strcoll(), which
 * takes precedence over "sort_ic".  Otherwise STRICMP() is used when
 * "sort_ic" is set and STRCMP() when it is not.
 * Returns the result of whichever comparison was used.
 */
static int
string_compare(const void *s1, const void *s2)
{
    if (!sort_lc)
    {
	// No locale ordering requested: choose between the case-ignoring
	// and the plain comparison.
	if (sort_ic)
	    return STRICMP(s1, s2);
	return STRCMP(s1, s2);
    }

    // Locale ordering requested.
    return strcoll((char *)s1, (char *)s2);
}
static int
sort_compare(const void *s1, const void *s2)
@@ -350,8 +357,7 @@ sort_compare(const void *s1, const void *s2)
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0;
result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
: STRCMP(sortbuf1, sortbuf2);
result = string_compare(sortbuf1, sortbuf2);
}
// If two lines have the same value, preserve the original line order.
@@ -398,7 +404,7 @@ ex_sort(exarg_T *eap)
if (nrs == NULL)
goto sortend;
sort_abort = sort_ic = sort_rx = sort_nr = 0;
sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
#ifdef FEAT_FLOAT
sort_flt = 0;
#endif
@@ -409,6 +415,8 @@ ex_sort(exarg_T *eap)
;
else if (*p == 'i')
sort_ic = TRUE;
else if (*p == 'l')
sort_lc = TRUE;
else if (*p == 'r')
sort_rx = TRUE;
else if (*p == 'n')
@@ -614,8 +622,7 @@ ex_sort(exarg_T *eap)
change_occurred = TRUE;
s = ml_get(get_lnum);
if (!unique || i == 0
|| (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0)
if (!unique || i == 0 || string_compare(s, sortbuf1) != 0)
{
// Copy the line into a buffer, it may become invalid in
// ml_append(). And it's needed for "unique".

View File

@@ -1516,6 +1516,7 @@ typedef struct
typedef struct
{
int item_compare_ic;
int item_compare_lc;
int item_compare_numeric;
int item_compare_numbers;
#ifdef FEAT_FLOAT
@@ -1594,10 +1595,10 @@ item_compare(const void *s1, const void *s2)
p2 = (char_u *)"";
if (!sortinfo->item_compare_numeric)
{
if (sortinfo->item_compare_ic)
res = STRICMP(p1, p2);
if (sortinfo->item_compare_lc)
res = strcoll((char *)p1, (char *)p2);
else
res = STRCMP(p1, p2);
res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
}
else
{
@@ -1706,6 +1707,7 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
goto theend; // short list sorts pretty quickly
info.item_compare_ic = FALSE;
info.item_compare_lc = FALSE;
info.item_compare_numeric = FALSE;
info.item_compare_numbers = FALSE;
#ifdef FEAT_FLOAT
@@ -1773,6 +1775,11 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
info.item_compare_func = NULL;
info.item_compare_ic = TRUE;
}
else if (STRCMP(info.item_compare_func, "l") == 0)
{
info.item_compare_func = NULL;
info.item_compare_lc = TRUE;
}
}
}

View File

@@ -15,6 +15,25 @@ func Test_sort_strings()
" numbers compared as strings
call assert_equal([1, 2, 3], sort([3, 2, 1]))
call assert_equal([13, 28, 3], sort([3, 28, 13]))
call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
\ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
let lc = execute('language collate')
" With the following locales, the accentuated letters are ordered
" similarly to the non-accentuated letters...
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
" ... whereas with a Swedish locale, the accentuated letters are ordered
" after Z.
elseif lc =~? '"sv.*utf-\?8"'
call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
endif
endfunc
func Test_sort_numeric()
@@ -1204,6 +1223,57 @@ func Test_sort_cmd()
\ },
\ ]
" With the following locales, the accentuated letters are ordered
" similarly to the non-accentuated letters...
let lc = execute('language collate')
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
let tests += [
\ {
\ 'name' : 'sort with locale',
\ 'cmd' : '%sort l',
\ 'input' : [
\ 'A',
\ 'E',
\ 'O',
\ 'À',
\ 'È',
\ 'É',
\ 'Ô',
\ 'Œ',
\ 'Z',
\ 'a',
\ 'e',
\ 'o',
\ 'à',
\ 'è',
\ 'é',
\ 'ô',
\ 'œ',
\ 'z'
\ ],
\ 'expected' : [
\ 'a',
\ 'A',
\ 'à',
\ 'À',
\ 'e',
\ 'E',
\ 'é',
\ 'É',
\ 'è',
\ 'È',
\ 'o',
\ 'O',
\ 'ô',
\ 'Ô',
\ 'œ',
\ 'Œ',
\ 'z',
\ 'Z'
\ ]
\ },
\ ]
endif
if has('float')
let tests += [
\ {

View File

@@ -750,6 +750,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
1933,
/**/
1932,
/**/