0
0
mirror of https://github.com/vim/vim.git synced 2025-09-24 03:44:06 -04:00

patch 8.2.0894: :mkspell can take very long if the word count is high

Problem:    :mkspell can take very long if the word count is high.
Solution:   Use long instead of int for the node counts to avoid negative
            numbers.  Increase the limits by 20% if the compression had no
            effect.
This commit is contained in:
Bram Moolenaar
2020-06-03 20:51:11 +02:00
parent fb517bac23
commit 59f88fbf24
2 changed files with 36 additions and 18 deletions

View File

@@ -1995,7 +1995,7 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int
static wordnode_T *get_wordnode(spellinfo_T *spin); static wordnode_T *get_wordnode(spellinfo_T *spin);
static void free_wordnode(spellinfo_T *spin, wordnode_T *n); static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); static void wordtree_compress(spellinfo_T *spin, wordnode_T *root);
static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot);
static int node_equal(wordnode_T *n1, wordnode_T *n2); static int node_equal(wordnode_T *n1, wordnode_T *n2);
static void clear_node(wordnode_T *node); static void clear_node(wordnode_T *node);
static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
@@ -2019,12 +2019,18 @@ static void init_spellfile(void);
#define CONDIT_AFF 8 // word already has an affix #define CONDIT_AFF 8 // word already has an affix
/* /*
* Tunable parameters for when the tree is compressed. See 'mkspellmem'. * Tunable parameters for when the tree is compressed. Filled from the
* 'mkspellmem' option.
*/ */
static long compress_start = 30000; // memory / SBLOCKSIZE static long compress_start = 30000; // memory / SBLOCKSIZE
static long compress_inc = 100; // memory / SBLOCKSIZE static long compress_inc = 100; // memory / SBLOCKSIZE
static long compress_added = 500000; // word count static long compress_added = 500000; // word count
// Actually used values. These can change if compression doesn't result in
// reducing the size.
static long used_compress_inc;
static long used_compress_added;
/* /*
* Check the 'mkspellmem' option. Return FAIL if it's wrong. * Check the 'mkspellmem' option. Return FAIL if it's wrong.
* Sets "sps_flags". * Sets "sps_flags".
@@ -4534,7 +4540,7 @@ tree_add_word(
{ {
if (--spin->si_compress_cnt == 1) if (--spin->si_compress_cnt == 1)
// Did enough words to lower the block count limit. // Did enough words to lower the block count limit.
spin->si_blocks_cnt += compress_inc; spin->si_blocks_cnt += used_compress_inc;
} }
/* /*
@@ -4543,9 +4549,9 @@ tree_add_word(
* need that room, thus only compress in the following situations: * need that room, thus only compress in the following situations:
* 1. When not compressed before (si_compress_cnt == 0): when using * 1. When not compressed before (si_compress_cnt == 0): when using
* "compress_start" blocks. * "compress_start" blocks.
* 2. When compressed before and used "compress_inc" blocks before * 2. When compressed before and used "used_compress_inc" blocks before
* adding "compress_added" words (si_compress_cnt > 1). * adding "used_compress_added" words (si_compress_cnt > 1).
* 3. When compressed before, added "compress_added" words * 3. When compressed before, added "used_compress_added" words
* (si_compress_cnt == 1) and the number of free nodes drops below the * (si_compress_cnt == 1) and the number of free nodes drops below the
* maximum word length. * maximum word length.
*/ */
@@ -4556,11 +4562,11 @@ tree_add_word(
#endif #endif
{ {
// Decrement the block counter. The effect is that we compress again // Decrement the block counter. The effect is that we compress again
// when the freed up room has been used and another "compress_inc" // when the freed up room has been used and another "used_compress_inc"
// blocks have been allocated. Unless "compress_added" words have // blocks have been allocated. Unless "used_compress_added" words have
// been added, then the limit is put back again. // been added, then the limit is put back again.
spin->si_blocks_cnt -= compress_inc; spin->si_blocks_cnt -= used_compress_inc;
spin->si_compress_cnt = compress_added; spin->si_compress_cnt = used_compress_added;
if (spin->si_verbose) if (spin->si_verbose)
{ {
@@ -4655,9 +4661,9 @@ free_wordnode(spellinfo_T *spin, wordnode_T *n)
wordtree_compress(spellinfo_T *spin, wordnode_T *root) wordtree_compress(spellinfo_T *spin, wordnode_T *root)
{ {
hashtab_T ht; hashtab_T ht;
int n; long n;
int tot = 0; long tot = 0;
int perc; long perc;
// Skip the root itself, it's not actually used. The first sibling is the // Skip the root itself, it's not actually used. The first sibling is the
// start of the tree. // start of the tree.
@@ -4666,6 +4672,14 @@ wordtree_compress(spellinfo_T *spin, wordnode_T *root)
hash_init(&ht); hash_init(&ht);
n = node_compress(spin, root->wn_sibling, &ht, &tot); n = node_compress(spin, root->wn_sibling, &ht, &tot);
if (tot == 0)
{
// Compression did not have effect. Increase the limits by 20% to
// avoid wasting time on compression, memory will be used anyway.
used_compress_inc += used_compress_inc / 5;
used_compress_added += used_compress_added / 5;
}
#ifndef SPELL_PRINTTREE #ifndef SPELL_PRINTTREE
if (spin->si_verbose || p_verbose > 2) if (spin->si_verbose || p_verbose > 2)
#endif #endif
@@ -4677,7 +4691,7 @@ wordtree_compress(spellinfo_T *spin, wordnode_T *root)
else else
perc = (tot - n) * 100 / tot; perc = (tot - n) * 100 / tot;
vim_snprintf((char *)IObuff, IOSIZE, vim_snprintf((char *)IObuff, IOSIZE,
_("Compressed %d of %d nodes; %d (%d%%) remaining"), _("Compressed %ld of %ld nodes; %ld (%ld%%) remaining"),
n, tot, tot - n, perc); n, tot, tot - n, perc);
spell_message(spin, IObuff); spell_message(spin, IObuff);
} }
@@ -4692,12 +4706,12 @@ wordtree_compress(spellinfo_T *spin, wordnode_T *root)
* Compress a node, its siblings and its children, depth first. * Compress a node, its siblings and its children, depth first.
* Returns the number of compressed nodes. * Returns the number of compressed nodes.
*/ */
static int static long
node_compress( node_compress(
spellinfo_T *spin, spellinfo_T *spin,
wordnode_T *node, wordnode_T *node,
hashtab_T *ht, hashtab_T *ht,
int *tot) // total count of nodes before compressing, long *tot) // total count of nodes before compressing,
// incremented while going through the tree // incremented while going through the tree
{ {
wordnode_T *np; wordnode_T *np;
@@ -4705,9 +4719,9 @@ node_compress(
wordnode_T *child; wordnode_T *child;
hash_T hash; hash_T hash;
hashitem_T *hi; hashitem_T *hi;
int len = 0; long len = 0;
unsigned nr, n; unsigned nr, n;
int compressed = 0; long compressed = 0;
/* /*
* Go through the list of siblings. Compress each child and then try * Go through the list of siblings. Compress each child and then try
@@ -5899,6 +5913,8 @@ mkspell(
ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
hash_init(&spin.si_commonwords); hash_init(&spin.si_commonwords);
spin.si_newcompID = 127; // start compound ID at first maximum spin.si_newcompID = 127; // start compound ID at first maximum
used_compress_inc = compress_inc;
used_compress_added = compress_added;
// default: fnames[0] is output file, following are input files // default: fnames[0] is output file, following are input files
innames = &fnames[1]; innames = &fnames[1];

View File

@@ -746,6 +746,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
894,
/**/ /**/
893, 893,
/**/ /**/