vim/src/spell.c

/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * spell.c: code for spell checking
 */

#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
# include <io.h>	/* for lseek(), must be before vim.h */
#endif

#include "vim.h"

#if defined(FEAT_SYN_HL) || defined(PROTO)

#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif

/*
 * Structure that is used to store the text from the language file.  This
 * avoids the need to allocate each individual word and copying it.  It's
 * allocated in big chunks for speed.
 *
 * Blocks form a singly linked list headed by "sl_block" of the language they
 * belong to.  The hashtable keys and the match/addition tables of that
 * language point directly into "sb_data", thus a block must stay allocated
 * for as long as those tables are in use.
 */
#define  SBLOCKSIZE 4096	/* default size of sb_data; number of bytes read
				   from the spell file per block */
typedef struct sblock_S sblock_T;
struct sblock_S
{
    sblock_T	*sb_next;	/* next block in list */
    char_u	sb_data[1];	/* data, actually longer: the block is
				   allocated with the wanted number of data
				   bytes added to the size of the struct */
};

/*
 * Structure used to store words and other info for one language.
 * Loaded languages are kept in the list that starts at "first_lang".
 */
typedef struct slang_S slang_T;

struct slang_S
{
    slang_T	*sl_next;	/* next language */
    char_u	sl_name[2];	/* language name "en", "nl", etc.; exactly two
				   bytes, NOT NUL terminated */
    hashtab_T	sl_ht;		/* hashtable with all words; keys are the
				   case-folded words, the byte before each key
				   holds its flags */
    garray_T	sl_match;	/* table with pointers to matches ("=" lines),
				   kept sorted on the text after the first
				   byte */
    garray_T	sl_add;		/* table with pointers to additions ("+"
				   lines), filled the same way as sl_match */
    char_u	sl_regions[13];	/* table with up to 6 region names: two bytes
				   per name plus a terminating NUL */
    sblock_T	*sl_block;	/* list with allocated memory blocks */
};

static slang_T *first_lang = NULL;

/*
 * Structure used in "b_langp", filled from 'spelllang'.
 * "b_langp" is a growarray of these entries, one for each language that
 * could be loaded, followed by one entry with "lp_slang" set to NULL that
 * marks the end of the list.
 */
typedef struct langp_S
{
    slang_T	*lp_slang;	/* info for this language (NULL for last one) */
    int		lp_region;	/* bitmask for region or REGION_ALL */
} langp_T;

/*
 * Typed access to the items of a growarray.
 * LANGP_ENTRY() takes the growarray itself and evaluates to a pointer to
 * langp_T entry "i".
 * MATCH_ENTRY() takes a pointer to a growarray of "char_u *" items and
 * evaluates to item "i" (usable as an lvalue).
 * Every argument and the complete expansion are parenthesized, so that the
 * macros can be used with any index expression and inside any surrounding
 * expression.
 */
#define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
#define MATCH_ENTRY(gap, i)	(*(((char_u **)(gap)->ga_data) + (i)))

/*
 * The byte before a word in the hashtable indicates the type of word.
 * Also used for the byte just before a match.
 * The top two bits are used to indicate rare and case-sensitive words.
 * The lower bits are used to indicate the region in which the word is valid.
 * Words valid in all regions use REGION_ALL.
 */
#define REGION_MASK	0x3f	/* the six region bits, one per region name */
#define REGION_ALL	0x3f	/* valid in every region; also returned by
				   find_region() for "not found" */
#define CASE_MASK	0x40	/* word starts with an uppercase letter in the
				   spell file */
#define RARE_MASK	0x80	/* word was marked as rare (">" line) */

/*
 * Result of looking up a word in spell_check().  Everything but SP_OK makes
 * spell_check() set the highlight attributes.
 */
#define SP_OK		0	/* word is good */
#define SP_BAD		1	/* word not found: HLF_SPB */
#define SP_RARE		2	/* word is rare: HLF_SPR */
#define SP_LOCAL	3	/* word is for another region: HLF_SPL */

/* Prototypes for the functions that are local to this file. */
static slang_T *spell_load_lang __ARGS((char_u *lang));
static void spell_load_file __ARGS((char_u *fname));
static int find_region __ARGS((char_u *rp, char_u *region));

/*
 * Main spell-checking function.
 * "ptr" points to the start of a word.
 * "*attrp" is set to the attributes for a badly spelled word.  For a non-word
 * or when it's OK it remains unchanged.
 * This must only be called when 'spelllang' is not empty.
 * Returns the length of the word in bytes, also when it's OK, so that the
 * caller can skip over the word.  The length may include text after the word
 * itself when a match or addition item was found ("you've").
 * When the buffer has no language list at all nothing can be checked: the
 * length of the word is returned and "*attrp" is left alone.
 */
    int
spell_check(wp, ptr, attrp)
    win_T	*wp;		/* current window */
    char_u	*ptr;
    int		*attrp;
{
    char_u	*e;		/* end of the word (or of the match) */
    langp_T	*lp;
    int		result;
    hash_T	hash;
    hashitem_T	*hi;
    int		c;
#define MAXWLEN 80	/* assume max. word len is 80 */
    char_u	word[MAXWLEN + 1];
    garray_T	*gap;
    int		l, h, t;
    char_u	*p;
    int		n;

    /* Find the end of the word.  We already know that *ptr is a word char. */
    e = ptr;
    do
    {
	mb_ptr_adv(e);
    } while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));

    /* Without a language list there is no terminating entry to stop the loop
     * below, avoid using a NULL pointer. */
    if (wp->w_buffer->b_langp.ga_data == NULL)
	return (int)(e - ptr);

    /* The word is bad unless we find it in the dictionary. */
    result = SP_BAD;

    /* Words are always stored with folded case. */
    (void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);
    hash = hash_hash(word);

    /*
     * Loop over the languages specified in 'spelllang'.
     * We check them all, because a match may find a longer word.
     */
    for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;
								     ++lp)
    {
	/* Check words when it wasn't recognized as a good word yet. */
	if (result != SP_OK)
	{
	    /* Word lookup.  Using a hash table is fast. */
	    hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);
	    if (!HASHITEM_EMPTY(hi))
	    {
		/* The character before the key indicates the type of word. */
		c = hi->hi_key[-1];
		if ((c & CASE_MASK) != 0)
		{
		    /* Need to check first letter is uppercase.  If it is,
		     * check region.  If it isn't it may be a rare word. */
		    if (
#ifdef FEAT_MBYTE
			    MB_ISUPPER(mb_ptr2char(ptr))
#else
			    MB_ISUPPER(*ptr)
#endif
			    )
		    {
			if ((c & lp->lp_region) == 0)
			    result = SP_LOCAL;
			else
			    result = SP_OK;
		    }
		    else if (c & RARE_MASK)
			result = SP_RARE;
		}
		else
		{
		    if ((c & lp->lp_region) == 0)
			result = SP_LOCAL;
		    else if (c & RARE_MASK)
			result = SP_RARE;
		    else
			result = SP_OK;
		}
	    }
	}

	/* Match lookup.  Uses a binary search.  If there is a match adjust
	 * "e" to the end.  This is also done when a word matched, because
	 * "you've" is longer than "you".
	 * The text of a match starts one byte after the stored pointer, the
	 * first byte holds the region flags.  The text in the buffer is
	 * compared as it is, without folding case. */
	gap = &lp->lp_slang->sl_match;
	l = 0;			/* low index */
	h = gap->ga_len - 1;	/* high index */
	/* keep searching, the match must be between "l" and "h" (inclusive) */
	while (h >= l)
	{
	    t = (h + l) / 2;
	    p = MATCH_ENTRY(gap, t) + 1;
	    for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)
		;
	    if (p[n] == 0)
	    {
		if ((ptr[n] == 0 || !vim_iswordc_buf(ptr + n, wp->w_buffer)))
		{
		    /* match! */
		    e = ptr + n;
		    if (result != SP_OK)
		    {
			if ((lp->lp_region & p[-1]) == 0)
			    result = SP_LOCAL;
			else
			    result = SP_OK;
		    }
		    break;
		}
		/* match is too short, next item is new low index */
		l = t + 1;
	    }
	    else if (p[n] < ptr[n])
		/* match is before word, next item is new low index */
		l = t + 1;
	    else
		/* match is after word, previous item is new high index */
		h = t - 1;
	}

	/* Addition lookup.  Uses a linear search, there should be very few.
	 * If there is a match adjust "e" to the end.  This doesn't change
	 * whether a word was good or bad, only the length. */
	gap = &lp->lp_slang->sl_add;
	for (t = 0; t < gap->ga_len; ++t)
	{
	    p = MATCH_ENTRY(gap, t) + 1;
	    for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)
		;
	    if (p[n] == 0
		      && (e[n] == 0 || !vim_iswordc_buf(e + n, wp->w_buffer)))
	    {
		/* match */
		e += n;
		break;
	    }
	}
    }

    if (result != SP_OK)
    {
	if (result == SP_BAD)
	    *attrp = highlight_attr[HLF_SPB];
	else if (result == SP_RARE)
	    *attrp = highlight_attr[HLF_SPR];
	else
	    *attrp = highlight_attr[HLF_SPL];
    }

    return (int)(e - ptr);
}

/*
 * The callback invoked through do_in_runtimepath() only gets the file name,
 * therefore the language that is being loaded is passed in this variable.
 */
static slang_T	    *load_lp;	/* passed from spell_load_lang() to
				   spell_load_file() */

/*
 * Load language "lang[2]".
 * Only the first two bytes of "lang" are used.
 * First the spell files for the current encoding are searched for in
 * 'runtimepath' ("spell/xx.{encoding}.spl").  When none is found the ASCII
 * files ("spell/xx.spl") are tried.
 * When files were found the new language is added to the list that starts at
 * "first_lang", also when it was the ASCII fallback that was loaded.
 * Returns the new language or NULL when out of memory or when no spell file
 * was found (a warning is given then).
 */
    static slang_T *
spell_load_lang(lang)
    char_u	*lang;
{
    slang_T	*lp;
    char_u	fname_enc[80];
    char_u	fname_ascii[20];
    char_u	*p;

    lp = (slang_T *)alloc(sizeof(slang_T));
    if (lp == NULL)
	return NULL;

    lp->sl_next = NULL;
    lp->sl_name[0] = lang[0];
    lp->sl_name[1] = lang[1];
    hash_init(&lp->sl_ht);
    ga_init2(&lp->sl_match, sizeof(char_u *), 20);
    ga_init2(&lp->sl_add, sizeof(char_u *), 4);
    lp->sl_regions[0] = NUL;
    lp->sl_block = NULL;

    /* Find all spell files for "lang" in 'runtimepath' and load them.
     * Use 'encoding', except that we use "latin1" for "latin9".
     * The length of 'encoding' is checked to make sure the name fits in
     * "fname_enc". */
#ifdef FEAT_MBYTE
    if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
	p = p_enc;
    else
#endif
	p = (char_u *)"latin1";
    load_lp = lp;
    sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
    if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL)
    {
	/* Try again to find an ASCII spell file. */
	sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
	if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL)
	{
	    vim_free(lp);
	    /* Skip "spell/" in the message. */
	    smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
							       fname_enc + 6);
	    return NULL;
	}
    }

    /* Remember the language, no matter which of the two names was used.
     * Otherwise it would be loaded again every time 'spelllang' is set and
     * spell_reload() would never free it. */
    lp->sl_next = first_lang;
    first_lang = lp;

    return lp;
}

/*
 * Load one spell file into "load_lp".
 * Invoked through do_in_runtimepath().
 */
    static void
spell_load_file(fname)
    char_u	*fname;
{
    int		fd;
    size_t	len;
    size_t	l;
    size_t	rest = 0;
    char_u	*p = NULL, *np;
    sblock_T	*bl;
    hash_T	hash;
    hashitem_T	*hi;
    int		c;
    int		region = REGION_ALL;
    char_u	word[MAXWLEN + 1];
    int		n;

    fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
    if (fd < 0)
    {
	EMSG2(_(e_notopen), fname);
	return;
    }

    /* Get the length of the whole file. */
    len = lseek(fd, (off_t)0, SEEK_END);
    lseek(fd, (off_t)0, SEEK_SET);

    /* Loop, reading the file one block at a time.
     * "rest" is the length of an incomplete line at the previous block.
     * "p" points to the remainder. */
    while (len > 0)
    {
	/* Allocate a block of memory to store the info in.  This is not freed
	 * until spell_reload() is called. */
	if (len > SBLOCKSIZE)
	    l = SBLOCKSIZE;
	else
	    l = len;
	len -= l;
	bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));
	if (bl == NULL)
	    break;
	bl->sb_next = load_lp->sl_block;
	load_lp->sl_block = bl;

	/* Read a block from the file.  Prepend the remainder of the previous
	 * block. */
	if (rest > 0)
	    mch_memmove(bl->sb_data, p, rest);
	if (read(fd, bl->sb_data + rest, l) != l)
	{
	    EMSG2(_(e_notread), fname);
	    break;
	}
	l += rest;
	rest = 0;

	/* Deal with each line that was read until we finish the block. */
	for (p = bl->sb_data; l > 0; p = np)
	{
	    /* "np" points to the char after the line (CR or NL). */
	    for (np = p; l > 0 && *np >= ' '; ++np)
		--l;
	    if (l == 0)
	    {
		/* Incomplete line (or end of file). */
		rest = np - p;
		if (len == 0)
		    EMSG2(_("E751: Truncated spell file: %s"), fname);
		break;
	    }
	    *np = NUL;	    /* terminate the line with a NUL */

	    /* Skip comment and empty lines. */
	    c = *p;
	    if (c != '#' && np > p)
	    {
		if (c == '=' || c == '+')
		{
		    garray_T *gap;

		    /* Match or Add item. */
		    if (c == '=')
			gap = &load_lp->sl_match;
		    else
			gap = &load_lp->sl_add;

		    if (ga_grow(gap, 1) == OK)
		    {
			for (n = 0; n < gap->ga_len; ++n)
			    if ((c = STRCMP(p + 1,
						MATCH_ENTRY(gap, n) + 1)) < 0)
				break;
			if (c == 0)
			{
			    if (p_verbose > 0)
				smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),
								p + 1, fname);
			}
			else
			{
			    mch_memmove((char_u **)gap->ga_data + n + 1,
				    (char_u **)gap->ga_data + n,
				    (gap->ga_len - n) * sizeof(char_u *));
			    *(((char_u **)gap->ga_data) + n) = p;
			    *p = region;
			    ++gap->ga_len;
			}
		    }
		}
		else if (c == '-')
		{
		    /* region item */
		    ++p;
		    if (*p == '-')
			/* end of a region */
			region = REGION_ALL;
		    else
		    {
			char_u	*rp = load_lp->sl_regions;
			int	r;

			/* The region may be repeated: "-ca-uk".  Fill
			 * "region" with the bit mask for the ones we find. */
			region = 0;
			for (;;)
			{
			    /* start of a region */
			    r = find_region(rp, p);
			    if (r == REGION_ALL)
			    {
				/* new region, add it */
				r = STRLEN(rp);
				if (r >= 12)
				{
				    EMSG2(_("E752: Too many regions in %s"),
								       fname);
				    r = REGION_ALL;
				}
				else
				{
				    rp[r] = p[0];
				    rp[r + 1] = p[1];
				    rp[r + 2] = NUL;
				    r = 1 << (r / 2);
				}
			    }
			    else
				r = 1 << r;

			    region |= r;
			    if (p[2] != '-')
			    {
				if (p[2] != NUL)
				    EMSG2(_("E753: Invalid character in \"%s\""),
								       p - 1);
				break;
			    }
			    p += 3;
			}
		    }
		}
		else
		{
		    /* add the word */
		    if (c == '>')
			c = region | RARE_MASK;
		    else
		    {
			if (c != ' ')
			    EMSG2(_("E753: Invalid character in \"%s\""), p);
			c = region;
		    }
#ifdef FEAT_MBYTE
		    if (MB_ISUPPER(mb_ptr2char(p + 1)))
#else
		    if (MB_ISUPPER(p[1]))
#endif
			c |= CASE_MASK;
		    *p++ = c;
		    (void)str_foldcase(p, np - p, word, MAXWLEN + 1);
		    n = STRLEN(word);
		    if (n > np - p)
		    {
			sblock_T	*s;

			/* Folding case made word longer!  We need to allocate
			 * memory for it. */
			s = (sblock_T *)alloc((unsigned)sizeof(sblock_T)
								     + n + 1);
			if (s != NULL)
			{
			    s->sb_next = load_lp->sl_block;
			    load_lp->sl_block = s;
			    s->sb_data[0] = p[-1];
			    p = s->sb_data + 1;
			}
		    }
		    mch_memmove(p, word, n + 1);

		    hash = hash_hash(p);
		    hi = hash_lookup(&load_lp->sl_ht, p, hash);
		    if (!HASHITEM_EMPTY(hi))
		    {
			c = hi->hi_key[-1];
			if ((c & (CASE_MASK | RARE_MASK))
					 == (p[-1] & (CASE_MASK | RARE_MASK)))
			{
			    if (p_verbose > 0)
				smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
								    p, fname);
			}
			else
			    hi->hi_key[-1] |= (p[-1] & (CASE_MASK | RARE_MASK));
		    }
		    else
			hash_add_item(&load_lp->sl_ht, hi, p, hash);
		}
	    }

	    while (l > 0 && *np < ' ')
	    {
		++np;
		--l;
	    }
	}
    }

    close(fd);
}

/*
 * Parse 'spelllang' and set buf->b_langp accordingly.
 * 'spelllang' is a comma separated list of items.  Each item is a two-letter
 * language name ("en") that can be followed by an underscore and a
 * two-letter region name ("en_gb").  Anything else is an error.
 * A language that was not loaded yet is loaded now.  When that fails the
 * language is left out.  When the region is not known for the language a
 * warning is given and all regions are accepted.
 * Returns an error message or NULL.  In case of an error buf->b_langp is
 * not changed.
 */
    char_u *
did_set_spelllang(buf)
    buf_T	*buf;
{
    garray_T	ga;
    char_u	*lang;
    char_u	*e;
    char_u	*region;
    int		region_mask;
    slang_T	*lp;
    int		c;
    int		len;

    ga_init2(&ga, sizeof(langp_T), 2);

    /* loop over comma separated languages. */
    for (lang = buf->b_p_spl; *lang != NUL; lang = e)
    {
	e = vim_strchr(lang, ',');
	if (e == NULL)
	    e = lang + STRLEN(lang);

	/* Only accept "xx" and "xx_yy".  Shorter items must be refused as
	 * well, otherwise the comma or NUL after the item would be used as
	 * part of the language name. */
	len = (int)(e - lang);
	if (len == 2)
	    region = NULL;
	else if (len == 5 && lang[2] == '_')
	    region = lang + 3;
	else
	{
	    ga_clear(&ga);
	    return e_invarg;
	}

	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
	    if (STRNICMP(lp->sl_name, lang, 2) == 0)
		break;

	if (lp == NULL)
	    /* Not found, load the language. */
	    lp = spell_load_lang(lang);

	if (lp != NULL)
	{
	    if (region == NULL)
		region_mask = REGION_ALL;
	    else
	    {
		/* find region in sl_regions */
		c = find_region(lp->sl_regions, region);
		if (c == REGION_ALL)
		{
		    /* Temporarily end the option value after this item to
		     * be able to use it in the message. */
		    c = lang[5];
		    lang[5] = NUL;
		    smsg((char_u *)_("Warning: region %s not supported"), lang);
		    lang[5] = c;
		    region_mask = REGION_ALL;
		}
		else
		    region_mask = 1 << c;
	    }

	    if (ga_grow(&ga, 1) == FAIL)
	    {
		ga_clear(&ga);
		return e_outofmem;
	    }
	    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
	    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
	    ++ga.ga_len;
	}

	if (*e == ',')
	    ++e;
    }

    /* Add a NULL entry to mark the end of the list. */
    if (ga_grow(&ga, 1) == FAIL)
    {
	ga_clear(&ga);
	return e_outofmem;
    }
    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
    ++ga.ga_len;

    /* Everything is fine, store the new b_langp value. */
    ga_clear(&buf->b_langp);
    buf->b_langp = ga;

    return NULL;
}

/*
 * Look up the two-character region name "region[2]" in the NUL terminated
 * table "rp" (points to "sl_regions"), which holds the known names one after
 * the other, two bytes each.
 * Returns the index of the name (zero for the first one) or REGION_ALL when
 * the name is not in the table.
 */
    static int
find_region(rp, region)
    char_u	*rp;
    char_u	*region;
{
    int		idx;

    for (idx = 0; rp[2 * idx] != NUL; ++idx)
	if (rp[2 * idx] == region[0] && rp[2 * idx + 1] == region[1])
	    return idx;

    /* hit the end of the table */
    return REGION_ALL;
}

# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Clear all spelling tables and reload them.
 * Used after 'encoding' is set.
 * All loaded languages are freed, including their memory blocks.  Then the
 * language list of every buffer is cleared, because it points to the freed
 * languages, and it is set again for buffers where 'spelllang' is not
 * empty.  That loads the spell files again.
 */
    void
spell_reload()
{
    buf_T	*buf;
    slang_T	*lp;
    sblock_T	*sp;

    /* Unload all allocated memory. */
    while (first_lang != NULL)
    {
	lp = first_lang;
	first_lang = lp->sl_next;

	hash_clear(&lp->sl_ht);
	ga_clear(&lp->sl_match);
	ga_clear(&lp->sl_add);
	while (lp->sl_block != NULL)
	{
	    sp = lp->sl_block;
	    lp->sl_block = sp->sb_next;
	    vim_free(sp);
	}

	/* Also free the language itself, it was allocated in
	 * spell_load_lang() and nothing refers to it any longer. */
	vim_free(lp);
    }

    /* Go through all buffers and handle 'spelllang'. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
	ga_clear(&buf->b_langp);
	if (*buf->b_p_spl != NUL)
	    did_set_spelllang(buf);
    }
}
# endif

#endif  /* FEAT_SYN_HL */
updated for version 7.0062 2005-03-21 08:23:33 +00:00			`/* vi:set ts=8 sts=4 sw=4:`
			`*`
			`* VIM - Vi IMproved by Bram Moolenaar`
			`*`
			`* Do ":help uganda" in Vim to read copying and usage conditions.`
			`* Do ":help credits" in Vim to see a list of people who contributed.`
			`* See README.txt for an overview of the Vim source code.`
			`*/`

			`/*`
			`* spell.c: code for spell checking`
			`*/`

			`#if defined(MSDOS) \|\| defined(WIN16) \|\| defined(WIN32) \|\| defined(_WIN64)`
			`# include <io.h> /* for lseek(), must be before vim.h */`
			`#endif`

			`#include "vim.h"`

			`#if defined(FEAT_SYN_HL) \|\| defined(PROTO)`

			`#ifdef HAVE_FCNTL_H`
			`# include <fcntl.h>`
			`#endif`

			`/*`
			`* Structure that is used to store the text from the language file. This`
			`* avoids the need to allocate each individual word and copying it. It's`
			`* allocated in big chunks for speed.`
			`*/`
			`#define SBLOCKSIZE 4096 /* default size of sb_data */`
			`typedef struct sblock_S sblock_T;`
			`struct sblock_S`
			`{`
			`sblock_T sb_next; / next block in list */`
			`char_u sb_data[1]; /* data, actually longer */`
			`};`

			`/*`
			`* Structure used to store words and other info for one language.`
			`*/`
			`typedef struct slang_S slang_T;`

			`struct slang_S`
			`{`
			`slang_T sl_next; / next language */`
			`char_u sl_name[2]; /* language name "en", "nl", etc. */`
			`hashtab_T sl_ht; /* hashtable with all words */`
			`garray_T sl_match; /* table with pointers to matches */`
			`garray_T sl_add; /* table with pointers to additions */`
			`char_u sl_regions[13]; /* table with up to 6 region names */`
			`sblock_T sl_block; / list with allocated memory blocks */`
			`};`

			`static slang_T *first_lang = NULL;`

			`/*`
			`* Structure used in "b_langp", filled from 'spelllang'.`
			`*/`
			`typedef struct langp_S`
			`{`
			`slang_T lp_slang; / info for this language (NULL for last one) */`
			`int lp_region; /* bitmask for region or REGION_ALL */`
			`} langp_T;`

			`#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))`
			`#define MATCH_ENTRY(gap, i) (((char_u *)(gap)->ga_data) + i)`

			`/*`
			`* The byte before a word in the hashtable indicates the type of word.`
			`* Also used for the byte just before a match.`
			`* The top two bits are used to indicate rare and case-sensitive words.`
			`* The lower bits are used to indicate the region in which the word is valid.`
			`* Words valid in all regions use REGION_ALL.`
			`*/`
			`#define REGION_MASK 0x3f`
			`#define REGION_ALL 0x3f`
			`#define CASE_MASK 0x40`
			`#define RARE_MASK 0x80`

			`#define SP_OK 0`
			`#define SP_BAD 1`
			`#define SP_RARE 2`
			`#define SP_LOCAL 3`

			`static slang_T spell_load_lang __ARGS((char_u lang));`
			`static void spell_load_file __ARGS((char_u *fname));`
			`static int find_region __ARGS((char_u rp, char_u region));`

			`/*`
			`* Main spell-checking function.`
			`* "ptr" points to the start of a word.`
			`* "*attrp" is set to the attributes for a badly spelled word. For a non-word`
			`* or when it's OK it remains unchanged.`
			`* This must only be called when 'spelllang' is not empty.`
			`* Returns the length of the word in bytes, also when it's OK, so that the`
			`* caller can skip over the word.`
			`*/`
			`int`
			`spell_check(wp, ptr, attrp)`
			`win_T wp; / current window */`
			`char_u *ptr;`
			`int *attrp;`
			`{`
			`char_u *e;`
			`langp_T *lp;`
			`int result;`
			`int len = 0;`
			`hash_T hash;`
			`hashitem_T *hi;`
			`int c;`
			`#define MAXWLEN 80 /* assume max. word len is 80 */`
			`char_u word[MAXWLEN + 1];`
			`garray_T *gap;`
			`int l, h, t;`
			`char_u *p;`
			`int n;`

			`/* Find the end of the word. We already know that ptr is a word char. /`
			`e = ptr;`
			`do`
			`{`
			`mb_ptr_adv(e);`
			`++len;`
			`} while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));`

			`/* The word is bad unless we find it in the dictionary. */`
			`result = SP_BAD;`

			`/* Words are always stored with folded case. */`
			`(void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);`
			`hash = hash_hash(word);`

			`/*`
			`* Loop over the languages specified in 'spelllang'.`
			`* We check them all, because a match may find a longer word.`
			`*/`
			`for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;`
			`++lp)`
			`{`
			`/* Check words when it wasn't recognized as a good word yet. */`
			`if (result != SP_OK)`
			`{`
			`/* Word lookup. Using a hash table is fast. */`
			`hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);`
			`if (!HASHITEM_EMPTY(hi))`
			`{`
			`/* The character before the key indicates the type of word. */`
			`c = hi->hi_key[-1];`
			`if ((c & CASE_MASK) != 0)`
			`{`
			`/* Need to check first letter is uppercase. If it is,`
			`* check region. If it isn't it may be a rare word. */`
			`if (`
			`#ifdef FEAT_MBYTE`
			`MB_ISUPPER(mb_ptr2char(ptr))`
			`#else`
			`MB_ISUPPER(*ptr)`
			`#endif`
			`)`
			`{`
			`if ((c & lp->lp_region) == 0)`
			`result = SP_LOCAL;`
			`else`
			`result = SP_OK;`
			`}`
			`else if (c & RARE_MASK)`
			`result = SP_RARE;`
			`}`
			`else`
			`{`
			`if ((c & lp->lp_region) == 0)`
			`result = SP_LOCAL;`
			`else if (c & RARE_MASK)`
			`result = SP_RARE;`
			`else`
			`result = SP_OK;`
			`}`
			`}`
			`}`

			`/* Match lookup. Uses a binary search. If there is a match adjust`
			`* "e" to the end. This is also done when a word matched, because`
			`* "you've" is longer than "you". */`
			`gap = &lp->lp_slang->sl_match;`
			`l = 0; /* low index */`
			`h = gap->ga_len - 1; /* high index */`
			`/* keep searching, the match must be between "l" and "h" (inclusive) */`
			`while (h >= l)`
			`{`
			`t = (h + l) / 2;`
			`p = MATCH_ENTRY(gap, t) + 1;`
			`for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)`
			`;`
			`if (p[n] == 0)`
			`{`
			`if ((ptr[n] == 0 \|\| !vim_iswordc_buf(ptr + n, wp->w_buffer)))`
			`{`
			`/* match! */`
			`e = ptr + n;`
			`if (result != SP_OK)`
			`{`
			`if ((lp->lp_region & p[-1]) == 0)`
			`result = SP_LOCAL;`
			`else`
			`result = SP_OK;`
			`}`
			`break;`
			`}`
			`/* match is too short, next item is new low index */`
			`l = t + 1;`
			`}`
			`else if (p[n] < ptr[n])`
			`/* match is before word, next item is new low index */`
			`l = t + 1;`
			`else`
			`/* match is after word, previous item is new high index */`
			`h = t - 1;`
			`}`

			`/* Addition lookup. Uses a linear search, there should be very few.`
			`* If there is a match adjust "e" to the end. This doesn't change`
			`* whether a word was good or bad, only the length. */`
			`gap = &lp->lp_slang->sl_add;`
			`for (t = 0; t < gap->ga_len; ++t)`
			`{`
			`p = MATCH_ENTRY(gap, t) + 1;`
			`for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)`
			`;`
			`if (p[n] == 0`
			`&& (e[n] == 0 \|\| !vim_iswordc_buf(e + n, wp->w_buffer)))`
			`{`
			`/* match */`
			`e += n;`
			`break;`
			`}`
			`}`
			`}`

			`if (result != SP_OK)`
			`{`
			`if (result == SP_BAD)`
			`*attrp = highlight_attr[HLF_SPB];`
			`else if (result == SP_RARE)`
			`*attrp = highlight_attr[HLF_SPR];`
			`else`
			`*attrp = highlight_attr[HLF_SPL];`
			`}`

			`return (int)(e - ptr);`
			`}`

			`static slang_T load_lp; / passed from spell_load_lang() to`
			`spell_load_file() */`

			`/*`
			`* Load language "lang[2]".`
			`*/`
			`static slang_T *`
			`spell_load_lang(lang)`
			`char_u *lang;`
			`{`
			`slang_T *lp;`
			`char_u fname_enc[80];`
			`char_u fname_ascii[20];`
			`char_u *p;`

			`lp = (slang_T *)alloc(sizeof(slang_T));`
			`if (lp != NULL)`
			`{`
			`lp->sl_name[0] = lang[0];`
			`lp->sl_name[1] = lang[1];`
			`hash_init(&lp->sl_ht);`
			`ga_init2(&lp->sl_match, sizeof(char_u *), 20);`
			`ga_init2(&lp->sl_add, sizeof(char_u *), 4);`
			`lp->sl_regions[0] = NUL;`
			`lp->sl_block = NULL;`

			`/* Find all spell files for "lang" in 'runtimepath' and load them.`
			`* Use 'encoding', except that we use "latin1" for "latin9". */`
			`#ifdef FEAT_MBYTE`
			`if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)`
			`p = p_enc;`
			`else`
			`#endif`
			`p = (char_u *)"latin1";`
			`load_lp = lp;`
			`sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);`
			`if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL)`
			`{`
			`/* Try again to find an ASCII spell file. */`
			`sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);`
			`if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL)`
			`{`
			`vim_free(lp);`
			`lp = NULL;`
			`smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),`
			`fname_enc + 6);`
			`}`
			`}`
			`else`
			`{`
			`lp->sl_next = first_lang;`
			`first_lang = lp;`
			`}`
			`}`

			`return lp;`
			`}`

			`/*`
			`* Load one spell file into "load_lp".`
			`* Invoked through do_in_runtimepath().`
			`*/`
			`static void`
			`spell_load_file(fname)`
			`char_u *fname;`
			`{`
			`int fd;`
			`size_t len;`
			`size_t l;`
			`size_t rest = 0;`
			`char_u p = NULL, np;`
			`sblock_T *bl;`
			`hash_T hash;`
			`hashitem_T *hi;`
			`int c;`
			`int region = REGION_ALL;`
			`char_u word[MAXWLEN + 1];`
			`int n;`

			`fd = mch_open((char *)fname, O_RDONLY \| O_EXTRA, 0);`
			`if (fd < 0)`
			`{`
			`EMSG2(_(e_notopen), fname);`
			`return;`
			`}`

			`/* Get the length of the whole file. */`
			`len = lseek(fd, (off_t)0, SEEK_END);`
			`lseek(fd, (off_t)0, SEEK_SET);`

			`/* Loop, reading the file one block at a time.`
			`* "rest" is the length of an incomplete line at the previous block.`
			`* "p" points to the remainder. */`
			`while (len > 0)`
			`{`
			`/* Allocate a block of memory to store the info in. This is not freed`
			`* until spell_reload() is called. */`
			`if (len > SBLOCKSIZE)`
			`l = SBLOCKSIZE;`
			`else`
			`l = len;`
			`len -= l;`
			`bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));`
			`if (bl == NULL)`
			`break;`
			`bl->sb_next = load_lp->sl_block;`
			`load_lp->sl_block = bl;`

			`/* Read a block from the file. Prepend the remainder of the previous`
			`* block. */`
			`if (rest > 0)`
			`mch_memmove(bl->sb_data, p, rest);`
			`if (read(fd, bl->sb_data + rest, l) != l)`
			`{`
			`EMSG2(_(e_notread), fname);`
			`break;`
			`}`
			`l += rest;`
			`rest = 0;`

			`/* Deal with each line that was read until we finish the block. */`
			`for (p = bl->sb_data; l > 0; p = np)`
			`{`
			`/* "np" points to the char after the line (CR or NL). */`
			`for (np = p; l > 0 && *np >= ' '; ++np)`
			`--l;`
			`if (l == 0)`
			`{`
			`/* Incomplete line (or end of file). */`
			`rest = np - p;`
			`if (len == 0)`
			`EMSG2(_("E751: Truncated spell file: %s"), fname);`
			`break;`
			`}`
			`np = NUL; / terminate the line with a NUL */`

			`/* Skip comment and empty lines. */`
			`c = *p;`
			`if (c != '#' && np > p)`
			`{`
			`if (c == '=' \|\| c == '+')`
			`{`
			`garray_T *gap;`

			`/* Match or Add item. */`
			`if (c == '=')`
			`gap = &load_lp->sl_match;`
			`else`
			`gap = &load_lp->sl_add;`

			`if (ga_grow(gap, 1) == OK)`
			`{`
			`for (n = 0; n < gap->ga_len; ++n)`
			`if ((c = STRCMP(p + 1,`
			`MATCH_ENTRY(gap, n) + 1)) < 0)`
			`break;`
			`if (c == 0)`
			`{`
			`if (p_verbose > 0)`
			`smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),`
			`p + 1, fname);`
			`}`
			`else`
			`{`
			`mch_memmove((char_u **)gap->ga_data + n + 1,`
			`(char_u **)gap->ga_data + n,`
			`(gap->ga_len - n) * sizeof(char_u *));`
			`(((char_u *)gap->ga_data) + n) = p;`
			`*p = region;`
			`++gap->ga_len;`
			`}`
			`}`
			`}`
			`else if (c == '-')`
			`{`
			`/* region item */`
			`++p;`
			`if (*p == '-')`
			`/* end of a region */`
			`region = REGION_ALL;`
			`else`
			`{`
			`char_u *rp = load_lp->sl_regions;`
			`int r;`

			`/* The region may be repeated: "-ca-uk". Fill`
			`* "region" with the bit mask for the ones we find. */`
			`region = 0;`
			`for (;;)`
			`{`
			`/* start of a region */`
			`r = find_region(rp, p);`
			`if (r == REGION_ALL)`
			`{`
			`/* new region, add it */`
			`r = STRLEN(rp);`
			`if (r >= 12)`
			`{`
			`EMSG2(_("E752: Too many regions in %s"),`
			`fname);`
			`r = REGION_ALL;`
			`}`
			`else`
			`{`
			`rp[r] = p[0];`
			`rp[r + 1] = p[1];`
			`rp[r + 2] = NUL;`
			`r = 1 << (r / 2);`
			`}`
			`}`
			`else`
			`r = 1 << r;`

			`region \|= r;`
			`if (p[2] != '-')`
			`{`
			`if (p[2] != NUL)`
			`EMSG2(_("E753: Invalid character in \"%s\""),`
			`p - 1);`
			`break;`
			`}`
			`p += 3;`
			`}`
			`}`
			`}`
			`else`
			`{`
			`/* add the word */`
			`if (c == '>')`
			`c = region \| RARE_MASK;`
			`else`
			`{`
			`if (c != ' ')`
			`EMSG2(_("E753: Invalid character in \"%s\""), p);`
			`c = region;`
			`}`
			`#ifdef FEAT_MBYTE`
			`if (MB_ISUPPER(mb_ptr2char(p + 1)))`
			`#else`
			`if (MB_ISUPPER(p[1]))`
			`#endif`
			`c \|= CASE_MASK;`
			`*p++ = c;`
			`(void)str_foldcase(p, np - p, word, MAXWLEN + 1);`
			`n = STRLEN(word);`
			`if (n > np - p)`
			`{`
			`sblock_T *s;`

			`/* Folding case made word longer! We need to allocate`
			`* memory for it. */`
			`s = (sblock_T *)alloc((unsigned)sizeof(sblock_T)`
			`+ n + 1);`
			`if (s != NULL)`
			`{`
			`s->sb_next = load_lp->sl_block;`
			`load_lp->sl_block = s;`
			`s->sb_data[0] = p[-1];`
			`p = s->sb_data + 1;`
			`}`
			`}`
			`mch_memmove(p, word, n + 1);`

			`hash = hash_hash(p);`
			`hi = hash_lookup(&load_lp->sl_ht, p, hash);`
			`if (!HASHITEM_EMPTY(hi))`
			`{`
			`c = hi->hi_key[-1];`
			`if ((c & (CASE_MASK \| RARE_MASK))`
			`== (p[-1] & (CASE_MASK \| RARE_MASK)))`
			`{`
			`if (p_verbose > 0)`
			`smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),`
			`p, fname);`
			`}`
			`else`
			`hi->hi_key[-1] \|= (p[-1] & (CASE_MASK \| RARE_MASK));`
			`}`
			`else`
			`hash_add_item(&load_lp->sl_ht, hi, p, hash);`
			`}`
			`}`

			`while (l > 0 && *np < ' ')`
			`{`
			`++np;`
			`--l;`
			`}`
			`}`
			`}`

			`close(fd);`
			`}`

			`/*`
			`* Parse 'spelllang' and set buf->b_langp accordingly.`
			`* Returns an error message or NULL.`
			`*/`
			`char_u *`
			`did_set_spelllang(buf)`
			`buf_T *buf;`
			`{`
			`garray_T ga;`
			`char_u *lang;`
			`char_u *e;`
			`char_u *region;`
			`int region_mask;`
			`slang_T *lp;`
			`int c;`

			`ga_init2(&ga, sizeof(langp_T), 2);`

			`/* loop over comma separated languages. */`
			`for (lang = buf->b_p_spl; *lang != NUL; lang = e)`
			`{`
			`e = vim_strchr(lang, ',');`
			`if (e == NULL)`
			`e = lang + STRLEN(lang);`
			`if (e > lang + 2)`
			`{`
			`if (lang[2] != '_' \|\| e - lang != 5)`
			`{`
			`ga_clear(&ga);`
			`return e_invarg;`
			`}`
			`region = lang + 3;`
			`}`
			`else`
			`region = NULL;`

			`for (lp = first_lang; lp != NULL; lp = lp->sl_next)`
			`if (STRNICMP(lp->sl_name, lang, 2) == 0)`
			`break;`

			`if (lp == NULL)`
			`/* Not found, load the language. */`
			`lp = spell_load_lang(lang);`

			`if (lp != NULL)`
			`{`
			`if (region == NULL)`
			`region_mask = REGION_ALL;`
			`else`
			`{`
			`/* find region in sl_regions */`
			`c = find_region(lp->sl_regions, region);`
			`if (c == REGION_ALL)`
			`{`
			`c = lang[5];`
			`lang[5] = NUL;`
			`smsg((char_u *)_("Warning: region %s not supported"), lang);`
			`lang[5] = c;`
			`region_mask = REGION_ALL;`
			`}`
			`else`
			`region_mask = 1 << c;`
			`}`

			`if (ga_grow(&ga, 1) == FAIL)`
			`{`
			`ga_clear(&ga);`
			`return e_outofmem;`
			`}`
			`LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;`
			`LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;`
			`++ga.ga_len;`
			`}`

			`if (*e == ',')`
			`++e;`
			`}`

			`/* Add a NULL entry to mark the end of the list. */`
			`if (ga_grow(&ga, 1) == FAIL)`
			`{`
			`ga_clear(&ga);`
			`return e_outofmem;`
			`}`
			`LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;`
			`++ga.ga_len;`

			`/* Everything is fine, store the new b_langp value. */`
			`ga_clear(&buf->b_langp);`
			`buf->b_langp = ga;`

			`return NULL;`
			`}`

			`/*`
			`* Find the region "region[2]" in "rp" (points to "sl_regions").`
			`* Each region is simply stored as the two characters of it's name.`
			`* Returns the index if found, REGION_ALL if not found.`
			`*/`
			`static int`
			`find_region(rp, region)`
			`char_u *rp;`
			`char_u *region;`
			`{`
			`int i;`

			`for (i = 0; ; i += 2)`
			`{`
			`if (rp[i] == NUL)`
			`return REGION_ALL;`
			`if (rp[i] == region[0] && rp[i + 1] == region[1])`
			`break;`
			`}`
			`return i / 2;`
			`}`

			`# if defined(FEAT_MBYTE) \|\| defined(PROTO)`
			`/*`
			`* Clear all spelling tables and reload them.`
			`* Used after 'encoding' is set.`
			`*/`
			`void`
			`spell_reload()`
			`{`
			`buf_T *buf;`
			`slang_T *lp;`
			`sblock_T *sp;`

			`/* Unload all allocated memory. */`
			`while (first_lang != NULL)`
			`{`
			`lp = first_lang;`
			`first_lang = lp->sl_next;`

			`hash_clear(&lp->sl_ht);`
			`ga_clear(&lp->sl_match);`
			`ga_clear(&lp->sl_add);`
			`while (lp->sl_block != NULL)`
			`{`
			`sp = lp->sl_block;`
			`lp->sl_block = sp->sb_next;`
			`vim_free(sp);`
			`}`
			`}`

			`/* Go through all buffers and handle 'spelllang'. */`
			`for (buf = firstbuf; buf != NULL; buf = buf->b_next)`
			`{`
			`ga_clear(&buf->b_langp);`
			`if (*buf->b_p_spl != NUL)`
			`did_set_spelllang(buf);`
			`}`
			`}`
			`# endif`

			`#endif /* FEAT_SYN_HL */`