#include "SpellDict.h"
#include "Sys.h"
#include <sys/mman.h>

/*
 * Hash table for spelling checker has n bits.
 * Each word w is hashed by k different (modular) hash functions, hi.
 * The bits hi(w), i=1..k, are set for words in the dictionary.
 * Assuming independence, the probability that no word of a d-word
 * dictionary sets a particular bit is given by the Poisson formula
 * P = exp(-y)*y**0/0!, where y=d*k/n.
 * The probability that a random string is recognized as a word is then
 * (1-P)**k.  For given n and d this is minimum when y=log(2), P=1/2,
 * whence one finds, for example, that a 25000-word dictionary in a
 * 400000-bit table works best with k=11.
 */
/*
 * Moduli for the hash functions, one entry per function; every value
 * lies just below the 400000-bit table size discussed above.
 */
static long primes[] = {
    399871, 399887, 399899, 399911,
    399913, 399937, 399941, 399953,
    399979, 399983, 399989,
};
// number of hash functions, i.e. the number of entries in primes[]
const int NP = sizeof primes / sizeof *primes;
// number of precomputed multipliers kept for each modulus
const int NW = 30;

/*
 * Open the hashed word list hlistFile and map it read-only into
 * memory.
 *
 * Returns a newly allocated SpellDict that holds the descriptor and
 * the mapping (both are released by the destructor), or NULL when the
 * path is not a regular file or the file cannot be opened, stat'ed or
 * mapped.  The descriptor is closed on every failure path.
 */
SpellDict*
SpellDict::open(const char* hlistFile)
{
    if (!Sys::isRegularFile(hlistFile))
	return (NULL);
    int fd = ::open(hlistFile, O_RDONLY);
    if (fd < 0)
	return (NULL);
    struct stat sb;
    if (Sys::fstat(fd, sb) < 0) {
	::close(fd);
	return (NULL);
    }
    void* addr = ::mmap(0, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
    // mmap signals failure with MAP_FAILED; compare the pointer itself
    // rather than truncating the address to an int first.
    if (addr == MAP_FAILED) {
	::close(fd);
	return (NULL);
    }
    return new SpellDict(fd, (const short*) addr, sb.st_size);
}

/*
 * Record the descriptor f and the mapped table h of hs bytes, then
 * build the multiplier tables: for each modulus primes[p] the row
 * pow2[p] holds NW values, starting at 2^14 and with every following
 * entry equal to the previous one times 2^7, reduced modulo primes[p].
 */
SpellDict::SpellDict(int f, const short* h, size_t hs)
{
    fd = f;
    hash = h;
    hashsize = hs;
    pow2 = new long*[NP];
    for (int p = 0; p < NP; p++) {
	long* row = new long[NW];
	pow2[p] = row;
	row[0] = 1<<14;
	for (int j = 1; j < NW; j++)
	    row[j] = (row[j-1]<<7) % primes[p];
    }
}

/*
 * Free the multiplier tables, unmap the hash table and close the
 * descriptor of the backing file.
 */
SpellDict::~SpellDict()
{
    // Both the rows and the row-pointer array come from new[], so they
    // must be released with delete[]; scalar delete here is undefined.
    for (int i = 0; i < NP; i++)
	delete [] pow2[i];
    delete [] pow2;
    ::munmap((void*) hash, hashsize);
    ::close(fd);
}
#define SHIFT	4

/*
 * Test bit h of the hash table.  The table is addressed as an array of
 * shorts: the bits of h above the low SHIFT select the short, the low
 * SHIFT bits select the bit within it.  The result is non-zero when
 * the bit is set and zero otherwise.
 */
inline int SpellDict::get(u_long h)
{
    const u_long word = h >> SHIFT;
    const u_long bit = h & ((1 << SHIFT)-1);
    return (hash[word] & (1 << bit));
}

/*
 * Lookup routine that checks spell's hash table.
 *
 * bp points at the len characters of the word (no terminator is
 * required).  For each of the NP moduli the word, followed by a
 * newline character, is hashed as the sum of character*multiplier
 * products reduced modulo primes[i], and the matching table bit is
 * tested.  Returns TRUE only when every one of the NP bits is set and
 * FALSE as soon as one is clear; as the comment at the top of the file
 * explains, a TRUE result can be a false positive.
 *
 * Each multiplier row has NW entries and the word consumes len+1 of
 * them (the extra one is for the newline), so a word of NW or more
 * characters cannot be hashed and is reported as not found instead of
 * reading past the end of the row.
 */
fxBool
SpellDict::dictLookup(char* bp, int len)
{
    if (len >= NW)
	return (FALSE);
    for (int i = 0; i < NP; i++) {
	long* lp = pow2[i];
	char* wp;
	u_long h;
	for (wp = bp, h = 0; wp < bp + len; ++wp, ++lp)
	    h += *wp * *lp;
	// the newline is hashed as the final character of the word
	h += '\n' * *lp;
	h %= primes[i];
	if (get(h) == 0)
	    return (FALSE);
    }
    return (TRUE);
}
