#include "ixDB.h"
#include "Str.h"
#include "StackBuffer.h"
#include <stdlib.h>
#include "StrSet.h"
#include "WordSplit.h"
#include "SpellDict.h"
#include "Dictionary.h"

fxDECLARE_StrKeyDictionary(WeightDict, ixDB::weight_t);
fxIMPLEMENT_StrKeyDictionary(WeightDict, ixDB::weight_t);
void WeightDict::copyValue(void const* s, void* d) const
    { *((ixDB::weight_t*) d) = *(ixDB::weight_t const*)s; }
void WeightDict::destroyValue(void*) const {}
void WeightDict::createValue(void* d) const
    { *(ixDB::weight_t*)d = 0; }

/*
 * Build an index object on top of the database handle _db.
 * The handle is closed by the destructor when it is non-NULL.
 *
 * Every member that is later read without first being written
 * is given a defined value here; in particular the stop list is
 * only allocated by getStopList, but the destructor and Sync
 * test stopList/stopListDirty unconditionally.
 */
ixDB::ixDB(DB* _db)
{
    db = _db;
    minWordLength = 2;                  // min length of a word
    maxWordLength = 20;                 // max length of a word
    dict = NULL;                        // don't strip suffixes w/o dictionary
    stopList = NULL;                    // loaded lazily, see beginDocument
    stopListDirty = FALSE;
    stopListRead = FALSE;
    getSplitInfo();                     // sets split and splitInfoDirty
    trace = 0;
    stoppedWords = 0;
    shortWords = 0;
    longWords = 0;
    totalWords = 0;
    wdict = NULL;
}

/*
 * Release the per-document dictionary, write back any modified
 * split info and stop list, free them, and finally close the
 * database handle (if there is one).
 */
ixDB::~ixDB()
{
    delete wdict;

    // flush-then-free; deleting a NULL pointer is a no-op
    if (split && splitInfoDirty)
        putSplitInfo();
    delete split;

    if (stopList && stopListDirty)
        putStopList();
    delete stopList;

    // the database must stay open until the writes above are done
    if (db)
        (*db->close)(db);
}

void
ixDB::getStopList()
{
    stopList = new fxStrSet;

    DBT key;
    key.data = "__stop__";
    key.size = strlen((const char*) key.data);
    DBT cont;
    fxStackBuffer buf;
    if ((*db->get)(db, &key, &cont, 0) == 0) {
	const char* cp = (const char*) cont.data;
	while (cp < (const char*)(cont.data)+cont.size) {
	    stopList->add(cp);
	    if (trace > 1)
		printf("STOP %s\n", cp);
	    cp = strchr(cp,'\0')+1;
	}
    }
    stopListDirty = FALSE;
}

void
ixDB::putStopList()
{
    fxStackBuffer buf;
    for (fxStrSetIter iter(*stopList); iter.notDone(); iter++) {
	const fxStr& s = iter;
	buf.put(s, s.length()+1);
    }
    DBT cont;
    cont.data = (char*) buf;
    cont.size = buf.getLength();
    DBT key;
    key.data = "__stop__";
    key.size = strlen((const char*) key.data);
    (void) (*db->put)(db, &key, &cont, 0);	// XXX check error
    stopListDirty = FALSE;
}

void
ixDB::addStopList(const char* w)
{
    if (dict) {
	fxStackBuffer buf;
	buf.put(w);
	int len = buf.getLength();
	dict->Strip((char*) buf, len);
	fxStr sw((const char*) buf, len);
	if (!stopList->contains(sw)) {
	    stopList->add(sw);
	    stopListDirty = TRUE;
	    if (trace > 1)
		printf("STOP %s (stemmed from %s)\n", (const char*) sw, w);
	}
    } else {
	if (!stopList->contains(w)) {
	    stopList->add(w);
	    stopListDirty = TRUE;
	    if (trace > 1)
		printf("STOP %s\n", w);
	}
    }
}

void
ixDB::getSplitInfo()
{
    /*
     * Treat slash and underscore as lower case
     * alphabetics so that variable names and
     * pathnames are treated as indivisible words.
     */
    split = new WordSplit(FALSE, TRUE, TRUE);
    split->setClassification("_", WS_LOWER);
    split->setClassification("/", WS_LOWER);
    split->skipHexNumbers(TRUE);
    splitInfoDirty = FALSE;
}

/*
 * Counterpart of getSplitInfo.  Nothing is written anywhere;
 * the only effect is that the split info is no longer marked
 * as modified.
 */
void
ixDB::putSplitInfo()
{
    splitInfoDirty = FALSE;
}

/*
 * Plain accessors for the database handle and the indexing
 * parameters.
 */
DB*
ixDB::getDB()
{
    return db;
}

// shortest word length that indexStr accepts
u_short
ixDB::getMinWordLength() const
{
    return minWordLength;
}

void
ixDB::setMinWordLength(u_short l)
{
    minWordLength = l;
}

// longest word length that indexStr accepts
u_short
ixDB::getMaxWordLength() const
{
    return maxWordLength;
}

void
ixDB::setMaxWordLength(u_short l)
{
    maxWordLength = l;
}

// dictionary used for stemming; NULL means no stemming is done
SpellDict*
ixDB::getDictionary() const
{
    return dict;
}

void
ixDB::setDictionary(SpellDict* d)
{
    dict = d;
}

// trace level; the code in this file prints when it exceeds 1
int
ixDB::getTracing() const
{
    return trace;
}

void
ixDB::setTracing(int l)
{
    trace = l;
}

/*
 * Word statistics; the counters are reset by beginDocument and
 * advanced by indexStr.
 */

// words rejected because they are on the stop list
u_int
ixDB::getStoppedWords() const
{
    return stoppedWords;
}

// words rejected for being shorter than the minimum length
u_int
ixDB::getTooShortWords() const
{
    return shortWords;
}

// words rejected for being longer than the maximum length
u_int
ixDB::getTooLongWords() const
{
    return longWords;
}

// every word the splitter produced, rejected or not
u_int
ixDB::getTotalWords() const
{
    return totalWords;
}

void
ixDB::beginDocument()
{
    if (!stopListRead) {
	getStopList();
	stopListRead = TRUE;
    }
    wdict = new WeightDict;
    stoppedWords = 0;
    shortWords = 0;
    longWords = 0;
    totalWords = 0;
}

/*
 * Split s into words and add weight to the entry of each
 * accepted word in the current document's weight dictionary.
 *
 * A word is dropped (and counted in the matching statistic)
 * when it is shorter than minWordLength, when it is longer than
 * maxWordLength after stemming, or when it is on the stop list.
 * Note that the minimum is checked before the dictionary strips
 * the word and the maximum afterwards.
 *
 * Must be called between beginDocument and endDocument, which
 * provide the stop list and the weight dictionary used here.
 */
void
ixDB::indexStr(fxStr& s, u_int weight)
{
    int len = s.length();
    split->setString(s, len);                   // prime splitting machinery
    char* w;
    // nextWord is handed len and len is then used as the length
    // of the returned word
    while ((w = split->nextWord(len)) != NULL) {
        totalWords++;
        if (len < minWordLength) {
            if (trace > 1)
                printf("%.*s TOO SHORT\n", len, w);
            shortWords++;
            continue;
        }
        if (dict)                               // use dict to do stemming
            dict->Strip(w, len);
        if (len > maxWordLength) {
            if (trace > 1)
                printf("%.*s TOO LONG\n", len, w);
            longWords++;
            continue;
        }
        fxStr word(w, len);
        if (stopList->contains(word)) {
            if (trace > 1)
                printf("%.*s STOPPED\n", len, w);
            stoppedWords++;
        } else {
            if (trace > 1)                      // weight is unsigned: %u
                printf("COUNT %.*s (%u)\n", len, w, weight);
            (*wdict)[word] += weight;
        }
    }
}

/*
 * Finish the current document: for every word collected by
 * indexStr, append an ixRec holding the document id and the
 * word's weight to whatever is already stored under that word,
 * and write the result back.  A failed write is reported through
 * error() and the remaining words are still processed.
 *
 * The weight dictionary is released and the pointer reset, so
 * the destructor does not delete it a second time.  A call
 * without a preceding beginDocument does nothing.
 */
void
ixDB::endDocument(docid_t id)
{
    if (wdict == NULL)                  // no document in progress
        return;
    ixRec rec;
    rec.id = id;
    fxStackBuffer buf;
    for (WeightDictIter iter(*wdict); iter.notDone(); iter++) {
        buf.reset();
        const fxStr& w = iter.key();
        DBT key;
        key.data = (void*)(const char*) w; key.size = w.length();
        DBT cont;
        if ((*db->get)(db, &key, &cont, 0) == 0)
            buf.put((const char*) cont.data, cont.size); // copy old information
        rec.w = iter.value();
        if (trace > 1)                  // %.*s wants an int and a char pointer
            printf("RECORD msg %u, word \"%.*s\" weight %u\n",
                rec.id, (int) key.size, (const char*) key.data, rec.w);
        buf.put((const char*) &rec, sizeof (rec));
        cont.data = (void*)(const char*) buf;   // XXX should be const
        cont.size = buf.getLength();
        if ((*db->put)(db, &key, &cont, 0) != 0)
            error("Unable to store reference to \"%s\"", (const char*) w);
    }
    delete wdict;
    wdict = NULL;                       // ~ixDB deletes wdict as well
}

/*
 * Look up the NUL-terminated word w and hand back its record
 * in cont.  The result is TRUE only when the database lookup
 * returns 0; any other status gives FALSE.
 */
fxBool
ixDB::Fetch(const char* w, DBT& cont)
{
    DBT lookup;
    lookup.size = strlen(w);
    lookup.data = (void*) w;            // DBT has no const data pointer
    int status = (*db->get)(db, &lookup, &cont, 0);
    return (status == 0);
}

/*
 * Look up the word held in the fxStr w and hand back its record
 * in cont.  The result is TRUE only when the database lookup
 * returns 0; any other status gives FALSE.
 */
fxBool
ixDB::Fetch(const fxStr& w, DBT& cont)
{
    DBT lookup;
    lookup.size = w.length();
    lookup.data = (void*)(const char*) w;   // DBT has no const data pointer
    int status = (*db->get)(db, &lookup, &cont, 0);
    return (status == 0);
}

/*
 * Look up the word made of the wlen bytes at w (no terminator
 * is needed) and hand back its record in cont.  The result is
 * TRUE only when the database lookup returns 0; any other
 * status gives FALSE.
 */
fxBool
ixDB::Fetch(const char* w, size_t wlen, DBT& cont)
{
    DBT lookup;
    lookup.size = wlen;
    lookup.data = (void*) w;            // DBT has no const data pointer
    int status = (*db->get)(db, &lookup, &cont, 0);
    return (status == 0);
}

/*
 * Position a sequential scan with R_FIRST and return that
 * entry in key/cont.  TRUE only when the database call
 * returns 0.
 */
fxBool
ixDB::First(DBT& key, DBT& cont)
{
    int status = (*db->seq)(db, &key, &cont, R_FIRST);
    return (status == 0);
}

/*
 * Advance a sequential scan with R_NEXT and return that entry
 * in key/cont.  TRUE only when the database call returns 0.
 */
fxBool
ixDB::Next(DBT& key, DBT& cont)
{
    int status = (*db->seq)(db, &key, &cont, R_NEXT);
    return (status == 0);
}

/*
 * Write back the stop list and split info if they were
 * modified, then ask the database to sync.
 *
 * Returns TRUE when the sync call returns 0, the same
 * "0 means success" convention that Fetch, First and Next use
 * for the other database calls.  (The previous code had the
 * test reversed and returned TRUE for a non-zero status.)
 */
fxBool
ixDB::Sync()
{
    if (stopListDirty)
        putStopList();
    if (splitInfoDirty)
        putSplitInfo();
    return ((*db->sync)(db, 0) == 0);
}

/*
 * Print a printf-style message, taken from fmt and the
 * argument list ap, on stderr and finish it with a period
 * and a newline.
 */
void
ixDB::vprintMsg(const char* fmt, va_list ap) const
{
    FILE* out = stderr;
    vfprintf(out, fmt, ap);
    fputs(".\n", out);
}

/*
 * Report an error: a printf-style message that vprintMsg
 * writes to stderr.  stdout is flushed first so that output
 * already printed there is not left sitting in its buffer
 * while the message goes out.
 */
void
ixDB::error(const char* fmt ...) const
{
    fflush(stdout);

    va_list args;
    va_start(args, fmt);
    vprintMsg(fmt, args);
    va_end(args);
}
