#include "WordSplit.h"
#include <string.h>
#include <ctype.h>

/*
 * Construct a word splitter with no input string attached.
 *
 *   ah  stored as breakAtHyphen; when false nextWord() joins
 *       two runs of body characters separated by a hyphen
 *   mc  passed to mapCase() to enable/disable folding of
 *       upper case letters to lower case
 *   sn  initial value for both skipNums and skipHexadecimals
 *
 * The character classification table is seeded from the
 * <ctype.h> predicates for every code in the range 0..255.
 */
WordSplit::WordSplit(fxBool ah, fxBool mc, fxBool sn)
{
    cp = 0;
    ebuf = 0;
    charMap = 0;			// must be cleared before mapCase() inspects it
    breakAtHyphen = ah;
    skipNums = sn;
    skipHexadecimals = sn;
    mapCase(mc);

    ::memset(typeInfo, 0, sizeof (typeInfo));
    for (int ch = 0; ch < 256; ch++) {
	u_char flags = 0;
	if (ispunct(ch))
	    flags |= WS_PUNCT;
	if (isspace(ch))
	    flags |= WS_SPACE;
	if (isdigit(ch))
	    flags |= WS_DIGIT;
	if (isxdigit(ch))
	    flags |= WS_HEXDIGIT;
	if (isupper(ch))
	    flags |= WS_UPPER;
	if (islower(ch))
	    flags |= WS_LOWER;
	if (isalpha(ch))
	    flags |= WS_ALPHA;
	if (iscntrl(ch))
	    flags |= WS_CNTRL;
	typeInfo[ch] = flags;
    }
}

/*
 * Release the character map, if one was ever allocated.
 * The map is created with new[] (see initMap) so it must
 * be released with the array form of delete; deleting a
 * null pointer is a no-op so no explicit check is needed.
 */
WordSplit::~WordSplit()
{
    delete [] charMap;
}

/*
 * Add the classification bits to every character in the
 * null-terminated string cp, leaving any bits that are
 * already set for those characters intact.
 */
void
WordSplit::addClassification(char* cp, int bits)
{
    for (; *cp != '\0'; cp++) {
	// index as unsigned so characters >= 0200 do not
	// produce a negative subscript when char is signed
	typeInfo[(unsigned char) *cp] |= bits;
    }
}

/*
 * Clear the classification bits.  When cp is null the bits
 * are cleared for all 256 character codes; otherwise only
 * for the characters in the null-terminated string cp.
 */
void
WordSplit::clrClassification(int bits, char* cp)
{
    if (!cp) {
	for (int i = 0; i < 256; i++)
	    typeInfo[i] &= ~bits;
    } else {
	for (; *cp != '\0'; cp++) {
	    // index as unsigned so characters >= 0200 do not
	    // produce a negative subscript when char is signed
	    typeInfo[(unsigned char) *cp] &= ~bits;
	}
    }
}

/*
 * Replace the classification of every character in the
 * null-terminated string cp with exactly the given bits;
 * any previous classification for those characters is lost.
 */
void
WordSplit::setClassification(char* cp, int bits)
{
    for (; *cp != '\0'; cp++) {
	// index as unsigned so characters >= 0200 do not
	// produce a negative subscript when char is signed
	typeInfo[(unsigned char) *cp] = bits;
    }
}

/*
 * Allocate the 256-entry character map on first use and
 * fill it so that each code maps to itself.  Subsequent
 * calls leave an existing map (and its mappings) untouched.
 */
void
WordSplit::initMap()
{
    if (charMap)
	return;
    charMap = new char[256];
    int code = 0;
    while (code < 256) {
	charMap[code] = code;
	code++;
    }
}

/*
 * Install character mappings.  The null-terminated string
 * cp is read as consecutive (from, to) pairs; each "from"
 * character is subsequently mapped to its "to" character.
 * A trailing "from" character with no partner is ignored
 * rather than consuming the terminator and running off the
 * end of the string.
 */
void
WordSplit::setMapping(char *cp)
{
    initMap();
    while (cp[0] != '\0' && cp[1] != '\0') {
	// index as unsigned so characters >= 0200 do not
	// produce a negative subscript when char is signed
	charMap[(unsigned char) cp[0]] = cp[1];
	cp += 2;
    }
}

/*
 * Attach the text to be split.  Scanning starts at buf and
 * stops len characters later; a len of zero means "use the
 * length of the null-terminated string at buf".
 */
void
WordSplit::setString(char* buf, int len)
{
    int nchars = len;
    if (nchars == 0)
	nchars = strlen(buf);
    ebuf = buf + nchars;
    cp = buf;
}

/*
 * Enable or disable folding of upper case letters.  When
 * enabled the character map is created if necessary and
 * 'A'..'Z' are mapped to 'a'..'z'.  When disabled, and a
 * map exists, 'A'..'Z' are reset to map to themselves;
 * other entries in the map are not touched.
 */
void
WordSplit::mapCase(fxBool b)
{
    mapUpperToLower = b;
    if (b) {
	initMap();
	for (int uc = 'A'; uc <= 'Z'; uc++)
	    charMap[uc] = 'a' + (uc - 'A');
    } else if (charMap) {
	for (int off = 0; off <= 'Z'-'A'; off++)
	    charMap['A'+off] = 'A'+off;
    }
}

// Non-zero when c has no bits set outside the low seven (0..0177).
inline int WordSplit::isAscii(int c)
{
    return ((c & ~0177) == 0);
}
// Translate c through the character map.  The subscript is reduced to
// an unsigned char so that a sign-extended char cannot index before
// the table; with no map allocated c is returned unchanged.
inline int WordSplit::mapChar(int c)
    { return (charMap ? charMap[(unsigned char) c] : c); }
// Non-zero when c is classified WS_DIGIT.  The subscript is reduced to
// an unsigned char so a sign-extended char cannot index before the table.
inline int WordSplit::isDigit(int c)
    { return (typeInfo[(unsigned char) c] & WS_DIGIT); }
// Non-zero when c is classified WS_DIGIT or WS_HEXDIGIT.  The subscript
// is reduced to an unsigned char so a sign-extended char cannot index
// before the table.
inline int WordSplit::isHexDigit(int c)
    { return (typeInfo[(unsigned char) c] & (WS_DIGIT|WS_HEXDIGIT)); }
// Non-zero when c is classified as space, punctuation or control, i.e.
// something nextWord() skips ahead of a word.  The subscript is reduced
// to an unsigned char so a sign-extended char cannot index before the table.
inline int WordSplit::isLeader(int c)
    { return (typeInfo[(unsigned char) c] & (WS_SPACE|WS_PUNCT|WS_CNTRL)); }
// Non-zero when c is classified as a digit or alphabetic, i.e. it can
// appear inside a word.  nextWord() calls this on raw buffer characters
// without an isAscii() guard, so the subscript is reduced to an unsigned
// char to keep a sign-extended char from indexing before the table.
inline int WordSplit::isBody(int c)
    { return (typeInfo[(unsigned char) c] & (WS_DIGIT|WS_ALPHA)); }

/*
 * Split the next word from the input string.
 *
 * Scanning resumes at the current position in the buffer
 * supplied to setString().  On success a pointer to the
 * first character of the word (inside that buffer) is
 * returned and len is set to the number of characters in
 * the word; the word is NOT null-terminated by this call.
 * If a character map is installed the word is translated
 * through it in place before being returned.  When no more
 * words remain a null pointer is returned and len is zero.
 *
 * Words made up entirely of digits are passed over when
 * skipNums is set, and words recognized as hexadecimal are
 * passed over when skipHexadecimals is set.  The members
 * allDigits and allHexDigits describe the last word scanned.
 */
char*
WordSplit::nextWord(int& len)
{
    int cc;
    char* w;
    do {
	char* start = cp;
	/*
	 * Ignore leading non-ascii, spaces,
	 * control characters  & punctuation.
	 * (Characters are always widened through unsigned
	 * char so that codes >= 0200 cannot turn into
	 * negative table subscripts when char is signed.)
	 */
	while (cp < ebuf &&
	  (!isAscii((unsigned char) *cp) || isLeader((unsigned char) *cp)))
	    cp++;
	int digits = 0;
	int hexdigits = 0;
	w = cp;
	if (!skipNums) {
	    /*
	     * If we're also collecting numbers, check for
	     * leading minus sign or plus sign that we have
	     * skipped as punctuation.
	     */
	    if (cp > start && (cp[-1] == '-' || cp[-1] == '+')) {
		w--;
		digits++;		// count sign as digit
	    }
	}
	for (; cp < ebuf && isBody((unsigned char) *cp); cp++) {
	    if (isDigit((unsigned char) *cp))
		digits++;
	    else if (isHexDigit((unsigned char) *cp))
		hexdigits++;
	}
	/*
	 * Handle hyphenation:
	 * - there must be some leading non-numeric text
	 * - there must be some trailing text (that is not
	 *   a hyphen).
	 */
	cc = cp - w;
	allDigits = (cc == digits);
	if (!breakAtHyphen && cp < ebuf-1 && cp[0] == '-' &&
	  isBody((unsigned char) cp[1])) {
	    if (cc > 0 && !allDigits) {
		for (cp++; cp < ebuf && isBody((unsigned char) *cp); cp++)
		    if (isDigit((unsigned char) *cp))
			digits++;
		cc = cp - w;
		// recompute for the extended word
		allDigits = (cc == digits);
	    }
	}
	allHexDigits = (digits + hexdigits == cc);
	if (!allHexDigits && digits > 0 && cc >= 2) {
	    /*
	     * Special check for C-style hexadecimal numbers.
	     * They're difficult to handle with the
	     * typeInfo classifications because of the
	     * overlap with alphabetics.  Exactly one
	     * character is neither a digit nor a hex digit;
	     * accept the word if that character is the 'x'
	     * of a leading "0x".
	     */
	    if (cc - (hexdigits + digits) == 1)
		allHexDigits =
		    (w[0] == '0' && tolower((unsigned char) w[1]) == 'x');
	}
    } while (cc > 0 &&
	((allDigits && skipNums) || (allHexDigits && skipHexadecimals)));
    len = cc;
    if (cc <= 0)
	return ((char*)0);
    if (charMap) {
	// translate in place; cp already sits just past the word
	for (char* p = w; p < w + cc; p++)
	    *p = charMap[(unsigned char) *p];
    }
    return (w);
}
