/*
 * Suffix manipulation class.
 *
 * This is a base class that provides suffix removal
 * and derivation support.  It depends on subclass
 * methods to do dictionary lookup and related processing.
 */
#include "Suffix.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/*
 * Amount added to the derivation level each time suffix()
 * is entered.  s() and es() compare the level they are
 * handed against this value and refuse to act when they
 * are reached through a nested suffix() call.
 */
const int DLEV = 2;

/*
 * Suffix rules consulted by suffix().
 *
 * Each entry supplies, in initializer order (presumably the
 * suftab members suf, p1, n1, d1, a1, p2, n2, d2, a2 -- the
 * structure itself is declared in Suffix.h):
 *
 *   - the suffix, spelled backwards because suffix() compares it
 *     while walking from the end of the word toward the front;
 *   - a handler, the number of trailing characters to drop before
 *     calling it, and two derivation strings passed to the handler
 *     as its second and third arguments;
 *   - optionally a second handler with its own count and strings,
 *     tried only if the first handler fails.
 *
 * The table ends with an entry whose suffix pointer is null.
 * Order matters: suffix() commits to the first entry whose suffix
 * matches, so longer suffixes must precede their own tails.
 *
 * A derivation string is a sequence of "-x" (characters the stem
 * has that the word lacks) and "+x" (characters the word has that
 * the stem lacks).  Strip() adds the length of every "-" run and
 * subtracts the length of every "+" run, so each string must
 * describe exactly the edit the handler performs, with no '+' or
 * '-' inside a run.
 */
const Suffix::suftab Suffix::suffixtab[] = {
    { "ssen",	 &Suffix::ily,		4,	"-y+iness",	"+ness"  },
    { "ssel",	 &Suffix::ily,		4,	"-y+iless",	"+less"  },
    { "se",	 &Suffix::s,		1,	"",		"+s",
		 &Suffix::es,		2,	"-y+ies",	"+es"  },
    { "s'",	 &Suffix::s,		2,	"",		"+'s" },
    { "s",	 &Suffix::s,		1,	"",		"+s" },
    { "ecn",	 &Suffix::ncy,		1,	"",		"-t+ce" },
    { "ycn",	 &Suffix::ncy,		1,	"",		"-t+cy" },
    { "ytilb",	 &Suffix::nop,		0,	"",		"" },
    { "ytilib",	 &Suffix::bility,	5,	"-le+ility",	"" },
    { "elbaif",	 &Suffix::i_to_y,	4,	"-y+iable",	"" },
    { "elba",	 &Suffix::CCe,		4,	"-e+able",	"+able" },
    { "yti",	 &Suffix::CCe,		3,	"-e+ity",	"+ity" },
    { "ylb",	 &Suffix::y_to_e,	1,	"-e+y",		"" },
    { "yl",	 &Suffix::ily,		2,	"-y+ily",	"+ly" },
    { "laci",	 &Suffix::strip,	2,	"",		"+al" },
    { "latnem",	 &Suffix::strip,	2,	"",		"+al" },
    { "lanoi",	 &Suffix::strip,	2,	"",		"+al" },
    { "tnem",	 &Suffix::strip,	4,	"",		"+ment" },
    { "gni",	 &Suffix::CCe,		3,	"-e+ing",	"+ing" },
    { "reta",	 &Suffix::nop,		0,	"",		"" },
    { "re",	 &Suffix::strip,	1,	"",		"+r",
		 &Suffix::i_to_y,	2,	"-y+ier",	"+er" },
    { "de",	 &Suffix::strip,	1,	"",		"+d",
		 &Suffix::i_to_y,	2,	"-y+ied",	"+ed" },
    { "citsi",	 &Suffix::strip,	2,	"",		"+ic" },
    { "cihparg", &Suffix::i_to_y,	1,	"-y+ic",	"" },
    { "tse",	 &Suffix::strip,	2,	"",		"+st",
		 &Suffix::i_to_y,	3,	"-y+iest",	"+est" },
    { "cirtem",	 &Suffix::i_to_y,	1,	"-y+ic",	"" },
    { "yrtem",	 &Suffix::metry,	0,	"-ry+er",	"" },
    { "cigol",	 &Suffix::i_to_y,	1,	"-y+ic",	"" },
    { "tsigol",	 &Suffix::i_to_y,	2,	"-y+ist",	"" },
    { "tsi",	 &Suffix::VCe,		3,	"-e+ist",	"+ist" },
    { "msi",	 &Suffix::VCe,		3,	"-e+ism",	"+ism" },
    { "noitacif",&Suffix::i_to_y,	6,	"-y+ication",	"" },
    { "noitazi", &Suffix::ize,		5,	"-e+ation",	"" },
    { "rota",	 &Suffix::tion,		2,	"-e+or",	"" },
    { "noit",	 &Suffix::tion,		3,	"-e+ion",	"+ion" },
    { "naino",	 &Suffix::an,		3,	"",		"+ian" },
    { "na",	 &Suffix::an,		1,	"",		"+n" },
    { "evit",	 &Suffix::tion,		3,	"-e+ive",	"+ive" },
    { "ezi",	 &Suffix::CCe,		3,	"-e+ize",	"+ize" },
    { "pihs",	 &Suffix::strip,	4,	"",		"+ship" },
    { "dooh",	 &Suffix::ily,		4,	"-y+ihood",	"+hood" },
    { "ekil",	 &Suffix::strip,	4,	"",		"+like" },
    { 0 },
};
/*
 * Prefixes that checkPrefix() tries to peel off the front of
 * a word, listed in the order they are tried.  The first entry
 * that matches and leaves a vowel behind it wins, so a prefix
 * must appear before any shorter prefix it begins with.
 * The list is terminated by a null pointer.
 */
const char* Suffix::prefixtab[] = {
    "anti",	"bio",		"dis",		"electro",
    "en",	"fore",		"hyper",	"intra",
    "inter",	"iso",		"kilo",		"magneto",
    "meta",	"micro",	"milli",	"mis",
    "mono",	"multi",	"non",		"out",
    "over",	"photo",	"poly",		"pre",
    "pseudo",	"re",		"semi",		"stereo",
    "sub",	"super",	"thermo",	"ultra",
    "under",	// NB: must precede un
    "un",
    0
};

/*
 * Create a suffix processor whose working buffer initially
 * holds space bytes.  Zero (or any non-positive value, which
 * could not be handed to malloc meaningfully) selects a
 * default of 40 bytes.  If the allocation fails word is left
 * null.
 */
Suffix::Suffix(int space)
{
    wspace = (space > 0) ? space : 40;
    word = (char*) malloc(wspace);
}

/*
 * Release the working buffer.
 */
Suffix::~Suffix()
{
    free(word);		// a null pointer is accepted and ignored
}

/*
 * Copy the first len bytes of w into the working buffer,
 * growing the buffer to twice the word length whenever the
 * word would occupy more than half of it.
 *
 * The copy is always NUL-terminated: putw() inspects the
 * character at the end of the candidate stem, which for the
 * unmodified word is the byte just past the last letter.
 *
 * Failing to obtain the extra headroom is tolerated as long
 * as the word and its terminator still fit in the existing
 * buffer.  If they do not fit there is no way to report the
 * problem to the caller (who goes on to use word + len), so
 * the process is aborted rather than writing past the end of
 * the allocation.
 */
void
Suffix::getword(char* w, int len)
{
    if (word == 0 || len > wspace/2) {
	int need = 2*len;
	if (need < 2)			// room for one byte plus NUL
	    need = 2;
	char* cp = (char*) realloc(word, need);
	if (cp) {
	    word = cp;
	    wspace = need;
	}
    }
    if (word == 0 || len >= wspace)
	abort();
    ::memcpy(word, w, len);
    word[len] = '\0';
}

/*
 * Report whether a word can be accounted for.
 *
 * w is the word and len its length; a len of zero means
 * w is NUL-terminated and its length should be measured.
 * The word is first tried as it stands (putsuf), then with
 * suffixes removed (suffix).
 */
fxBool
Suffix::Defined(char* w, int len)
{
    if (len == 0)
	len = strlen(w);
    getword(w, len);
    /* XXX handle upper->lower case conversion */
    // NB: computed after getword, which may move the buffer
    char* const end = word + len;
    return (putsuf(end, ".", 0) || suffix(end, 0));
}

/*
 * Run the same analysis as Defined() on a word, discarding
 * the verdict.  The alternative that only loads the word into
 * the working buffer is compiled when notdef is defined.
 */
void
Suffix::Expand(char* w, int len)
{
#ifndef notdef
    (void) Defined(w, len);
#else
    if (len == 0)
	len = strlen(w);
    getword(w, len);
#endif
}

/*
 * Work out the length of the stem underlying a word.
 *
 * w is the word; len is its length on entry (zero means
 * measure the NUL-terminated string) and is updated in place.
 * If the word is accepted as it stands, or no suffix rule
 * applies, len is left as the word length.  Otherwise every
 * recorded derivation string above level zero is replayed:
 * each character in a "+..." run shortens len by one and
 * each character in a "-..." run lengthens it by one.
 */
void
Suffix::Strip(char* w, int& len)
{
    if (len == 0)
       len = strlen(w);
    getword(w, len);
    // NB: computed after getword, which may move the buffer
    char* const end = word + len;
    if (putsuf(end, ".", 0))
	return;
    ::memset(deriv, 0, sizeof (deriv));
    if (!suffix(end, 0))
	return;
    for (int level = derivlevel; level > 0; level--) {
	const char* rule = deriv[level];
	if (rule == 0)
	    continue;
	while (*rule) {
	    if (*rule == '+') {
		// characters the word gained over the stem
		rule++;
		while (*rule && *rule != '-') {
		    len--;
		    rule++;
		}
	    } else if (*rule == '-') {
		// characters the stem has that the word lost
		rule++;
		while (*rule && *rule != '+') {
		    len++;
		    rule++;
		}
	    }
	}
    }
}

/*
 * Try to account for the text word..ep as a stem plus one of
 * the suffixes in suffixtab.
 *
 * ep points just past the last character to consider and lev
 * is the derivation level of the caller; DLEV is added to it
 * and the derivation slots at the new level and the one below
 * are cleared.  Only the first table entry whose suffix matches
 * is used: its first handler is tried at level lev+1 and, if
 * that fails and a second handler exists, the second is tried
 * at level lev.  Returns TRUE when a handler succeeds.
 */
fxBool
Suffix::suffix(char* ep, int lev)
{
    lev += DLEV;
    deriv[lev] = deriv[lev-1] = 0;
    const char* sp;
    for (const suftab* t = suffixtab; (sp = t->suf) != 0; t++) {
	/*
	 * Match the word's suffix against the (reversed)
	 * suffix string in the table, never stepping in
	 * front of the start of the word.
	 */
	char* cp = ep;
	fxBool matched = TRUE;
	while (*sp) {
	    if (cp <= word || *--cp != *sp++) {
		matched = FALSE;
		break;
	    }
	}
	if (!matched)
	    continue;
	/*
	 * What remains in front of the suffix must contain
	 * at least one vowel; scan back for it.
	 */
	const char* vp = cp;
	while (vp > word && !vowel(vp[-1]))
	    vp--;
	if (vp == word)
	    return (FALSE);
	/*
	 * Try applying the transformation.
	 */
	if ((this->*t->p1)(ep - t->n1, t->d1, t->a1, lev+1))
	    return (TRUE);
	/*
	 * If there is a second transformation, try that.
	 */
	if (t->p2 != 0) {
	    deriv[lev] = deriv[lev+1] = 0;
	    return ((this->*t->p2)(ep - t->n2, t->d2, t->a2, lev));
	}
	return (FALSE);
    }
    return (FALSE);
}

/*
 * Handler that never succeeds; all arguments are ignored.
 */
fxBool
Suffix::nop(char* /*ep*/, char* /*d*/, char* /*a*/, int /*lev*/)
{
    return (FALSE);
}

/*
 * Plain suffix removal: look up word..ep recording a as the
 * derivation at level lev and, if that fails, try removing a
 * further suffix from the same text.  The second argument is
 * unused.
 */
fxBool
Suffix::strip(char* ep, char*, char* a, int lev)
{
    fxBool found = putsuf(ep, a, lev);
    if (!found)
	found = suffix(ep, lev);
    return (found);
}

/*
 * Handler for a trailing "s".  It declines when the level is
 * above DLEV+1, or when the character at ep and the one before
 * it are both 's'; otherwise the suffix is simply stripped.
 */
fxBool
Suffix::s(char* ep, char* d, char* a, int lev)
{
    if (lev > DLEV+1 || (ep[0] == 's' && ep[-1] == 's'))
	return (FALSE);
    return (strip(ep, d, a, lev));
}

/*
 * Handler for "-an"/"-ian": accepted only when the word
 * starts with an upper-case letter (i.e. looks like a proper
 * name), in which case word..ep is looked up with a recorded
 * as the derivation.  No further suffix removal is attempted.
 * The second argument is unused.
 */
fxBool
Suffix::an(char* ep, char*, char* a, int lev)
{
    // <ctype.h> functions require an unsigned char value (or EOF)
    if (!isupper((unsigned char) word[0]))	/* must be proper name */
	return (FALSE);
    return (putsuf(ep, a, lev));
}

/*
 * Handler that restores a final 'e': the character at ep is
 * overwritten with 'e' and the text up to and including it is
 * stripped with d recorded as the derivation.  The third
 * argument is unused.
 */
fxBool
Suffix::ize(char* ep, char* d, char*, int lev)
{
    ep[0] = 'e';
    return (strip(ep + 1, "", d, lev));
}

/*
 * Put an 'e' where the suffix began and try the resulting
 * stem (which ends just after that 'e'), recording d as the
 * derivation.  The third argument is unused.
 */
fxBool
Suffix::y_to_e(char* ep, char* d, char*, int lev)
{
    char* const stemEnd = ep + 1;
    *ep = 'e';
    return (strip(stemEnd, "", d, lev));
}

/*
 * Choose between the two ways a suffix may have been
 * attached: if the stem ends in 'i' treat it as a changed
 * 'y' (i_to_y), otherwise strip the suffix as it is.
 */
fxBool
Suffix::ily(char* ep, char* d, char* a, int lev)
{
    if (ep[-1] != 'i')
	return (strip(ep, d, a, lev));
    return (i_to_y(ep, d, a, lev));
}

/*
 * Handler for "-nce"/"-ncy".  Two skipv() steps back from the
 * character before ep must not run off the front of the word;
 * if they do the handler declines.  Otherwise the character
 * before ep is replaced by 't' and the result is stripped.
 */
fxBool
Suffix::ncy(char* ep, char* d, char* a, int lev)
{
    char* p = skipv(ep-1);
    p = skipv(p);
    if (p < word)
	return (FALSE);
    ep[-1] = 't';
    return (strip(ep, d, a, lev));
}

/*
 * Handler for "-bility": write an 'l' at ep and let y_to_e()
 * supply the following 'e', so that the stem tried ends in
 * "le".
 */
fxBool
Suffix::bility(char* ep, char* d, char* a, int lev)
{
    ep[0] = 'l';
    return (y_to_e(ep + 1, d, a, lev));
}

/*
 * If the stem ends in 'i', turn that letter into 'y' and use
 * d as the derivation; otherwise leave the stem alone and use
 * a.  Either way the stem is then stripped.
 */
fxBool
Suffix::i_to_y(char* ep, char* d, char* a, int lev)
{
    char* label = a;
    if (ep[-1] == 'i') {
	ep[-1] = 'y';
	label = d;
    }
    return (strip(ep, "", label, lev));
}

/*
 * Handler for "-es".  It declines when the level is above
 * DLEV.  A stem ending in 'i' is handled by i_to_y(); one
 * ending in 's', 'h', 'z' or 'x' is stripped directly; any
 * other ending is rejected.
 */
fxBool
Suffix::es(char* ep, char* d, char* a, int lev)
{
    if (lev > DLEV)
	return (FALSE);
    const char last = ep[-1];
    if (last == 'i')
	return (i_to_y(ep, d, a, lev));
    if (last == 's' || last == 'h' || last == 'z' || last == 'x')
	return (strip(ep, d, a, lev));
    return (FALSE);
}

/*
 * Handler for "-metry": overwrite the two characters before
 * ep with "er" and strip the result.
 */
fxBool
Suffix::metry(char* ep, char* d, char* a, int lev)
{
    char* const tail = ep - 2;
    tail[0] = 'e';
    tail[1] = 'r';
    return (strip(ep, d, a, lev));
}

/*
 * Handler keyed on the second character before ep: for 'c'
 * or 'r' the stem is looked up as it stands (no further
 * suffix removal); for 'a' an 'e' is restored via y_to_e();
 * anything else is rejected.
 */
fxBool
Suffix::tion(char* ep, char* d, char* a, int lev)
{
    const char key = ep[-2];
    if (key == 'c' || key == 'r')
	return (putsuf(ep, a, lev));
    if (key == 'a')
	return (y_to_e(ep, d, a, lev));
    return (FALSE);
}

/*
 * possible consonant-consonant-e ending
 *
 * Decides, from the last two characters of the stem (ep[-1]
 * and ep[-2]) and the first character of the suffix (*ep),
 * whether to try the stem with an 'e' restored (y_to_e), to
 * reject it outright, or to hand it on to VCe().  The cases
 * below fall through into one another on purpose.
 */
fxBool
Suffix::CCe(char* ep, char* d, char* a, int lev)
{
    switch(ep[-1]) {
    case 'l':
	/* vowel + 'l': leave it to VCe */
	if (vowel(ep[-2]))
	    break;
	switch (ep[-2]) {
	case 'l':
	case 'r':
	case 'w':
	    /* "ll", "rl", "wl": leave it to VCe */
	    break;
	default:
	    /* any other consonant + 'l': restore the 'e' */
	    return(y_to_e(ep, d, a, lev));
	}
	break;
    case 's':
	/* "ss": leave it to VCe */
	if (ep[-2] == 's')
	    break;
	/* fall thru... */
    case 'c':
    case 'g':
	/* reject when the suffix itself begins with 'a' */
	if (*ep == 'a')
	    return (FALSE);
	/* fall thru... */
    case 'v':
    case 'z':
	/* preceded by a vowel: leave it to VCe */
	if (vowel(ep[-2]))
	    break;
	/* fall thru... */
    case 'u':
	if (y_to_e(ep, d, a, lev))
	    return (TRUE);
	/*
	 * Restoring the 'e' did not help; only a stem ending
	 * in "ng" gets a further try through VCe.  Note that
	 * y_to_e has already overwritten *ep with 'e'.
	 */
	if (!(ep[-2] == 'n' && ep[-1] == 'g'))
	    return (FALSE);
	break;
    }
    return (VCe(ep, d, a, lev));
}

/*
 * possible consonant-vowel-consonant-e ending
 *
 * A stem ending in 'e' is rejected.  When the stem ends in a
 * vowel followed by a non-vowel, the stem is first tried with
 * an 'e' written over the first suffix character; if that
 * fails the overwritten character is put back.  Finally the
 * stem is stripped without the extra 'e'.
 */
fxBool
Suffix::VCe(char* ep, char* d, char* a, int lev)
{
    const char last = ep[-1];

    if (last == 'e')
	return (FALSE);
    if (!vowel(last) && vowel(ep[-2])) {
	const char saved = *ep;
	char* const withE = ep + 1;
	*ep = 'e';
	if (putsuf(withE, d, lev) || suffix(withE, lev))
	    return (TRUE);
	*ep = saved;			// undo the trial 'e'
    }
    return (strip(ep, d, a, lev));
}

/*
 * Look for a known prefix at the front of the text *wp..ep.
 *
 * Prefixes are tried in prefixtab order, ignoring the case of
 * the word's letters.  A prefix is accepted only if at least
 * one vowel lies between the end of the prefix and ep.  On
 * success *wp is advanced past the prefix and the prefix
 * string from the table is returned; otherwise *wp is left
 * alone and a null pointer is returned.
 *
 * The comparison never examines characters at or beyond ep,
 * and characters are passed to the <ctype.h> routines as
 * unsigned char values, as those routines require.
 */
const char*
Suffix::checkPrefix(char** wp, char* ep)
{
    for (const char** sp = prefixtab; *sp; sp++) {
	const char* bp = *wp;
	const char* cp = *sp;
	while (*cp && bp < ep && tolower((unsigned char) *bp) == *cp)
	    cp++, bp++;
	if (*cp)			// prefix not fully matched
	    continue;
	for (cp = bp; cp < ep; cp++)
	    if (vowel((unsigned char) *cp)) {
		*wp = (char*) bp;		// XXX
		return (*sp);
	    }
    }
    return (0);
}

/*
 * Look up the text word..ep, recording a as the derivation
 * at level lev.
 *
 * The text is first tried as it stands.  If that fails,
 * prefixes are peeled off the front one at a time (see
 * checkPrefix) and what is left is tried after each one, at
 * level lev+1.  The "+prefix" text accumulated along the way
 * lives in a local buffer, so the derivation slots at lev+1
 * and lev+2 are cleared again before returning.  Prefix
 * removal stops once another prefix would no longer fit in
 * that buffer.
 *
 * Returns TRUE if a lookup succeeded.
 */
fxBool
Suffix::putsuf(char* ep, char* a, int lev)
{
    char space[20];

    deriv[lev] = a;
    if (putw(word, ep, lev))
	return (TRUE);
    char* bp = word;
    char* pp = space;
    char* const limit = space + sizeof (space);
    fxBool val = FALSE;
    deriv[lev+1] = pp;
    const char* cp;
    while ((cp = checkPrefix(&bp, ep)) != 0) {
	/* need room for '+', the prefix and the closing NUL */
	if (strlen(cp) + 2 > (size_t) (limit - pp))
	    break;
	for (*pp++ = '+'; (*pp = *cp++) != 0; pp++)
	    ;
	if (putw(bp, ep, lev+1)) {
	    val = TRUE;
	    break;
	}
    }
    deriv[lev+1] = deriv[lev+2] = 0;
    return (val);
}

/*
 * Look up the candidate stem bp..ep in the dictionary.
 *
 * Stems of fewer than two characters are rejected, as is a
 * stem for which monosyl() holds when the character at ep
 * (the first one after the stem) is a vowel.  If the plain
 * lookup fails, that following character is a vowel, the
 * stem ends in a doubled letter and the stem minus its last
 * letter satisfies monosyl(), the lookup is repeated without
 * the doubled letter and "+<letter>" is recorded as the
 * derivation one level up.
 *
 * On success derivlevel is set to the level of the last
 * derivation recorded.  Returns the result of the lookup.
 *
 * The "+<letter>" text is kept in static storage because the
 * pointer stored in deriv is read after this function has
 * returned (by Strip()).  NOTE(review): the buffer is shared
 * by all Suffix objects, which is harmless for single-threaded
 * use; confirm no caller uses this class from several threads.
 */
fxBool
Suffix::putw(char* bp, char* ep, int lev)
{
    static char duple[3];

    if (ep - bp <= 1)
	return (FALSE);
    if (vowel(*ep) && monosyl(bp, ep))
	return (FALSE);
    fxBool b = dictLookup(bp, ep-bp);
    if (!b && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) {
	ep--;
	deriv[++lev] = duple;
	duple[0] = '+';
	duple[1] = *ep;
	duple[2] = 0;
	b = dictLookup(bp, ep-bp);
    }
    if (b)
	derivlevel = lev;
    return (b);
}

/*
 * Test the shape of the text bp..ep: TRUE when it is at
 * least two characters long, ends in a single vowel followed
 * by a non-vowel other than 'x' or 'w', and contains no
 * other vowel in front of that.
 */
fxBool
Suffix::monosyl(char* bp, char* ep)
{
    if (ep < bp+2)
	return (FALSE);
    ep--;				// last character
    if (vowel(*ep))
	return (FALSE);
    ep--;				// next to last character
    if (!vowel(*ep))
	return (FALSE);
    if (ep[1] == 'x' || ep[1] == 'w')
	return (FALSE);
    for (ep--; ep >= bp; ep--) {
	if (vowel(*ep))
	    return (FALSE);
    }
    return (TRUE);
}

/*
 * Step backwards through the word from s: past one vowel if
 * s is on a vowel, then past any run of non-vowels.  The
 * result points at the vowel that stopped the scan, or just
 * in front of the word if the start was reached first.
 */
char*
Suffix::skipv(char* s)
{
    char* p = s;
    if (p >= word && vowel(*p))
	--p;
    for (; p >= word; --p) {
	if (vowel(*p))
	    break;
    }
    return (p);
}

/*
 * Return TRUE if c is one of the letters a, e, i, o, u or y
 * in either case.
 *
 * Callers pass plain char values, which may be negative; the
 * value is narrowed to unsigned char before being handed to
 * tolower, as <ctype.h> requires.
 */
fxBool
Suffix::vowel(int c)
{
    c = tolower((unsigned char) c);
    return (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u' || c == 'y');
}

/*
 * Base-class dictionary lookup: nothing is ever found.
 * putw() calls this with the start of the candidate word and
 * its length; the file header notes that subclasses provide
 * the real lookup.
 */
fxBool
Suffix::dictLookup(char* /*w*/, int /*len*/)
{
    return (FALSE);
}
