#include "ReadWriteMLA.h"
#include "Sys.h"
#include <ctype.h>
extern "C" {
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>		// XXX
}
#include "ixDB.h"

// Placeholder strings substituted when a message lacks the corresponding
// piece of information: NONAME and NOSUBJ are the initial values of the
// sender name and subject in load(); NONAME and NOEMAIL are filled in by
// getname() when the parsed name/address comes out empty.
#define	NONAME	"(no name)"
#define	NOEMAIL	"(no email)"
#define	NOSUBJ	"(no subject)"

/*
 * Prepare the state used while parsing messages:
 *   - defAddr: if not already set, the local host's name (the resolver's
 *     canonical name when available), used by load() to complete email
 *     addresses that carry no "@" or "!" part;
 *   - thisyear: ":00 YYYY" for the current year, appended by getfromdate()
 *     to From-line dates it truncates;
 *   - timezonestr: the local time zone string produced by strftime("%Z");
 *   - the index database, opened read/write (and created if need be) when
 *     an index file is configured and no database is open yet.
 */
void
ReadWriteMLA::setupParsing()
{
    if (defAddr == "") {
	char hostname[64];
	/*
	 * gethostname leaves the buffer unspecified on failure and
	 * need not NUL-terminate it when the name is truncated, so
	 * handle both cases before treating it as a C string.
	 */
	if (gethostname(hostname, sizeof (hostname)) != 0)
	    hostname[0] = '\0';
	hostname[sizeof (hostname)-1] = '\0';
	if (hostname[0] != '\0') {
	    struct hostent* hp = gethostbyname(hostname);
	    defAddr = hp ? hp->h_name : hostname;
	}
	// else: leave defAddr empty; load() then skips address completion
    }
    time_t t = time(0);
    struct tm* tm = localtime(&t);
    if (tm) {					// NB: localtime can return NULL
	char s[80];
	strftime(s, sizeof (s), "%Y", tm);
	thisyear = fxStr::format(":00 %.4s", (char*) s);
	strftime(s, sizeof (s), "%Z", tm);
	timezonestr = s;
    }
    if (!ixdb && ixFile != "")
	(void) openIXDB(O_RDWR|O_CREAT);
}

/*
 * Return whether the string begins with "Re:" (the letters are
 * matched without regard to case).  Characters are examined one
 * at a time, left to right, and only while the prefix still
 * matches, so a string shorter than three characters is never
 * read past its terminator.
 */
fxBool
ReadWriteMLA::isRe(const char* cp)
{
    const fxBool startsWithR = (cp[0] == 'r' || cp[0] == 'R');
    const fxBool startsWithRe =
	startsWithR && (cp[1] == 'e' || cp[1] == 'E');
    return (startsWithRe && cp[2] == ':');
}

// Size, in bytes, of the buffers used to read one line of input with fgets.
#define MAXLINE	   1024

/*
 * Return whether the text at cp starts with the separator string sep.
 */
static inline fxBool
isSeparator(const char* cp, const fxStr& sep)
{
    const int diff = strncmp(cp, sep, sep.length());
    return (diff == 0);
}
/*
 * Return whether the first n characters of a and b are the same,
 * ignoring case.  Used to match a header line against a header name.
 */
static inline fxBool
isHeader(const char* a, const char* b, size_t n)
{
    const int diff = strncasecmp(a, b, n);
    return (diff == 0);
}
#ifdef notdef
// Whole-string, case-insensitive variant of isHeader.  It is compiled
// only when `notdef` is defined; nothing visible in this file defines it.
static inline fxBool
isHeader(const char* a, const char* b)
    { return (strcasecmp(a,b) == 0); }
#endif

void
ReadWriteMLA::nextMessage(FILE* fp, fxStr& date)
{
    char line[MAXLINE];
    while (fgets(line, sizeof (line), fp) != NULL) {
	lineno++;
	if (isSeparator(line, msgSeparator) && getfromdate(line, date))
	    break;
    }
}

/*
 * Read messages from fp and add each one to the archive.
 *
 * For every message the RFC-822 headers are parsed, missing fields are
 * defaulted, the tables are grown if they are full, a MailMsg entry is
 * filled in, the body is collected up to the next separator line, the
 * message is (optionally) indexed, written with writeArticle() and
 * finally registered with addMessage().
 *
 *   fp           input stream holding the messages
 *   skipToStart  if set, discard input up to the first separator line
 *   processBody  passed through unchanged to writeArticle()
 *
 * Returns FALSE as soon as writeArticle() fails, otherwise TRUE.
 * Messages lacking a Date: or Message-Id: header are reported with
 * warning() and skipped.  Reading stops at end-of-file or on a read
 * error.
 */
fxBool
ReadWriteMLA::load(FILE* fp, fxBool skipToStart, fxBool processBody)
{
    char line[MAXLINE];
    fxStr fromdate;

    lineno = 0;

    if (skipToStart)
	nextMessage(fp, fromdate);		// discard to start of 1st msg

    mnum_t onum = nmsgs;
    mnum_t num = onum;
    /*
     * NB: check ferror as well as feof; after a read error fgets
     *     keeps failing without ever setting the EOF indicator,
     *     which would otherwise make this loop spin forever.
     */
    while (!feof(fp) && !ferror(fp)) {
	fxStr to;
	fxStr name(NONAME);
	fxStr emailaddr;
	fxStr replytoaddr;
	fxStr replytoname;
	fxStr date;
	fxStr deliverydate;
	fxStr msgid;
	fxStr subject(NOSUBJ);
	const char* inreply = nullstr;
	u_int startline = lineno;

	/*
	 * Parse RFC-822 headers.
	 */
	while (fgets(line, sizeof (line), fp) != NULL) {
	    lineno++;
	    // NB: ctype functions require an unsigned char value
	    if (isspace((unsigned char) line[0])) {
		if (line[0] == '\n')		// end of headers
		    break;
		continue;			// continuation line
	    }
	    const char* value = strchr(line, ':');
	    if (!value)
		continue;
	    for (value++; isspace((unsigned char) *value); value++)
		;
	    char* cp = strchr(line, '\n');
	    if (cp)
		*cp = '\0';
	    switch (tolower((unsigned char) line[0])) {
	    case 'a':
		if (isHeader(line, "Apparently-To:", 14))
		    to = value;
		break;
	    case 'd':
		if (isHeader(line, "Date:", 5))
		    date = value;
		else if (isHeader(line, "Delivery-Date:", 14))
		    deliverydate = value;
		break;
	    case 'f':
		if (isHeader(line, "From:", 5))
		    getname(value, name, emailaddr);
		break;
	    case 'm':
		if (isHeader(line, "Message-Id:", 11))
		    getid(value, msgid);
		break;
	    case 'n':
		if (isHeader(line, "Newsgroups:", 11) && to == "")
		    to = value;
		break;
	    case 's':
		if (isHeader(line, "Subject:", 8)) {
		    subject = value;
		    trimWS(subject);
		    if (strcasecmp(subject, "Re:") == 0)
			subject = "";
		}
		break;
	    case 'i':
		if (isHeader(line, "In-Reply-To:", 12)) {
		    // collect continuation lines
		    fxStr reply(value);
		    collectContinuation(fp, reply);
		    fxStr s1;
		    getreply(reply, s1);
		    inreply = hashstr(s1);
		}
		break;
	    case 'r':
		if (isHeader(line, "Reply-To:", 9))
		    getname(value, replytoname, replytoaddr);
		else if (inreply == nullstr && isHeader(line, "References:", 11)) {
		    // used only when no In-Reply-To: has been seen yet
		    fxStr reply(value);
		    collectContinuation(fp, reply);
		    fxStr s1;
		    getreply(reply, s1);
		    inreply = hashstr(s1);
		}
		break;
	    case 't':
		if (isHeader(line, "To:", 3)) {
		    to = value;
		    collectContinuation(fp, to);
		}
		break;
	    }
	}
	if (feof(fp) || ferror(fp))		// message must have a body
	    break;
	if (date == "" || msgid == "") {
	    warning("line %u: Missing required %s: header, discarding message",
		 lineno,
		 date == "" ? "Date" : "Message-ID");
	    nextMessage(fp, fromdate);
	    continue;
	}
	/*
	 * Fill in unspecified field values.
	 *
	 * NB: an unspecified sender name is NONAME, not "": that is
	 *     both the initial value above and what getname stores
	 *     when it finds no name, so test for the placeholder too.
	 */
	if (replytoname != "" && (name == "" || name == NONAME))
	    name = replytoname;
	if (replytoaddr == "")
	    replytoaddr = emailaddr;
	/*
	 * Append default address information if email
	 * addresses appear to be less than fully specified;
	 * e.g. sam -> sam@flake.asd.sgi.com.
	 */
	if (defAddr != "") {
	    if (emailaddr.next(0, "!@") == emailaddr.length())
		emailaddr.append("@" | defAddr);
	    if (replytoaddr.next(0, "!@") == replytoaddr.length())
		replytoaddr.append("@" | defAddr);
	}
	/*
	 * Verify space is available for this message
	 * before adding it to the database.
	 */
	size_t cc =
	      name.length() + 1
	    + emailaddr.length() + 1
	    + replytoaddr.length() + 1
	    + to.length() + 1
	    + date.length() + 1
	    + subject.length() + 1
	    + msgid.length() + 1
	    ;
	if (nmsgs == maxmsgs || nthreads == maxthreads ||
	  nreplies == maxreplies || strspace+cc > maxstrspace)
	    grow();
	/*
	 * Create a new message and fill in
	 * the info from the parsed envelope.
	 * The timestamp comes from the separator (From) line when
	 * there was one, else from Delivery-Date:, else it is "now".
	 */
	MailMsg& msg = *(MailMsg*)&table[nmsgs];
	msg.setup(num);
	if (fromdate != "")
	    msg.datetime = MailMsg::cvtFromDate(fromdate);
	else if (deliverydate != "")
	    msg.datetime = MailMsg::cvtDeliveryDate(deliverydate);
	else
	    msg.datetime = Sys::now();
	msg.to = hashstr(to);
	msg.name = hashstr(name);
	msg.emailaddr = hashstr(emailaddr);
	msg.replytoaddr = hashstr(replytoaddr);
	msg.date = hashstr(date);
	msg.msgid = hashstr(msgid);
	msg.subject = hashstr(subject);
	/*
	 * Collect body up to the next
	 * recognized message separator.
	 * NB: on a match fromdate is updated for the *next* message.
	 */
	fxStr body;
	while (fgets(line, MAXLINE, fp) != NULL) {
	    lineno++;
	    if (isSeparator(line, msgSeparator) && getfromdate(line, fromdate))
		break;
	    body.append(line);
	}
	body.cut(0, body.skip(0,'\n'));
	body.resize(body.skipR(body.length(), '\n')+1);
	/*
	 * Process parsed message.
	 */
	if (inreply == nullstr && isRe(msg.subject))
	    inreply = hashstr(stripre(msg.subject));
	if (ixdb) {
	    ixdb->beginDocument();
	    ixdb->indexStr(name,       10);
	    ixdb->indexStr(emailaddr,  10);
	    ixdb->indexStr(subject,    10);
	    fxStr bodyCopy(body);		// NB: must copy 'cuz modified
	    ixdb->indexStr(bodyCopy, 1);
	    if (trace) {
		u_int totWords = ixdb->getTotalWords();
		u_int goodWords = totWords -
		    (ixdb->getTooShortWords() + ixdb->getStoppedWords());
		// NB: guard the percentage against a message with no words
		printf("Msg %u, %u of %u words indexed (%u%%), %u too short, %u in stop list\n"
		    , msg.msgnum
		    , goodWords
		    , totWords
		    , (totWords != 0 ? 100*goodWords/totWords : 0)
		    , ixdb->getTooShortWords()
		    , ixdb->getStoppedWords()
		    );
	    }
	    ixdb->endDocument(msg.msgnum);
	}
	if (!writeArticle(msg, body, processBody))
	     return (FALSE);
	addMessage(msg, inreply, startline);
	num++, nmsgs++;
    }
    if (trace)
	printf("Loaded %d new messages.\n", (int) (num - onum));
    return (TRUE);
}

/*
 * Pull the date out of a UNIX-style "From " separator line.
 *
 * The date is taken to start at the first day-of-week abbreviation
 * (tried in the order Sun..Sat, each preceded by a space) that occurs
 * anywhere in the line.  If the trimmed date is longer than 16
 * characters and character 16 is not ':', it is cut to 16 characters
 * and `thisyear` is appended.  The local time zone string is always
 * appended.
 *
 * Returns TRUE with the result in `date`, or FALSE with `date` set to
 * the empty string when no day name is present.
 */
fxBool
ReadWriteMLA::getfromdate(const char* line, fxStr& date)
{
    static const char* const dayNames[7] =
	{ " Sun", " Mon", " Tue", " Wed", " Thu", " Fri", " Sat" };

    const char* hit = NULL;
    for (int d = 0; d < 7 && hit == NULL; d++)
	hit = strstr(line, dayNames[d]);
    if (hit == NULL) {
	date = "";
	return (FALSE);
    }

    date = hit;
    trimWS(date);
    const fxBool needsFixup = (date.length() > 16 && date[16] != ':');
    if (needsFixup) {
	date.resize(16);
	date.append(thisyear);
    }
    date.append(" " | timezonestr);
    return (TRUE);
}

/*
 * Split From: line into user name and email address.
 */
void
ReadWriteMLA::getname(const char* value, fxStr& name, fxStr& email)
{
    fxStr s(value);
    u_int l;
    if ((l = s.next(0, '<')) < s.length()) {		// Joe Blow <joe@...>
	email = s.extract(l+1, s.next(l,'>')-(l+1));
	name = s.head(l);
    } else if ((l = s.next(0, '(')) < s.length()) {	// joe@.. (Joe Blow)
	name = s.extract(l+1, s.next(l, ')')-(l+1));
	email = s.head(l);
    } else {
	email = s;
	name = s;
    }
    trimWS(email), trimWS(name);
    if (email == "") {
	email = NOEMAIL;
	if (name == "")
	    name = NONAME;
    } else if (name == "")
	name = email;
}

/*
 * Extract the message ID from the Message-ID: header.
 */
void
ReadWriteMLA::getid(const char* value, fxStr& msgid)
{
    const char* c = strchr(value, '<');
    if (c) {
	msgid = c+1;
	u_int l = 0;
	while ((l = msgid.next(l, '>')) > 0 && msgid[l-1] == '\\')
	    l++;
	msgid.resize(l);
    } else
	msgid = "";
}

/*
 * Return whether the text following c contains a newline that is
 * itself followed by white space, i.e. whether a continuation line
 * comes after the line c is on.  The search starts at c+1.
 */
inline fxBool lineAfter(const char* c)
{
    const char* nl = strchr(c+1, '\n');
    // NB: ctype functions require an unsigned char value
    return (nl != NULL && isspace((unsigned char) nl[1]));
}

/*
 * Strip every leading "Re:" (matched without regard to case) from
 * the subject, together with the run of spaces that follows each one.
 */
static void
unre(fxStr& subject)
{
    while (strncasecmp(subject, "Re:", 3) == 0)
	subject.remove(0, subject.skip(3,' '));
}

/*
 * Extract from an In-Reply-To: (or References:) header value the key
 * that identifies the message being replied to.  Depending on what the
 * value looks like, the key is one of:
 *
 *   - a message ID (see getid), when there is a '<' that is not
 *     followed by a continuation line;
 *   - a date, when the value contains "sage of " (as in "Your message
 *     of ...") or "dated: ";
 *   - a quoted subject taken from a continuation line, with any
 *     leading "Re:" removed;
 *   - otherwise the empty string.
 *
 * `value` may hold several lines joined with '\n', as produced by
 * collectContinuation.
 */
void
ReadWriteMLA::getreply(const char* value, fxStr& reply)
{
    const char* c;
    /*
     * Beware of misinterpreting an email address as a
     * message id.  Some mailers use an In-Reply-To field
     * of the form:
     *
     * In-Reply-To: Alan Crosswell <alan@curta.cc.columbia.edu>
     *	     "Re: Various ..." (Aug 20, 16:43)
     *
     * Mailers that put the message id in the field always
     * put it on the last line (so far at least).
     */
    if ((c = strchr(value, '<')) && !lineAfter(c)) {	// message id
	getid(value, reply);
    } else if ((c = strstr(value, "sage of ")) != NULL) { // date of reply
	c += 8;					// skip "sage of "
	/*
	 * NB: the optional opening quote follows the matched text;
	 *     testing the first matched character (always 's')
	 *     could never detect it.
	 */
	if (*c == '"')
	    c++;
	reply = c;
	reply.resize(reply.next(0, ".f"));
	trimWS(reply);
    } else if ((c = strstr(value, "dated: ")) != NULL) { // date of reply
	reply = c+7;
	reply.resize(reply.next(0, '.'));
	trimWS(reply);
    } else if ((c = strchr(value,'\n')) != NULL) {	// subject
	reply = c;
	reply.remove(0, reply.skip(reply.next(0, '"'), " \""));
	reply.resize(reply.skipR(reply.nextR(reply.length(), '"'), " \""));
	unre(reply);
    } else
	reply = "";
}

/*
 * Append to s any continuation lines that follow the header line just
 * read from fp.  A continuation line is one that begins with white
 * space; each is appended preceded by a '\n' and without its own
 * trailing newline.  Reading stops, with nothing consumed, at the
 * first line that is not a continuation.  Every line consumed is
 * counted in lineno.
 */
void
ReadWriteMLA::collectContinuation(FILE* fp, fxStr& s)
{
    char line[MAXLINE];

    for (;;) {
	int c = getc(fp);
	if (c == EOF)
	    break;
	/*
	 * NB: a line starting with '\n' is the empty line that ends
	 *     the headers, not a continuation; it must be left in
	 *     the stream for the caller to see.
	 */
	if (c == '\n' || !isspace(c)) {
	    ungetc(c, fp);
	    break;
	}
	line[0] = '\n';		// separate lines
	line[1] = c;
	if (fgets(line+2, sizeof (line)-2, fp) == NULL)
	    line[2] = '\0';	// nothing more read; keep buffer a valid string
	lineno++;
	char* cp = strchr(line+1, '\n');
	if (cp)
	    *cp = '\0';
	s.append(line);
    }
}
