/************************************************************************
 *                                                                      *
 *  Program:                    mail2HTML.c                             *
 *                                                                      *
 *----------------------------------------------------------------------*
 *  Description:  Convert Mail/News Files to HTML (Prototype)           *
 *                                                                      *
 *----------------------------------------------------------------------*
 *  Copyright (C) 1993  Basis Systeme netzwerk (BSn)                    *
 *                      Franz-Wolter Strasse 42                         *
 *                      D-8000 Munich 81                                *
 *                      Federal Republic of Germany                     *
 *                                                                      *
 *    Redistribution and use in source and binary forms are permitted   *
 *    provided that the above copyright notice and this paragraph are   *
 *    duplicated in  all such forms  and that  any documentation,       *
 *    advertising materials,  and other materials related to such       *
 *    distribution and use acknowledge that the software was developed  *
 *    by Basis Systeme netzwerk/Munich.                                 *
 *                                                                      *
 *    This is distributed in the hope that it will be useful, but       *
 *    WITHOUT ANY WARRANTY;  without even the implied warranty of       *
 *    MERCHANTABILITY  or  FITNESS FOR A PARTICULAR PURPOSE.            *
 *                                                                      *
 ************************************************************************/

#ifndef lint
/* RCS revision stamp kept as a string in the program; left out when `lint` is defined. */
static char     RCS_id[] = "$Header: /usr/export/home/edz/WWW/experimental/RCS/mail2html.c,v 0.02 1993/01/21 20:36:49 edz Exp edz $";

#endif
/*************
 * $Log: mail2html.c,v $
 * Revision 0.3  1993/01/21  20:36:49  edz
 * Prepare for distribution
 *
 * Revision 0.2  1993/01/21  08:10:49  edz
 * Removed Redundant address from Table of Contents structure
 * Changed the name of a few functions
 *
 * Revision 0.1  1993/01/10  20:25:49  edz
 * Initial revision
 *
 *************/
#define _MAIN_C
/************************************************************************
 * WARNING:                                                             *
 *    DON'T complain that this looks like a "one-off" hack! Why?        *
 *    'cause it is.                                                     *
 *    I don't seem to have any mail around here that breaks this        *
 *    program and it seems to work (and I hope follows RFC822) but      *
 *    I am sure that SOMEONE must have some mail that this very simple  *
 *    parser breaks (Greetings from Murphy).                            *
 *                                                                      *
 * NOTES:                                                               *
 *    You need an ANSI-C compiler and libraries to compile this.        *
 *                                                                      *
 *    (Q) Why did I not use Perl?                                       *
 *    (A) I have not figured out why EVERYONE else is using Perl        *
 *        for these kinds of programs.                                  *
 *    (Q) Why did I write this?                                         *
 *    (A) Mail like Usenet News is a very common information source     *
 *        that is already Hypertext (The file system, man pages and     *
 *        your Window systems are also Hypertext).                      *
 *        Before you even have a chance to ask. Hypertext is a MODEL    *
 *        of Representation not an implementation. The World Wide Web,  *
 *        SOFABED, HyTime, Xerox Notes, HyperCard, HyTelnet ... are     *
 *        all implementations of a model for hypertext. The programs    *
 *        www, viola, hytelnet, notes, ... are all user interfaces to   *
 *        the respective implementations.                               *
 *                                                                      *
 *        The often cited reasons for the success of the Gopher model   *
 *        in contrast to the W3 model: that W3 requires hypertext       *
 *        documents and that hypertext documents are rare is faulty.    *
 *        A html editor would be welcome but is not as important as     *
 *        the community belief has stipulated.                          *
 *        W3 represents a paradigm change.                              *
 *                                                                      *
 *        With  Man pages, Mail, News, native HTML, GNU Info,           *
 *        Hytelnet/MaxThink, Internet Resource files (FAQs),            *
 *        WAIS, Gopher (the current gateway is incomplete),             *
 *        Archie, Directory Assistance (DIXIE/X500), CSO phone          *
 *        books                                                         *
 *        ... there is more than enough!                                *
 *                                                                      *
 *        With some simple and transparent auto-taggers the whole       *
 *        Internet, Usenet and ... is the web. We hope to have a man    *
 *        page autotagger done in the near future (the current crop     *
 *        --- at least the ones I know--- are NOT autotaggers).         *
 *                                                                      *
 *    This program is only a technology test. We hope to have a         *
 *    mail transport done in the very near future.                      *
 *    We are currently experimenting with several user authentication   *
 *    schemes (a'la POP).                                               *
 *                                                                      *
 *    If you use this TEST-code we would like to hear from you.         *
 *                                                                      *
 * TODO:                                                                *
 *    (i)   Add Support for MIME (RFC1341)                              *
 *    (ii)  Add Support for Configuration (RFC1343)                     *
 *    (ii)  Add Support for Multinational headers (RFC1342)             *
 *    (iii) Add X.400 Support                                           *
 *    (iv)  Use a more intelligent message ID algorithm. The current    *
 *          implementation requires that the folder that contains the   *
 *          reference was at one time accessed, viz. that a delivery    *
 *          or cron process ran a bogus parse. Given the other info     *
 *          available (eg. time) one could narrow the search.           *
 *    (v)   Use ndbm/gdbm instead of stupid ASCII file list/Remove      *
 *          ID Duplication.                                             *
 *    (vi)  Fold into WWW Daemon (see Gopher), eg.                      *
 *          WWW/Mail/mbox  returns the table of Contents                *
 *          references have names like "/MessageID/AA04187@BSNGATE"     *
 *    (vii) Fold in state system transport (currently it's stateless)   *
 ************************************************************************/
/*-
 *  /MessageID/<MAIL-ID> returns the .html file corresponding to <MAIL-ID>
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include "Entities.h"
#include "Ctypes.h"
#include "Ctypes.c"

/*
 * Longest path name this program will build.  Use the value supplied by the
 * platform if there is one (MAXPATHLEN, or _MAX_PATH), else fall back to 1024.
 */
#ifndef MAXPATHLEN
# ifdef _MAX_PATH
#  define MAXPATHLEN _MAX_PATH
# else
#  define MAXPATHLEN 1024
# endif
#endif

/*----------------------- User Configuration ----------------------------- */
/*
 * MESSAGE_INDEX_FILE:  the message index; it is appended to when a folder is
 *                      parsed and read when a /MessageID/ request is served,
 *                      so it must be readable and writeable.
 * CONTENTS_EXTENSION:  suffix added to the folder name for the file that
 *                      receives the converted message bodies.
 * DIRECTORY_EXTENSION: suffix added to the folder name for the cached
 *                      table-of-contents document.
 */
#define MESSAGE_INDEX_FILE  "/var/adm/MESSAGES.INDEX"
#define CONTENTS_EXTENSION  ".MAIL_BODIES"
#define DIRECTORY_EXTENSION ".TABLE_OF_CONTENTS"

/*---------------------- End User Configuration -------------------------- */

/* Boolean constants, defined only if no earlier header provided them */
#ifndef TRUE
# define TRUE  1
# define FALSE 0
#endif

/*
 * Exit Codes
 *
 * Every E_* macro expands to TWO comma separated arguments, a numeric exit
 * status followed by a message, so that it can be handed straight to
 * Fatal(int code, const char *message), e.g. Fatal(E_USAGE).
 */
#define E_ENOENT    02, "Sorry Document is not available or access is restricted"
#define E_USAGE     64, "Incorrect Document request"
#define E_NOINPUT   66, "Cannot open input"
#define E_IOERR     74, "input/output error"
#define E_SOFTWARE  70, "internal software error"
#define E_NOTEMP    75, "INTERNAL ERROR: Can't create a file!"

/* Path of the message index file (see MESSAGE_INDEX_FILE) */
static const char IndexFile[] = MESSAGE_INDEX_FILE;
/* Suffix of the converted-bodies file (see CONTENTS_EXTENSION) */
static const char body_ext[] = CONTENTS_EXTENSION;
/* Upper case hexadecimal digits, indexed by nibble value, for %XX escapes */
static const char HEX[] = "0123456789ABCDEF";

#ifdef _MSC_VER			/* The Microsoft compiler (Xenix/OS2/NT) */
# define strncasecomp strnicmp
#else

/*
 * Case INDEPENDENT version of strncmp().
 *
 * Compares at most n characters of str1 and str2, ignoring case.
 * Returns 0 if the first n characters match (or n is 0), otherwise the
 * difference of the first pair of characters that differ.  Reaching the
 * end of either string stops the comparison at that position.
 *
 * Characters are converted to unsigned char before being handed to
 * tolower(): passing a negative plain char to a <ctype.h> function is
 * undefined, and comparing as unsigned char matches what strncmp() does.
 */
static int
strncasecomp(const char *str1, const char *str2, size_t n)
{
	size_t          i;

	for (i = 0; i < n; i++) {
		const unsigned char c1 = (unsigned char) str1[i];
		const unsigned char c2 = (unsigned char) str2[i];
		int             diff;

		if (c1 == '\0' || c2 == '\0')
			return (int) c1 - (int) c2;
		if ((diff = tolower(c1) - tolower(c2)) != 0)
			return diff;
	}
	return 0;
}

#endif

#ifdef NEED_STRDUP
/*
 * Fallback strdup() for libraries that lack one.
 * Returns a malloc()ed copy of str, or NULL if the allocation fails.
 */
static char    *
strdup(const char *str)
{
	const size_t    size = strlen(str) + 1;	/* include the terminator */
	char           *copy = (char *) malloc(size);

	if (copy == NULL)
		return NULL;
	memcpy(copy, str, size);
	return copy;
}
#endif


/*
 * One entry of the table of contents.
 * Entries form a singly linked list; AddMessage() pushes each new entry on
 * the front, so the list is in reverse order of insertion.
 */
typedef struct _contents {
	char           *anchor;	/* Reference (HREF="<anchor>")   */
	char           *subject;/* Name of anchor                */
	char           *author;	/* Full Name of Author           */
	int             isNews;	/* non-zero ==> News else Mail   */
	char           *group;	/* Newsgroup (NULL if Mail)      */
	long            start;	/* start of message (offset)     */
	struct _contents *next;	/* Next element in linked list   */
}               contents_t;

/* Duplicate text on the heap; a NULL input is stored as an empty string */
static char    *
CopyText(const char *text)
{
	return strdup(text != NULL ? text : "");
}

/*
 * Add an anchor to the table of contents.
 *
 * Contents  head of the list; the new entry is pushed on the front, so the
 *           table is built in reverse order
 * start     offset of the message in the converted-bodies file
 * isNews    non-zero for a news article, zero for mail
 * anchor, subject, author, group
 *           copied into the entry (group only when isNews is non-zero)
 *
 * The entry is added only if every copy succeeds.  On allocation failure the
 * list is left untouched, so later consumers never see a NULL string.
 */
void
AddMessage(contents_t ** Contents, long start, int isNews, const char *anchor,
	   const char *subject, const char *author, const char *group)
{
	contents_t     *tp = (contents_t *) malloc(sizeof(*tp));

	if (tp == NULL)
		return;

	tp->start = start;
	tp->isNews = isNews;
	tp->anchor = CopyText(anchor);
	tp->subject = CopyText(subject);
	tp->author = CopyText(author);
	tp->group = (isNews ? CopyText(group) : NULL);

	if (tp->anchor == NULL || tp->subject == NULL || tp->author == NULL ||
	    (isNews && tp->group == NULL)) {
		/* Out of memory part way through: release what we did get */
		free(tp->anchor);
		free(tp->subject);
		free(tp->author);
		free(tp->group);
		free(tp);
		return;
	}

	tp->next = *Contents;
	*Contents = tp;
}

/*
 * One character hash of a message id.
 * It is written as the first character of every MESSAGES.INDEX line so that
 * a lookup can skip non-matching lines without parsing them.
 * The result is (hash % 225) + 30 converted to char.
 */
char
HASH(const unsigned char *name)
{
	unsigned short  sum = 0;
	int             pos = 0;

	while (name[pos]) {
		const int       shift = 1 + (pos % 8);

		sum += (((short)TOISO(name[pos])) << shift) + name[pos];
		pos++;
	}
	return (char) ((sum % 225) + 30);
}

/*
 * Encode reference as HTML compliant.
 *
 * buf             receives the NUL terminated result; every input byte may
 *                 turn into three ("%XX"), so buf needs room for
 *                 3 * strlen(anchor) + 1 bytes
 * anchor          text to encode
 * case_sensitive  non-zero: keep the case; zero: fold with TOUPPER()
 *
 * Bytes that are not ISPATH(), plus '#' and '%', are written as '%' and two
 * upper case hex digits of TOISO(byte).  Returns buf.
 */
static char    *
EncodeAnchor(char *buf, const unsigned char *anchor, int case_sensitive)
{
	char           *out = buf;
	const unsigned char *in;

	/* Note RFC822 specifies 7-bit headers (Message IDs are 6 bit) */
	for (in = anchor; *in != '\0'; in++) {
		const unsigned char ch = *in;

		if (!ISPATH(ch) || ch == '#' || ch == '%') {
			/*
			 * Mask with an unsigned constant: the character constant
			 * '\377' is -1 where char is signed and would mask nothing,
			 * letting the HEX[] index leave the table.
			 */
			const unsigned  code = ((unsigned) (TOISO(ch))) & 0xFFu;

			*out++ = '%';
			*out++ = HEX[code >> 4];
			*out++ = HEX[code & 0x0Fu];
		} else
			*out++ = (case_sensitive ? (char) ch : (char) TOUPPER(ch));
	}
	*out = '\0';
	return (buf);
}

/*
 * Append dictionary to parsed message id list.
 *
 * Writes one line per table-of-contents entry to the index file:
 *   <hash char><anchor> TAB <encoded filename><body_ext> TAB <start>-<end>
 *   TAB "From <author>: <subject>"
 * The list is newest first, so each entry ends where the entry written
 * before it starts; `end` is the end offset of the first (newest) entry.
 *
 * Does nothing if the list is empty, the file name is too long to encode,
 * or the index file cannot be opened for appending.
 */
void
DumpDictionary(contents_t * Contents, const char *filename, long end)
{
	contents_t     *tp;
	FILE           *fp;
	char            path[3 * MAXPATHLEN + 1];	/* worst case: all "%XX" */

	if (Contents == NULL)
		return;
	if (strlen(filename) >= MAXPATHLEN)
		return;		/* would not fit in path once encoded */
	if ((fp = fopen(IndexFile, "a")) == NULL)
		return;

	EncodeAnchor(path, (const unsigned char *) filename, TRUE);
	for (tp = Contents; tp != NULL; tp = tp->next) {
		fprintf(fp, "%c%s\t%s%s\t%ld-%ld\tFrom %s: %s\n",
			HASH((const unsigned char *) tp->anchor),
			tp->anchor,
			path, body_ext,
			tp->start, end,
			tp->author, tp->subject);
		end = tp->start;
	}
	fclose(fp);		/* was leaked: flush and release the index file */
}


/*
 * Print the Table of Contents.
 * Writes the list as an HTML definition list to fp, one linked <DT> per
 * entry followed by its subject as <DD>.  An empty list prints nothing.
 */
void
PrintContents(contents_t * Contents, FILE * fp)
{
	contents_t     *entry = Contents;

	if (entry == NULL)
		return;

	fputs("<!-- Table of Contents for this file (reverse order) -->\n", fp);
	fputs("<H1>Table of Contents</H1>\n<DL>\n", fp);
	while (entry != NULL) {
		fprintf(fp, "<DT><A HREF=\"/MessageID/%s\">", entry->anchor);
		if (!entry->isNews)
			fprintf(fp, "%s from %s", "Mail", entry->author);
		else
			fprintf(fp, "%s in %s", entry->author, entry->group);
		fprintf(fp, "</A><DD>%s\n", entry->subject);
		entry = entry->next;
	}
	fputs("</DL><P>\n\n", fp);
}

/*
 * Strip trailing white space (space, tab, CR, LF) in place.
 *
 * Returns line.  A NULL line is returned unchanged, and an empty or
 * all-white-space string becomes "" without ever touching memory in front
 * of the buffer.
 */
char           *
StripTail(char *line)
{
	size_t          len;

	if (line == NULL)
		return NULL;

	len = strlen(line);
	while (len > 0 &&
	       (line[len - 1] == '\r' || line[len - 1] == '\n' ||
		line[len - 1] == ' ' || line[len - 1] == '\t'))
		line[--len] = '\0';
	return line;
}

/*
 * Trim a string at both ends: trailing white space is cut off in place and
 * the returned pointer addresses the first character that is not ISWHITE().
 */
static char    *
HTStrip(char *line)
{
	char           *first = StripTail(line);

	while (ISWHITE(*first))
		first++;
	return first;
}


/*
 * Copy a whole stream: flush and rewind infp, then write every byte of it
 * to outfp.  A NULL infp is ignored.
 */
static void
CatStream(FILE * infp, FILE * outfp)
{
	int             byte;

	if (infp == NULL)
		return;

	fflush(infp);
	rewind(infp);
	for (byte = getc(infp); byte != EOF; byte = getc(infp))
		putc((char) byte, outfp);
}

/* Value of a hexadecimal digit (either case), or -1 if c is not one */
static int
HexDigitValue(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}

/*
 * Decode HTML reference.
 *
 * Copies anchor to buf, replacing every "%XX" (two hex digits, either case)
 * by the byte it names.  A '%' that is not followed by two hex digits is
 * copied unchanged, so a truncated escape at the end of the string can never
 * make the scan run past the terminator.
 *
 * The output is never longer than the input, so buf needs
 * strlen(anchor) + 1 bytes; buf may be the same buffer as anchor, or start
 * in front of it, because the write position never overtakes the read
 * position.  Returns buf.
 */
static char    *
DecodeAnchor(char *buf, const unsigned char *anchor)
{
	char           *out = buf;
	const unsigned char *in = anchor;

	while (*in != '\0') {
		if (*in == '%') {
			const int       hi = HexDigitValue(in[1]);
			/* in[2] is only read once in[1] is known not to be NUL */
			const int       lo = (hi >= 0) ? HexDigitValue(in[2]) : -1;

			if (lo >= 0) {
				*out++ = (char) ((hi << 4) | lo);
				in += 3;
				continue;
			}
		}
		*out++ = (char) *in++;
	}
	*out = '\0';
	return buf;
}


/*
 * Build the lookup key of a message id.
 * For "XXX <YYY> ZZZZ" the key is taken from "YYY" (the closing '>' in line
 * is overwritten with NUL); without a '<' the whole line is used.  The text
 * is trimmed and then encoded into buf with case folding.  Returns buf.
 */
static char    *
MessageKey(char *buf, char *line)
{
	char           *key = line;
	char           *open = strchr(line, '<');

	if (open != NULL) {
		char           *close;

		key = open + 1;
		close = strchr(key, '>');
		if (close != NULL)
			*close = '\0';
	}
	/* Message Keys are CASE INSENSITIVE */
	return EncodeAnchor(buf, HTStrip(key), FALSE);
}

/*-
 * Find Author's name in mail address
 * In "XXX (YYY)" or "YYY <XXX>" return "YYY"
 * Find Author's address in mail address
 * In "XXX (YYY)" or "YYY <XXX>" return "XXX"
 *
 * buf     receives the result; it is never longer than the (possibly
 *         truncated) key, i.e. at most 255 characters plus terminator
 * key     the header value; only its first 255 characters are examined
 * author  non-zero: return the name, zero: return the address
 *
 * Returns buf.
 */
static char    *
NameKey(char *buf, const char *key, int author)
{
	char           *s, *e;
	char            email[256];
	char            p1, p2, b1, b2;
	size_t          len;

	if (author) {
		p1 = '('; p2 = ')';
		b1 = '<'; b2 = '>';
	} else {
		p1 = '<'; p2 = '>';
		b1 = '('; b2 = ')';
	}

	/* Bounded copy: the header line can be much longer than email[] */
	len = strlen(key);
	if (len > sizeof(email) - 1)
		len = sizeof(email) - 1;
	memcpy(email, key, len);
	email[len] = '\0';

	if (((s = strchr(email, p1)) != NULL) && ((e = strchr(email, p2)) != NULL)) {
		if (e > s) {
			*e = '\0';	/* Chop off everything after p2 (')' or '>') */
			/* source and destination overlap: memmove, not strcpy */
			memmove(email, s + 1, strlen(s + 1) + 1);
		}
	} else if (((s = strchr(email, b1)) != NULL) && ((e = strchr(email, b2)) != NULL)) {
		if (e > s)
			memmove(s, e + 1, strlen(e + 1) + 1);	/* Remove <...> or (...) */
	}

	if (email[0] == '\0') {
		/* Nothing left to trim */
		buf[0] = '\0';
		return buf;
	}
	strcpy(buf, HTStrip(email));	/* Remove leading and trailing spaces */
	return buf;
}



/* Append src to dst (capacity cap, `used` bytes in use), truncating to fit.
 * Returns the number of bytes actually appended. */
static size_t
AnchorAppend(char *dst, size_t cap, size_t used, const char *src)
{
	size_t          n = strlen(src);

	if (used + 1 >= cap)
		return 0;
	if (n > cap - 1 - used)
		n = cap - 1 - used;
	memcpy(dst + used, src, n);
	dst[used + n] = '\0';
	return n;
}

/*
 * Turn every "<id>" in line into a link to /MessageID/<encoded id>, with the
 * trimmed id as link text; text between the ids is copied through.
 *
 * line is modified (NULs are written over the '<' and '>' delimiters).
 *
 * This Function returns a static storage area, it is the duty of the caller
 * to save it.  The area is BUFSIZ bytes: once the next link would no longer
 * fit, the rest of the line is dropped rather than written past the end.
 */
static char    *
Anchor(char *line)
{
	static const char open_tag[] = "<A HREF=\"/MessageID/";
	static char     tmp[BUFSIZ];
	size_t          used = 0;
	char           *rest = line;
	char           *lt;

	tmp[0] = '\0';
	while (rest != NULL && (lt = strchr(rest, '<')) != NULL) {
		char           *id;
		size_t          idlen;

		if (lt > rest) {
			*lt = '\0';
			used += AnchorAppend(tmp, sizeof(tmp), used, rest);
		}
		id = lt + 1;
		if ((rest = strchr(id, '>')) != NULL)
			*rest++ = '\0';
		id = HTStrip(id);
		idlen = strlen(id);

		/*
		 * Worst case for one link: the opening tag, every id byte encoded
		 * as "%XX", the two characters closing the tag, the id itself as
		 * link text and "</A>".
		 */
		if (used + (sizeof(open_tag) - 1) + 3 * idlen + 2 + idlen + 4 >= sizeof(tmp)) {
			rest = NULL;	/* out of room: drop the remainder */
			break;
		}
		memcpy(tmp + used, open_tag, sizeof(open_tag));
		used += sizeof(open_tag) - 1;
		EncodeAnchor(tmp + used, (const unsigned char *) id, FALSE);
		used += strlen(tmp + used);
		used += (size_t) sprintf(tmp + used, "\">%s</A>", id);
	}			/* while */
	if (rest != NULL)
		AnchorAppend(tmp, sizeof(tmp), used, rest);
	return tmp;
}

/*-
 * Send out a line of the message body.
 * (1) Use Latin-1 public entities
 * (2) Translate URLs, eg. ftp://site:port/path to:
 *       <A HREF="ftp/site:port/path">ftp/site:port/path</A>
 * (3) Markup VT100 Style underlined text (eg. man) as
 *     Strong.
 * We don't confirm that the protocol is valid (registered)
 * --- would be very simple but the list is growing too fast
 * (file, ftp, http, wais, gopher, prospero, ... )
 *
 * outbuf  receives the converted, NUL terminated text
 * line    the input text; it is patched in place while a URL is formatted
 *         (see below)
 * nl      non-zero: terminate the output with a newline
 *
 * Returns outbuf.
 *
 * NOTE(review): nothing in here checks the size of outbuf, nor of the local
 * 256 byte buffers used for underlined runs; the output can be several
 * times longer than the input, so callers must size outbuf generously.
 */
static char *
BodyLine(char *outbuf, unsigned char *line, int nl)
{
	unsigned char  *tcp = outbuf;	/* write position in outbuf */
	unsigned char  *tp = line;	/* read position in line */
	unsigned char   ch;

	while (( ch = *tp) != '\0')
		if (ch == '_' && *(tp+1) == '\b') {
			char            buf[256];
			char            tmp[256];
			char           *ptr = buf;

			/* VT100 Underlined text */
			/* Collect the characters of a run of "_<Ctrl-H>X" triples into buf */
			do {
				tp += 2; /* Skip _<Ctrl-H> */
				*ptr++ = *tp++;
			} while (*tp == '_' && *(tp+1) == '\b');
			*ptr = '\0';

			/* Convert the collected run recursively (no newline) */
			ptr = BodyLine(tmp, buf, FALSE); /* Fixup chars */
			/* Underlined Text is marked strong */
			strcpy(tcp, "<strong>");
			strcat(tcp, ptr);
			strcat(tcp, "</strong>");
			tcp += strlen(tcp);

		} else if ((ch == ':') && (*(tp+1) == '/') && (*(tp+2) == '/') &&
			   (tp >= &line[2]) && ISALPHA(*(tp-1))) {
			unsigned char  *tp2;

			/* Saw a URL Magic Back up */
			/*
			 * Step both the read and the write position back over the
			 * letters in front of "://"; this relies on each of those
			 * letters having been copied to the output as one byte.
			 * NOTE(review): *tp is tested before tp >= line, so the
			 * byte in front of line can be read when the scheme starts
			 * at the very beginning -- confirm.
			 */
			do {
				--tp; --tcp;
			} while (ISALPHA(*tp) && tp >= line);
			if (!ISALPHA(*tp)) {
				tp++; tcp++;
			}

			/* Insert Anchor */
			/* tp2 = first character after the URL */
			for (tp2 = tp; ISPATH(*tp2); tp2++)
				 /* loop */ ;
			if (( *(tcp-1) == '"' || *(tcp-1) == '\'') && *tp2 == *(tp-1)) {
				/* quoted arguments */
				/* The closing quote in line is overwritten and skipped;
				 * the opening quote already written is overwritten too */
				*tp2++ = '\0';	/* ASCIIZ */
				sprintf(--tcp, "<A HREF=\"%s\">%s%c</A>", tp, tp - 1, *(tp-1));
			} else {
				char            ch;

				/* Terminate the URL just for the sprintf, then restore */
				ch = *tp2;	/* Save character */
				*tp2 = '\0';	/* ASCIIZ */
				sprintf(tcp, "<A HREF=\"%s\">%s</A>", tp, tp);
				*tp2 = ch;	/* Replace character */
			}
			tp = tp2;	/* Set pointer to tail */
			tcp += strlen(tcp);	/* Go to tail */
		} else if (Markups[ch].len) {
			/* The character has an entry in the Markups table: emit its entity */
			memcpy(tcp, Markups[ch].entity, Markups[ch].len);
			tcp += Markups[ch].len;
			tp++;
		} else if (!ISASCII(ch) || (ISCNTRL(ch) && !ISWHITE(ch))) {
			/* Emit "&#" + three decimal digits of TOISO(ch) + ";" */
			*tcp++ = '&';
			*tcp++ = '#';
			*tcp++ = (unsigned char) ((TOISO(ch) / 100) + '0');
			*tcp++ = (unsigned char) ((TOISO(ch) % 100) / 10 + '0');
			*tcp++ = (unsigned char) ((TOISO(ch) % 10) + '0');
			*tcp++ = ';';
			tp++;
		} else
			*tcp++ = *tp++;	/* ordinary character: copy through */
	if (nl) *tcp++ = '\n';
	*tcp = '\0';
	return outbuf;
}

/*
 * Put a line of the Body: convert line with BodyLine() and write it,
 * followed by a newline, to outfp.
 *
 * The converted text can be much longer than the input (entities, anchors,
 * <strong> markup), so the output buffer is sized from the input length
 * instead of being a fixed BUFSIZ.  The factor 10 assumes that no entity in
 * the Markups table is longer than 8 bytes -- TODO confirm against
 * Entities.h.  If a large buffer is needed and cannot be allocated, the
 * line is dropped rather than converted into a buffer that is too small.
 */
static void
PutBodyLine(unsigned char *line, FILE * outfp)
{
	unsigned char   stackbuf[BUFSIZ];
	unsigned char  *outbuf = stackbuf;
	const size_t    need = 10 * strlen((const char *) line) + 64;

	if (need > sizeof(stackbuf)) {
		outbuf = (unsigned char *) malloc(need);
		if (outbuf == NULL)
			return;
	}
	fputs(BodyLine((char *) outbuf, line, TRUE), outfp);
	if (outbuf != stackbuf)
		free(outbuf);
}

/*
 * Convert one header value for output.
 * Headers should be 7 bit, but for now they are treated like 8 bit body
 * text without a trailing newline (RFC1342 support would go here later).
 * Returns outbuf.
 */
static char *
HeaderLine(char *outbuf, unsigned char *line)
{
	char           *rendered = BodyLine(outbuf, line, FALSE);

	return rendered;
}

/*-
 * IsMailFromLine - Is this a legal unix mail "From " line?
 *
 * Given a line of input will check to see if it matches the standard
 * unix mail "From " header format. Returns 0 if it does and <0 if not.
 *
 * The check is strict: besides the layout, each date field must contain a
 * legal value.
 *
 * Assumptions: Not having the definitive unix mailbox reference I have
 * assumed that unix mailbox headers follow this format:
 *
 * From <person> <date> <garbage>
 *
 * Where <person> is the address of the sender, being an ordinary
 * string with no white space imbedded in it, and <date> is the date of
 * posting, in ctime(3C) format.
 *
 * This would, on the face of it, seem valid. I (Bernd) have yet to find a
 * unix mailbox header which doesn't follow this format.
 *
 * Return values:
 *      0         the line is a "From " line
 *   -100         the line does not start with "From "
 *   -200 - n     only n (< 8) fields were found
 *     -1         the day name is not one of Sun..Sat
 *     -2         the month name is not one of Jan..Dec
 *    -20 - i     numeric field does not start with a digit
 *    -10 - i     numeric field is out of range
 *                (i = 0 day of month, 2 hour, 4 minute, 6 second, 8 year)
 *
 * From: Bernd Wechner (bernd@bhpcpd.kembla.oz.au)
 * Obfuscated by: KFS (as usual)
 */

static int
IsMailFromLine(char *line)
{
#define MAX_FIELDS 10
	char           *fields[MAX_FIELDS];	/* start of each field in line */
	char           *sender_tail;	/* end of the sender field; set but not read here */
	register char  *lp, **fp;
	register int    n, i;
	const char      legal_day[] = "SunMonTueWedThuFriSat";
	const char      legal_month[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
	/* {min, max} pairs: day of month, hour, minute, second, year */
	const int       legal_numbers[] = {1, 31, 0, 23, 0, 59, 0, 60, 1969, 2199};

	if (strncmp(line, "From ", 5))   return -100;

	lp = line + 5;
	/* sender day mon dd hh:mm:ss year */
	/*
	 * Split into white space separated fields.  While reading fields 4 and
	 * 5 a ':' also ends the field, so "hh:mm:ss" becomes three fields and
	 * the year ends up in fields[7].
	 */
	for (n = 0, fp = fields; n < MAX_FIELDS; n++) {
		while (*lp && *lp != '\n' && ISASCII(*lp) && ISWHITE(*lp))  lp++;
		if (*lp == '\0' || *lp == '\n')  break;
		*fp++ = lp;
		while (*lp && ISASCII(*lp) && !ISWHITE(*lp))
			if (*lp++ == ':' && (n == 4 || n == 5))   break;
		if (n == 0) sender_tail = lp;
	}

	if (n < 8)  return -200 - n;

	fp = fields;

	/* If field 7 is not a number, take the year from a later field */
	if (n > 8 && !ISNUM(fp[7][0]))   fp[7] = fp[8];	/* ... TZ year */
	if (n > 9 && !ISNUM(fp[7][0]))   fp[7] = fp[9];	/* ... TZ DST year */

	/* fields[1]: day name */
	fp++;
	for (i = 0; i < 21; i += 3)
		if (strncmp(*fp, &legal_day[i], 3) == 0)  break;
	if (i == 21)   return -1;

	/* fields[2]: month name */
	fp++;
	for (i = 0; i < 36; i += 3)
		if (strncmp(*fp, &legal_month[i], 3) == 0)
			break;
	if (i == 36)   return -2;

	/* fields[3..7]: numeric fields, checked against legal_numbers[i], [i + 1] */
	for (i = 0; i < 10; i += 2) {
		lp = *++fp;
		if (!ISNUM(*lp))  return -20 - i;
		n = atoi(lp);	/* n is reused as the field value from here on */
		if (n < legal_numbers[i] || legal_numbers[i + 1] < n) return -10 - i;
	}
	return 0;
}

/*-
 * Start of News:
 * "Article <Number> of <Newsgroup>:"
 *
 * line     the candidate line
 * article  if not NULL, receives the article number on success
 * group    if not NULL, receives a pointer to the newsgroup name on
 *          success; the name lives in static storage that is overwritten
 *          by the next successful call
 *
 * Returns 0 if the line has that form, else a negative code telling where
 * the match failed (-500 no "Article ", -400 no number, -300 no white space
 * after the number, -200 no "of", -100 no white space after "of",
 * -10 missing group).
 */
static int
IsNewsLine(char *line, int *article, char **group)
{
	int             i;

	if (strncmp(line, "Article ", 8)) return -500;
	line += 8;
	/* Skip white space */
	while (ISWHITE(*line))  line++;

	if (!ISNUM(*line))    return -400;
	i = atoi(line);
	/* skip number data */
	while (ISNUM(*line))  line++;

	if (!ISWHITE(*line))    return -300;
	/* Skip white space */
	while (ISWHITE(*line))  line++;

	if (line[0] != 'o' || line[1] != 'f')  return -200;
	/* Skip the of */
	line += 2;

	if (!ISWHITE(*line))    return -100;
	/* Skip white space */
	while (ISWHITE(*line))  line++;

	if (*line == '\0') return -10;	/* Missing Group */

	/* OK, if was "Article NNN of XXX.XXX.XXXXX:" */
	{
		static char     grp[60];
		char            raw[sizeof(grp)];
		/*
		 * Room for the converted form of raw[]; the factor 10 assumes no
		 * entity in the Markups table exceeds 8 bytes -- TODO confirm.
		 */
		char            tmp[10 * sizeof(grp) + 64];
		size_t          len;

		/*
		 * Only sizeof(grp) - 1 characters can be kept anyway, so convert
		 * no more than that many input characters: this bounds what
		 * HeaderLine() can write into tmp.
		 */
		len = strlen(line);
		if (len > sizeof(raw) - 1)
			len = sizeof(raw) - 1;
		memcpy(raw, line, len);
		raw[len] = '\0';

		HeaderLine(tmp, (unsigned char *) raw);
		len = strlen(tmp);
		if (len > sizeof(grp) - 1)
			len = sizeof(grp) - 1;
		memcpy(grp, tmp, len);
		grp[len] = '\0';	/* last valid index, not sizeof(grp) */

		/* Strip trailing ':' if it has one */
		if (len > 0 && grp[len - 1] == ':')
			grp[len - 1] = '\0';
		if (group)     *group = grp;
		if (article)   *article = i;
	}
	return 0;
}

/*
 * Structure to store the header information (envelope).
 * Every member is a fixed size, NUL terminated string; an empty string
 * means the header was not present.
 */
typedef struct {
	char            cc[128];            /* cc:                  */
	char            bcc[128];           /* bcc:                 */
	char            from[64];           /* Reply-To: or From: (name part)    */
	char            address[64];        /* Reply-To: or From: (address part) */
	char            subject[128];       /* Subject:             */
	char            date[40];           /* Date:                */
	char            id[80];             /* Message-ID:          */
	char            keywords[256];      /* Keywords:            */
	char            organization[80];   /* Organization:        */
	char            followup[126];      /* Followup-To:         */
	char            newsgroups[BUFSIZ]; /* Newsgroups:          */
	char            xrefs[BUFSIZ / 2];	/* In-Reply-To:         */
	char            refs[BUFSIZ];       /* References:          */
}               envelope_t;

static char    *
ReadHeaderLine(char *buf, size_t len, FILE *infp)
{
	char           *tcp;
	int             ahead; /* lookahead token */

	if ((tcp = fgets(buf, len, infp)) != NULL) {
		/* Check if continuation line */
		while ((ahead = fgetc(infp)) == '\t' || ahead == ' ') {
			char            tmp[256];

			if (fgets(tmp, sizeof(tmp), infp) != NULL) {
				tcp = StripTail(buf);
				strcat(tcp, " ");
				strcat(tcp, tmp);
			}
		}		/* while */
		ungetc(ahead, infp);		/* push back */
	}
	return StripTail(tcp);
}

/* Copy src into dst (capacity dstlen), truncating so it always fits */
static void
StoreField(char *dst, size_t dstlen, const char *src)
{
	size_t          n = strlen(src);

	if (dstlen == 0)
		return;
	if (n >= dstlen)
		n = dstlen - 1;
	memcpy(dst, src, n);
	dst[n] = '\0';
}

/* Split a From:/Reply-To: value into envelope address and author name */
static void
StoreNames(envelope_t * envelope, const char *value)
{
	char            key[256];	/* NameKey() works on at most 255 chars */
	char            name[256];

	StoreField(key, sizeof(key), value);
	StoreField(envelope -> address, sizeof(envelope -> address), NameKey(name, key, FALSE));
	StoreField(envelope -> from, sizeof(envelope -> from), NameKey(name, key, TRUE));
}

/*
 * Read the RFC822 header of the message at the current position of infp
 * and fill in envelope.  Reading stops after the first empty line (or at
 * end of input); everything after it is message body.
 *
 * Every value is first produced in a scratch buffer and then copied into
 * its fixed size envelope member with truncation, so an over-long header
 * can no longer write past the member.  (A truncated value may end in the
 * middle of an entity or %XX escape.)  The scratch size assumes that no
 * entity in the Markups table exceeds 8 bytes -- TODO confirm.
 *
 * Messages without a Message-ID get a generated "FAKEnnnnn" id and messages
 * without a subject get "No Subject".
 *
 * Returns the number of MIME related header lines seen (non-zero means the
 * message uses MIME, which is not supported yet).
 */
static int
ParseRFC822Header(envelope_t * envelope, FILE * infp)
{
	static char     scratch[10 * BUFSIZ + 64];
	char            tmp[BUFSIZ];
	char           *tcp;
	int             mime = 0;

#define STORE(field, text) \
	StoreField(envelope -> field, sizeof(envelope -> field), (text))
#define STORE_HEADER(field, text) \
	STORE(field, HeaderLine(scratch, (unsigned char *) (text)))

	memset(envelope, 0, sizeof(envelope_t));

	/* Read the header bits */
	/* Everything after first null line is message body */
	while ((tcp = ReadHeaderLine(tmp, sizeof(tmp), infp)) != NULL && *tcp) {
		/* TAGS in RFC-822 Header */
		switch (*tcp++) {
			case 'b': case 'B':	/* possible bcc: */
				if (strncasecomp("cc: ", tcp, 4) == 0)
					STORE_HEADER(bcc, tcp + 4);
				break;
			case 'c': case 'C':	/* possible cc: or Content-<*>: */
				/* MIME NOT YET SUPPORTED */
				if (strncasecomp("ontent-", tcp, 7) == 0)
					mime++;
				else if (strncasecomp("c: ", tcp, 3) == 0)
					STORE_HEADER(cc, tcp + 3);
				break;
			case 'd': case 'D':	/* possible Date: */
				if (strncasecomp("ate: ", tcp, 5) == 0)
					STORE_HEADER(date, tcp + 5);
				break;
			case 'f': case 'F':	/* possible From: or Followup-To: */
				/* "Reply-to" SUPERSEDES the "From" field */
				if (*(envelope -> from) == '\0' && strncasecomp("rom: ", tcp, 5) == 0)
					StoreNames(envelope, tcp + 5);
				else if (strncasecomp("ollowup-To: ", tcp, 12) == 0)
					STORE(followup, tcp + 12);
				break;
			case 'i': case 'I':	/* possible In-Reply-To: */
				if (strncasecomp("n-Reply-To: ", tcp, 12) == 0)
					STORE(xrefs, tcp + 12);
				break;
			case 'k': case 'K':	/* possible Keywords: */
				if (strncasecomp("eywords: ", tcp, 9) == 0)
					STORE_HEADER(keywords, tcp + 9);
				break;
			case 'm': case 'M':	/* possible Message-ID: or MIME-Version: */
				if (strncasecomp("essage-ID: ", tcp, 11) == 0)
					STORE(id, MessageKey(scratch, tcp + 11));
				else if (strncasecomp("IME-Version: ", tcp, 13) == 0)
					mime++;
				break;
			case 'n': case 'N':	/* possible Newsgroups: */
				if (strncasecomp("ewsgroups: ", tcp, 11) == 0)
					STORE_HEADER(newsgroups, tcp + 11);
				break;
			case 'o': case 'O':	/* possible Organization: */
				if (strncasecomp("rganization: ", tcp, 13) == 0)
					STORE_HEADER(organization, tcp + 13);
				break;
			case 'r': case 'R':	/* possible Reply-To: or References: */
				if (strncasecomp("eply-To: ", tcp, 9) == 0)
					StoreNames(envelope, tcp + 9);
				else if (strncasecomp("eferences: ", tcp, 11) == 0)
					STORE(refs, tcp + 11);
				break;
			case 's': case 'S':	/* possible Subject: or Sender: */
				if (strncasecomp("ubject: ", tcp, 8) == 0)
					STORE_HEADER(subject, tcp + 8);
				break;
			default:
				break;
		}		/* switch */
	}

	if (*(envelope -> id) == '\0') {
		static int      count = 1;

		/* generate pseudo ID */
		sprintf(envelope -> id, "FAKE%05o", count++);
	}
	if (*(envelope -> subject) == '\0')
		strcpy(envelope -> subject, "No Subject");
#undef STORE_HEADER
#undef STORE
	return mime;
}

/* Write one "<DT>name:<DD>value" row, unless value is empty */
static void
PutHeaderItem(FILE * outfp, const char *name, const char *value)
{
	if (*value)
		fprintf(outfp, "<DT>%s:<DD>%s\n", name, value);
}

/*
 * Handle the header of one message: parse it from infp, add the message to
 * the table of contents (its start is the current position of outfp) and
 * write the header to outfp as an HTML definition list.
 *
 * isNews  zero for mail, else the article number
 * group   newsgroup name; only used when isNews is non-zero
 */
static void
LocateAnchors(contents_t ** Contents, int isNews, char *group, FILE * infp, FILE * outfp)
{
	envelope_t      envelope;
	const int       mime = ParseRFC822Header(&envelope, infp);
	const char     *author = envelope.from[0] ? envelope.from : "Annonymous";

	/* Add Message To Table of Contents */
	AddMessage(Contents, ftell(outfp), isNews, envelope.id, envelope.subject, author, group);

	/* Print Header */
	fprintf(outfp, "<!-- Header -->\n<DL>\n");
	PutHeaderItem(outfp, "Subject", envelope.subject);
	PutHeaderItem(outfp, "From", envelope.from);
	if (envelope.address[0])
		fprintf(outfp, "<DT>Reply to:<DD><ADDRESS>%s</ADDRESS>\n", envelope.address);
	PutHeaderItem(outfp, "Organization", envelope.organization);
	PutHeaderItem(outfp, "Date", envelope.date);
	/* Anchor() rewrites its argument, so only call it for present headers */
	if (envelope.xrefs[0])
		fprintf(outfp, "<DT>%s:<DD>%s\n", "In-Reply-To", Anchor(envelope.xrefs));
	if (envelope.refs[0])
		fprintf(outfp, "<DT>%s:<DD>%s\n", "References", Anchor(envelope.refs));
	PutHeaderItem(outfp, "Followup-To", envelope.followup);
	PutHeaderItem(outfp, "cc", envelope.cc);
	PutHeaderItem(outfp, "bcc", envelope.bcc);
	PutHeaderItem(outfp, "Keywords", envelope.keywords);
	if (isNews)
		fprintf(outfp, "<DT>Usenet %s:<DD>Article %d\n", group, isNews);
	/* List the newsgroups unless they are just the group being read */
	if ((envelope.newsgroups[0]) && (!isNews || strcmp(group, envelope.newsgroups)))
		fprintf(outfp, "<DT>%s:<DD>%s\n", "Cross Posted Newsgroups", envelope.newsgroups);
	fprintf(outfp, "</DL>\n");

	/* If we saw a MIME Header send out a warning message */
	if (mime)
		fprintf(outfp,
			"<P><STRONG>WARNING: The message contained a MIME header (NOT YET Supported)</STRONG><P>\n");
}

/*
 * Convert a whole mail folder or news file.
 *
 * Reads infp line by line.  A unix "From " line or an "Article N of GROUP:"
 * line starts a new message, whose header is handled by LocateAnchors();
 * every other line is body text and is written inside <PRE> ... </PRE>.
 *
 * <PRE> is closed exactly when it was opened: lines in front of the first
 * message get their closing tag, and a message without body lines does not
 * produce a stray one.
 *
 * Returns the number of messages found.
 */
int
ParseMail(contents_t ** Contents, FILE * infp, FILE * outfp)
{
	char            tmp[BUFSIZ];
	int             count = 0;
	int             lines = 0;	/* body lines written since <PRE> */
	int             isNews;
	char           *group = NULL;	/* set by IsNewsLine() for news only */

	while (fgets(tmp, sizeof(tmp), infp) != NULL) {
		StripTail(tmp);
		if ((isNews = IsMailFromLine(tmp)) == 0 || IsNewsLine(tmp, &isNews, &group) == 0) {
			/* Mail header */
			if (lines)
				fprintf(outfp, "</PRE>\n");
			lines = 0;
			count++;
			LocateAnchors(Contents, isNews, group, infp, outfp);
		} else {
			if (lines++ == 0)
				fprintf(outfp, "<PRE>\n\n");
			PutBodyLine((unsigned char *) tmp, outfp);	/* Body */
		}
	}			/* while */
	if (lines) fprintf(outfp, "</PRE>");
	return count;
}

/*
 * Report a fatal error and terminate.
 * The message goes to stdout as a small HTML document; the process then
 * exits with status `code`.  Does not return.
 */
static void
Fatal(int code, const char *message)
{
	static const char page[] =
		"<TITLE>Mail Server Error Message</TITLE>\n"
		"<PLAINTEXT>\n\n%s\n\n";

	/* Send the error message to stdout */
	printf(page, message);
	exit(code);
}

/*
 * Return the part of string after its last '/', or string itself if it
 * contains no '/'.  The result points into string.
 */
static const char *
basename(char *string)
{
	const char     *slash = strrchr(string, '/');

	if (slash == NULL)
		return string;
	return slash + 1;
}

/*
 * Send one converted message to stdout as a complete HTML document.
 *
 * title     text for the <TITLE> element
 * filename  the converted-bodies file
 * start     offset of the first byte of the message in that file
 * end       offset just past its last byte
 *
 * At most end - start bytes are copied (nothing if that is not positive).
 * Returns 0 on success, -1 if the file cannot be opened or positioned.
 * The file is closed on every path.
 */
static int
SendDocument(const char *title, const char *filename, long start, long end)
{
	long            remaining = end - start;
	int             ch;
	FILE           *fp;

	if ((fp = fopen(filename, "r")) == NULL)
		return -1;

	if (fseek(fp, start, SEEK_SET) != 0) {
		fclose(fp);
		return -1;	/* seek error */
	}

	/* Produce HTML Document */
	fputs("<HTML>\n<HEAD>\n<TITLE>", stdout);
	fputs(title, stdout);
	fputs("</TITLE>\n</HEAD>\n<BODY>\n", stdout);
	fputs("<!-- Message Body Follows -->\n", stdout);
	/* Test the count first so no byte is read beyond the message */
	while (remaining-- > 0 && (ch = getc(fp)) != EOF)
		putc(ch, stdout);
	fputs("</BODY></HTML>\n", stdout);
	fclose(fp);
	return 0;
}

/*
 * Look up a message id in the index file and send the message.
 *
 * name  the message id to look for
 *
 * Index lines have the form written by DumpDictionary():
 *   <hash char><anchor> TAB <encoded filename> TAB <start>-<end> TAB <title>
 * Only lines whose first character equals HASH(name) are parsed.  The first
 * line whose anchor equals name is sent with SendDocument().
 *
 * Returns the result of SendDocument(), or -1 if the index cannot be opened
 * or holds no matching entry.
 */
int
FetchMessage(const char *name)
{
	FILE           *fp;
	char            tmp[BUFSIZ];
	char            path[BUFSIZ];	/* decoded file name; never longer than the encoded one */
	const char      hash = HASH((const unsigned char *) name);
	int             result = -1;

	if ((fp = fopen(IndexFile, "r")) == NULL)
		return result;	/* no index: nothing to close either */

	while (fgets(tmp, sizeof(tmp), fp) != NULL) {
		char           *anchor;
		char           *filename;
		char           *title;
		char           *range;
		long            start, end;

		if (tmp[0] != hash)
			continue;

		anchor = strtok(tmp + 1, "\t");
		filename = strtok(NULL, "\t");
		range = strtok(NULL, "\t");
		title = strtok(NULL, "\n");
		if (anchor == NULL || filename == NULL || range == NULL ||
		    strcmp(anchor, name) != 0)
			continue;
		if (sscanf(range, "%ld-%ld", &start, &end) != 2)
			continue;

		/*
		 * Decode into a buffer of its own: decoding into tmp would
		 * overwrite the anchor text that may be used as the title.
		 */
		result = SendDocument(title ? title : anchor,
				      DecodeAnchor(path, (const unsigned char *) filename),
				      start, end);
		break;
	}
	fclose(fp);
	return result;
}


/*
 * Open the converted-bodies file that belongs to folder `name`, i.e. the
 * file called name + body_ext, with the given fopen() mode.
 *
 * Returns the stream, or NULL if the resulting path would not fit in
 * MAXPATHLEN bytes or fopen() fails.
 */
static FILE    *
OpenContentsFile(const char *name, const char *mode)
{
	char            tmp[MAXPATHLEN];
	const size_t    namelen = strlen(name);

	/* sizeof(body_ext) counts the terminating NUL */
	if (namelen + sizeof(body_ext) > sizeof(tmp))
		return NULL;
	memcpy(tmp, name, namelen);
	memcpy(tmp + namelen, body_ext, sizeof(body_ext));
	return fopen(tmp, mode);
}

/*
 * Usage: <program> <request>
 *
 * The single argument is %XX-decoded and then is either
 *   /MessageID/<id>   send the converted message with that id, or
 *   <folder path>     send the table of contents of that mail/news file.
 *
 * For a folder, a cached table of contents (folder + DIRECTORY_EXTENSION)
 * is sent as is when it is newer than the folder and larger than 200 bytes.
 * Otherwise the folder is converted into folder + CONTENTS_EXTENSION, its
 * messages are appended to the index file, and the table of contents is
 * written to the cache (when it can be created) and to stdout.
 *
 * Errors are reported through Fatal(), which exits with the matching code.
 */
int
main(int argc, char **argv)
{
	FILE           *outfp = stdout;
	FILE           *infp;
	FILE           *fp;
	contents_t     *Contents = NULL;
	char            filename[MAXPATHLEN];
	/* room for filename plus the extension (sizeof counts its NUL) */
	char            tmp[MAXPATHLEN + sizeof(DIRECTORY_EXTENSION)];
	struct stat     statbuf1;
	struct stat     statbuf2;

	InitCharTable();

	/* Decoding never lengthens the text, so this bounds filename too */
	if (argc != 2 || strlen(argv[1]) >= sizeof(filename))
		Fatal(E_USAGE);

	DecodeAnchor(filename, (const unsigned char *) argv[1]);
	if (strncmp(filename, "/MessageID/", 11) == 0) {
		if (FetchMessage(filename + 11))
			Fatal(E_ENOENT);
		return 0;
	}

	/* Check if the cached table of contents exists */
	strcpy(tmp, filename);
	strcat(tmp, DIRECTORY_EXTENSION);
	if (stat(filename, &statbuf1) == 0 && stat(tmp, &statbuf2) == 0 &&
	    statbuf2.st_mtime > statbuf1.st_mtime && statbuf2.st_size > 200) {
		/* The cache exists and its newer */
		if ((outfp = fopen(tmp, "r")) != NULL) {
			CatStream(outfp, stdout);	/* Cat the cache */
			fclose(outfp);
			return 0;	/* DONE */
		}
		outfp = stdout;	/* Don't bother with caching */
	} else if ((outfp = fopen(tmp, "w+")) == NULL)
		outfp = stdout;

	/*
	 * Open Input and parse to build Messages file.  The input is opened
	 * first so that an unreadable folder does not truncate an existing
	 * bodies file.
	 */
	if ((infp = fopen(filename, "r")) == NULL)
		Fatal(E_ENOENT);
	if ((fp = OpenContentsFile(filename, "w")) == NULL) {
		fclose(infp);
		Fatal(E_NOTEMP);
	}
	ParseMail(&Contents, infp, fp);
	fclose(infp);
	fflush(fp);
	DumpDictionary(Contents, filename, ftell(fp));
	fclose(fp);

	/* Produce HTML Entry Document */
	fputs("<!-- This Document has been Machine Converted -->\n", outfp);
	fprintf(outfp, "<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n</HEAD>\n<BODY>\n", basename(filename));
	if (Contents)
		PrintContents(Contents, outfp);
	else {		/* Was an error, so just cat the contents to recover */
		FILE           *bodies = OpenContentsFile(filename, "r");

		CatStream(bodies, outfp);
		if (bodies != NULL)
			fclose(bodies);
	}
	fputs("</BODY></HTML>\n", outfp);
	if (outfp != stdout) {
		CatStream(outfp, stdout);	/* Cat File to stdout */
		fclose(outfp);
	}
	return 0;
}