/* MIFwriter.c -- MIF output support for WWW
 * $Id: SGMLmain.c,v 1.3 93/01/06 18:40:27 connolly Exp Locker: connolly $
 */

/* implements ... */
#include "MIFwriter.h"

/* uses ... */
#include "SGML.h"
#include "HTParse.h"
#include "HTMLdtd.h"

#include <stdio.h>
#include <string.h>

#include "object.h"
#include "debug.h"


/* One entry on the writer's stack of open elements. */
typedef struct{
  char gi[SGML_NAMELEN + 1]; /* element name, copied in by start_tag */
  int content; /* SGML_* content code start_tag returned for the element */
}Element;


/* State of one MIF writer.  The handlers receive it as an HMDoc* and cast
 * it back to MIF*. */
typedef struct{
  FILE* out; /* stream all MIF text is written to */
  Element stack[SGML_TAGLVL]; /* open elements; stack[0] is the fake "HTML" */
  int literal; /* nonzero inside PRE/XMP/LISTING: data() turns newlines into
		* HardReturn and spaces into HardSpace */
  int taglvl; /* number of entries of stack[] in use */
  int needspace; /* nonzero when the last thing written was a visible
		  * character, so a newline in running text becomes a space */
  int empty; /* current paragraph is empty (no non-newline data yet) */

  /* Which MIF construct is currently open in the output. */
  enum {
    MIFFile, /* top level: only the <MIFFile> header line is out */
    VariableFormats, VariableDef, /* inside <VariableFormats / inside an
				   * open <VariableDef ` string */
    TextFlow, ParaLine, Font /* inside <TextFlow / inside a <ParaLine of a
			      * <Para / after a <Font> change in a ParaLine */
    }state;
}MIF;

/* STATE(m, s, l, p) -- move writer m to state s with literal flag l.
 * When p is nonzero a new paragraph is being started, so the paragraph is
 * marked empty and any pending inter-word space is dropped; otherwise
 * `empty' and `needspace' are left alone.  Note that m is evaluated more
 * than once. */
#define STATE(m, s, l, p)                                   \
  ((m)->state = (s),                                        \
   (m)->literal = (l),                                      \
   (p) ? ((m)->empty = 1, (m)->needspace = 0) : 0)


/* Forward declarations of this file's handlers.  Each one is declared
 * through the function typedef named on its line; those typedefs are not
 * defined in this file. */
static HMStartTagProc start_tag;

static HMEndTagProc end_tag;

static HMDataProc data;

static HMFileWriterProc MIFwriter_new;

static HMDeleteProc MIFwriter_dt;

/* Helper used by start_tag for <A> elements: writes a hypertext marker. */
static VOID
  marker PARAMS((MIF* m,
		 CONST HMBinding *attributes,
		 int nattrs));


/* The MIF writer's class record: the one externally visible object in this
 * file.  It lists, in order, the constructor, a zero slot, the destructor,
 * the start-tag, end-tag and data handlers, and html_entity_text.
 * NOTE(review): what each slot means is fixed by the HMDoc_Class
 * declaration, which is not visible here -- confirm against that header. */
HMDoc_Class MIFwriter = {MIFwriter_new, 0, MIFwriter_dt,
			   start_tag, end_tag, data, html_entity_text};


/* Maps ISO Latin-1 character codes 160..255 to the code written in a MIF
 * \x escape.  Index with (code - 160); the table has exactly 96 entries.
 * An entry of 0x00 marks a Latin-1 character for which the table supplies
 * no Frame code.  The table is read-only, hence const. */
static const int FrameEncoding[] =
{
/* 160 /space -> */ 0x20,
/* 161 /exclamdown -> */ 0xc1,
/* 162 /cent -> */ 0xa2,
/* 163 /sterling -> */ 0xa3,
/* 164 /currency -> */ 0xdb,
/* 165 /yen -> */ 0xb4,
/* 166 /brokenbar -> */ 0x00,
/* 167 /section -> */ 0xa4,
/* 168 /dieresis -> */ 0xac,
/* 169 /copyright -> */ 0xa9,
/* 170 /ordfeminine -> */ 0xbb,
/* 171 /guillemotleft -> */ 0xc7,
/* 172 /logicalnot -> */ 0xc2,
/* 173 /hyphen -> */ 0x2d,
/* 174 /registered -> */ 0xa8,
/* 175 /macron -> */ 0xf8,
/* 176 /degree -> */ 0x00,
/* 177 /plusminus -> */ 0x00,
/* 178 /twosuperior -> */ 0x00,
/* 179 /threesuperior -> */ 0x00,
/* 180 /acute -> */ 0xab,
/* 181 /mu -> */ 0x00,
/* 182 /paragraph -> */ 0xa6,
/* 183 /periodcentered -> */ 0xe1,
/* 184 /cedilla -> */ 0xfc,
/* 185 /onesuperior -> */ 0x00,
/* 186 /ordmasculine -> */ 0xbc,
/* 187 /guillemotright -> */ 0xc8,
/* 188 /onequarter -> */ 0x00,
/* 189 /onehalf -> */ 0x00,
/* 190 /threequarters -> */ 0x00,
/* 191 /questiondown -> */ 0xc0,
/* 192 /Agrave -> */ 0xcb,
/* 193 /Aacute -> */ 0xe7,
/* 194 /Acircumflex -> */ 0xe5,
/* 195 /Atilde -> */ 0xcc,
/* 196 /Adieresis -> */ 0x80,
/* 197 /Aring -> */ 0x81,
/* 198 /AE -> */ 0xae,
/* 199 /Ccedilla -> */ 0x82,
/* 200 /Egrave -> */ 0xe9,
/* 201 /Eacute -> */ 0x83,
/* 202 /Ecircumflex -> */ 0xe6,
/* 203 /Edieresis -> */ 0xe8,
/* 204 /Igrave -> */ 0xed,
/* 205 /Iacute -> */ 0xea,
/* 206 /Icircumflex -> */ 0xeb,
/* 207 /Idieresis -> */ 0xec,
/* 208 /Eth -> */ 0x00,
/* 209 /Ntilde -> */ 0x84,
/* 210 /Ograve -> */ 0xf1,
/* 211 /Oacute -> */ 0xee,
/* 212 /Ocircumflex -> */ 0xef,
/* 213 /Otilde -> */ 0xcd,
/* 214 /Odieresis -> */ 0x85,
/* 215 /multiply -> */ 0x00,
/* 216 /Oslash -> */ 0xaf,
/* 217 /Ugrave -> */ 0xf4,
/* 218 /Uacute -> */ 0xf2,
/* 219 /Ucircumflex -> */ 0xf3,
/* 220 /Udieresis -> */ 0x86,
/* 221 /Yacute -> */ 0x00,
/* 222 /Thorn -> */ 0x00,
/* 223 /germandbls -> */ 0xa7,
/* 224 /agrave -> */ 0x88,
/* 225 /aacute -> */ 0x87,
/* 226 /acircumflex -> */ 0x89,
/* 227 /atilde -> */ 0x8b,
/* 228 /adieresis -> */ 0x8a,
/* 229 /aring -> */ 0x8c,
/* 230 /ae -> */ 0xbe,
/* 231 /ccedilla -> */ 0x8d,
/* 232 /egrave -> */ 0x8f,
/* 233 /eacute -> */ 0x8e,
/* 234 /ecircumflex -> */ 0x90,
/* 235 /edieresis -> */ 0x91,
/* 236 /igrave -> */ 0x93,
/* 237 /iacute -> */ 0x92,
/* 238 /icircumflex -> */ 0x94,
/* 239 /idieresis -> */ 0x95,
/* 240 /eth -> */ 0x00,
/* 241 /ntilde -> */ 0x96,
/* 242 /ograve -> */ 0x98,
/* 243 /oacute -> */ 0x97,
/* 244 /ocircumflex -> */ 0x99,
/* 245 /otilde -> */ 0x9b,
/* 246 /odieresis -> */ 0x9a,
/* 247 /divide -> */ 0x00,
/* 248 /oslash -> */ 0xbf,
/* 249 /ugrave -> */ 0x9d,
/* 250 /uacute -> */ 0x9c,
/* 251 /ucircumflex -> */ 0x9e,
/* 252 /udieresis -> */ 0x9f,
/* 253 /yacute -> */ 0x00,
/* 254 /thorn -> */ 0x00,
/* 255 /ydieresis -> */ 0xd8,
};


/* mifwriter constructor */
static HMDoc*
MIFwriter_new(fp)
     FILE* fp;
{
  MIF* m = NEW(MIF, 1);
  m->out = fp;
  m->taglvl = 1;
  strcpy(m->stack[0].gi, "HTML"); /* @@ fake tag minimization */
  STATE(m, MIFFile, 0, 1);

  fprintf(m->out,
	  "<MIFFile 3.00> # Generated by html2mif\n"
	  );
  return (HMDoc*)m;
}


/* mifwriter destructor: releases the writer object with FREE().
 * Nothing else is done here -- the output stream held by the writer is
 * neither flushed nor closed, and no closing MIF text is written. */
static VOID
MIFwriter_dt(this)
     HMDoc* this;
{
  FREE(this);
}



/* data -- write a run of character data as a MIF string.
 *
 *  document  the writer (really a MIF*)
 *  chars     the characters; nchars of them, not NUL-terminated
 *  nchars    number of characters; calls with nchars <= 0 are ignored
 *
 * What is opened first depends on the writer state:
 *   MIFFile          an implied <BODY> is started, then a BODY paragraph
 *   TextFlow         a paragraph tagged with the innermost open element
 *   VariableFormats  the data is dropped
 *   VariableDef      nothing: the start tag already opened the string
 *   anything else    a new <String ` inside the current ParaLine
 * The characters are then written with MIF escaping and the string is
 * closed with '>.  A lone newline in an empty, non-literal paragraph is
 * dropped so that it does not open a paragraph by itself.
 */
static VOID
data(document, chars, nchars)
     HMDoc* document;
     CONST char* chars;
     int nchars;
{
  MIF* m = (MIF*)document;
  CONST char* p;

  /* Nothing to write; also keeps chars[0] below from being read when
   * there is no such character. */
  if(nchars <= 0)
    return;

  debug(("<emptypar: %d 1st char: %d nchars: %d>\n",
	 m->empty, chars[0], nchars));

  if(nchars == 1 && chars[0] == '\n' &&
     m->literal == 0 && m->empty)
    return;

  switch(m->state){
  case MIFFile:
    start_tag((HMDoc*)m, "BODY", 0, 0);
    fprintf(m->out,
	    " <Para\n"
	    "  <PgfTag `BODY'>\n"
	    "  <ParaLine\n"
	    "   <String `");
    STATE(m, ParaLine, 0, 1);
    break;

  case TextFlow:
    /* the paragraph takes its tag from the innermost open element */
    fprintf(m->out,
	    " <Para\n"
	    "  <PgfTag `%s'>\n"
	    "  <ParaLine\n"
	    "   <String `", m->stack[m->taglvl - 1].gi);
    STATE(m, ParaLine, 0, 1);
    break;

  case VariableFormats:
    /* in element content. Skip data */
    return;

  case VariableDef:
    /* nothing */
    break;

  default:
    fprintf(m->out,
	    "   <String `");
  }

  for(p = chars; p-chars < nchars; p++){
    if(*p != '\n')
      m->empty = 0;

    if(*p & 0x80){
      /* Only 160..255 have entries in FrameEncoding; 128..159 would give
       * a negative index and are dropped. */
      int i = (*p & 0xFF) - 160;
      if(i >= 0 &&
	 i < (int)(sizeof FrameEncoding / sizeof FrameEncoding[0])){
	/* NOTE(review): entries of 0x00 (no Frame code in the table) are
	 * still written as "\x00 ", as before -- confirm that is wanted. */
	fprintf(m->out, "\\x%02x ", (unsigned)FrameEncoding[i]);
	/* a visible character, like the default case below */
	m->needspace = 1;
      }
    }else
      switch(*p){
      case '\n':
	if(m->literal)
	  fprintf(m->out,
		  "'>\n"
		  "   <Char HardReturn>\n"
		  "  > # End ParaLine\n"
		  "  <ParaLine\n"
		  "   <String `");
	else if (m->needspace){
	  /* line break in running text separates words */
	  fprintf(m->out, " ");
	  m->needspace = 0;
	}
	break;
	
      case '\r':
	/* nothing */
	break;
	
      case '\t':
	fprintf(m->out, "\\t");
	m->needspace = 0;
	break;
	
      case '>':
	fprintf(m->out, "\\>");
	m->needspace = 1;
	break;
	
      case '\'':
	fprintf(m->out, "\\q");
	m->needspace = 1;
	break;
	
      case '`':
	fprintf(m->out, "\\Q");
	m->needspace = 1;
	break;
	
      case '\\':
	fprintf(m->out, "\\\\");
	m->needspace = 1;
	break;
	
      case ' ':
	if(m->literal){
	  fprintf(m->out,
		  "'>\n"
		  "   <Char HardSpace>\n"
		  "   <String `");
	}else{
	  m->needspace = 0;
	  fprintf(m->out, " ");
	}
	break;
	
      default:
	m->needspace = 1;
	fprintf(m->out, "%c", *p);
      }
  }
  
  /* close the string (or, in VariableDef state, the variable definition) */
  fprintf(m->out, "'>\n");
}


#if 0
/* save this for insets */
/* Disabled: everything up to #endif is compiled out.  As written it would
 * emit the entity name as a MIF <Char ...> statement and set needspace. */
static VOID
entity(document, name)
     HMDoc* document;
     CONST char* name;
{
  MIF* m = (MIF*)document;

  /*@@ same prep work as data */
  fprintf(m->out, "   <Char %s>\n", name);
  m->needspace = 1;
}
#endif


static VOID
marker(m, attributes, nattrs)
     MIF* m;
     CONST HMBinding *attributes;
     int nattrs;
{
  int i;
  char* name = 0;
  char* href = 0;

  for(i = 0; i < nattrs; i++){
    if(!strcmp(attributes[i].name, "NAME"))
      name = attributes[i].value;
    else if(!strcmp(attributes[i].name, "HREF"))    
      href = attributes[i].value;
  }

  if(href){
    char* anchor = HTParse(href, "", PARSE_ANCHOR);
    char* scheme = HTParse(href, "", PARSE_ACCESS);
    char* path = HTParse(href, "", PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
    
    fprintf(m->out,
	    "   <Marker\n"
	    "    <MType 8>\n");
    if(scheme && *scheme)
      fprintf(m->out,
	      "    <MText `message www %s:%s#%s'>\n",
	      scheme, path, anchor);
    else if(path && path[0] && path[1]){ /*@@ in case of just "/" */
      if(anchor && *anchor)
	fprintf(m->out,
		"    <MText `gotolink %s:%s'>\n",
		path, anchor);
      else
	fprintf(m->out,
		"    <MText `gotolink %s:firstpage'>\n",
		path);
    }else
      fprintf(m->out,
	      "    <MText `gotolink %s'>\n",
	      anchor);
    
    fprintf(m->out,
	    "   > #End of Marker\n");
    
    free(scheme);
    free(path);
    free(anchor);
  }
  else if (name){
    fprintf(m->out,
	    "   <Marker\n"
	    "    <MType 8>\n"
	    "    <MText `newlink %s'>\n"
	    "   > #End of Marker\n",
	    name);
  }
}


static int
start_tag(document, gi, attributes, nattrs)
     HMDoc* document;
     CONST char* gi;
     CONST HMBinding attributes[];
     int nattrs;
{
  MIF* m = (MIF*)document;
  Element* e = &m->stack[m->taglvl++];
  int taglevel = -1;

  m->needspace = 0;

  strcpy(e->gi, gi);
  debug(("stacking '%s'\n", gi));

  if(!strcmp(gi, "H1") ||
     !strcmp(gi, "H2") ||
     !strcmp(gi, "H3") ||
     !strcmp(gi, "H4") ||
     !strcmp(gi, "H5") ||
     !strcmp(gi, "H6") ||
     !strcmp(gi, "PRE") ||
     !strcmp(gi, "XMP") ||
     !strcmp(gi, "LISTING") ||
     !strcmp(gi, "ADDRESS") ||
     !strcmp(gi, "BLOCKQUOTE") ||
     !strcmp(gi, "UL") ||
     !strcmp(gi, "OL") ||
     !strcmp(gi, "MENU") ||
     !strcmp(gi, "DIR") ||
     !strcmp(gi, "DL")
     )
    taglevel = ParaLine;
  else
    if(!strcmp(gi, "A") ||
       !strcmp(gi, "EM") ||
       !strcmp(gi, "TT") ||
       !strcmp(gi, "STRONG") ||
       !strcmp(gi, "B") ||
       !strcmp(gi, "I") ||
       !strcmp(gi, "U") ||
       !strcmp(gi, "CODE") ||
       !strcmp(gi, "SAMP") ||
       !strcmp(gi, "KBD") ||
       !strcmp(gi, "KEY") ||
       !strcmp(gi, "VAR") ||
       !strcmp(gi, "DFN") ||
       !strcmp(gi, "CITE"))
      taglevel = Font;

  while(1){
    switch(m->state){
    case MIFFile:
      if(!strcmp(gi, "BODY")){
	fprintf(m->out,	"<TextFlow\n");
	STATE(m, TextFlow, 0, 1);
	return e->content = SGML_MIXED;
      }

      else if(!strcmp(gi, "HEAD")){
	return e->content = SGML_ELEMENT;
      }

      else if(!strcmp(gi, "TITLE")){
	fprintf(m->out,
		"<VariableFormats\n"
		" <VariableFormat\n"
		"  <VariableName `Title'>\n"
		"  <VariableDef `"
		);

	STATE(m, VariableDef, 0, 1);
	return e->content = SGML_RCDATA; /*@@ CDATA? */
      }

      else if(!strcmp(gi, "ISINDEX")){
	fprintf(m->out,
		"<VariableFormats\n"
		" <VariableFormat\n"
		"  <VariableName `Index'>\n"
		"  <VariableDef `True'>\n"
		" >\n"
		);

	STATE(m, VariableFormats, 0, 1);
	m->taglvl--;
	return SGML_EMPTY;
      }

      else if(taglevel == ParaLine || taglevel == Font){
	start_tag((HMDoc*)m, "BODY", 0, 0);
      }

      else{
	debug(("'%s' out of context in state %d", gi, m->state));
	m->taglvl--;
	return SGML_EMPTY;
      }

      break;


    case VariableFormats:
      if(!strcmp(gi, "TITLE")){
	fprintf(m->out,
		" <VariableFormat\n"
		"  <VariableName `Title'>\n"
		"  <VariableDef `"
		);

	STATE(m, VariableDef, 0, 1);
	return e->content = SGML_RCDATA; /*@@ CDATA? */
      }

      else if(!strcmp(gi, "ISINDEX")){
	fprintf(m->out,
		" <VariableFormat\n"
		"  <VariableName `Index'>\n"
		"  <VariableDef `True'>\n"
		" >\n"
		);

	m->taglvl--;
	return SGML_EMPTY;
      }

      else{
	fprintf(m->out,
		" > #End of VariableFormats\n");
	STATE(m, MIFFile, 0, 1);
      }
      break;


    case TextFlow:
      if(!strcmp(gi, "PRE")){
	fprintf(m->out,
		" <Para\n"
		"  <PgfTag `%s'>\n"
		"  <ParaLine\n"
		, gi);
	STATE(m, ParaLine, 1, 1);
	return e->content = SGML_MIXED;
      }

      else if(!strcmp(gi, "XMP") ||
	      !strcmp(gi, "LISTING")){
	fprintf(m->out,
		" <Para\n"
		" <PgfTag `%s'>\n"
		"  <ParaLine\n"
		, gi);
	STATE(m, ParaLine, 1, 1);
	return e->content = SGML_RCDATA;
      }

      else if(taglevel == ParaLine){
	fprintf(m->out,
		" <Para\n"
		"  <PgfTag `%s'>\n"
		"  <ParaLine\n"
		, gi);

	STATE(m, ParaLine, 0, 1);
	return e->content = SGML_MIXED;
      }

      else if(taglevel == Font){
	debug(("%s: transition from TextFlow to BODY ParaLine", gi));

	fprintf(m->out,
		" <Para\n"
		"  <PgfTag `BODY'>\n"
		"  <ParaLine\n");
	
	STATE(m, ParaLine, 0, 1);
      }

      else{
	debug(("'%s' out of context in state %d", gi, m->state));
	m->taglvl--;
	return SGML_EMPTY;
      }

      break;

    case ParaLine:
      if(!strcmp(gi, "A")){
	fprintf(m->out,
		"   <Font\n"
		"    <FTag `%s'>\n"
		"   >\n", gi);

	marker(m, attributes, nattrs);

	STATE(m, Font, m->literal, 0);
	return e->content = SGML_MIXED;
      }

      else if(taglevel == Font){
	fprintf(m->out,
		"   <Font\n"
		"    <FTag `%s'>\n"
		"   >\n"
		, gi);

	STATE(m, Font, m->literal, 0);
	return e->content = SGML_MIXED;
      }

      else if(!strcmp(gi, "P")){
	m->taglvl--;
	if(!m->empty)
	  fprintf(m->out,
		  "  > # End ParaLine\n"
		  " > # End Para\n");
	STATE(m, TextFlow, 0, 1);
	return SGML_EMPTY;
      }

      else if(!strcmp(gi, "DT") ||
	      !strcmp(gi, "LI")){
	m->taglvl--;
	if(!m->empty)
	  fprintf(m->out,
		  "  > # End ParaLine\n"
		  " > # End Para\n"
		  " <Para\n"
		  "  <ParaLine\n");
	
	m->empty = 1;
	m->needspace = 0;
	return SGML_EMPTY;
      }

      else if(!strcmp(gi, "DD")){
	fprintf(m->out,
		"   <Char Tab>\n");

	m->taglvl--;
	return SGML_EMPTY;
      }

      else if(taglevel = ParaLine){
	debug(("'%s' start tag: back to TextFlow state\n", gi));
	fprintf(m->out,
		"  > # End of ParaLine\n"
		" > # End of Para\n"
		);
	STATE(m, TextFlow, 0, 1);
      }

      else{
	debug(("'%s' out of context in state %d", gi, m->state));
	m->taglvl--;
	return SGML_EMPTY;
      }

      break;

    default:
      debug(("state %d unexpected (<%s>)\n", m->state, gi));
      m->taglvl--;
      return SGML_EMPTY;
    }
  }
}



/* end_tag -- handle an end tag.
 *
 *  document  the writer (really a MIF*)
 *  gi        element name from the end tag
 *
 * The stack is searched from the top for the element being closed.  The
 * search also stops at the first element whose content is RCDATA or CDATA,
 * whatever its name.  If nothing is found the end tag is ignored.
 * Otherwise every entry from the top down to and including the one found
 * is popped, and for each pop the MIF construct belonging to the current
 * writer state is closed and the state stepped outward by one.
 */
static VOID
end_tag(document, gi)
     HMDoc* document;
     CONST char* gi;
{
  MIF* m = (MIF*)document;
  int found = m->taglvl - 1;

  /* locate the element to close */
  while(found >= 0){
    debug(("found </%s>. stack has %s\n", gi, m->stack[found].gi));

    if(m->stack[found].content == SGML_RCDATA)
      break;
    if(m->stack[found].content == SGML_CDATA)
      break;
    if(strcmp(gi, m->stack[found].gi) == 0)
      break;

    found--;
  }

  if(found < 0){
    debug(("Parse error: '%s' end tag with no such element open.\n", gi));
    return;
  }

  /* unwind: one state transition per popped element */
  for(; m->taglvl > found; ){
    m->taglvl--;

    switch(m->state){
    case Font:
      /* an empty FTag switches back to the paragraph's default font */
      fprintf(m->out,
	      "   <Font\n"
	      "    <FTag `'>\n"
	      "   > # End of Font\n");
      STATE(m, ParaLine, m->literal, 0);
      break;

    case ParaLine:
      fprintf(m->out,
	      "  > # End of ParaLine\n"
	      " > # End of Para\n");
      STATE(m, TextFlow, 0, 1);
      break;

    case TextFlow:
      fprintf(m->out,
	      "> # End of TextFlow\n");
      STATE(m, MIFFile, 0, 1);
      break;

    case VariableDef:
      fprintf(m->out,
	      " > #End of VariableFormat\n");
      STATE(m, VariableFormats, 0, 1);
      break;

    case VariableFormats:
      fprintf(m->out,
	      "> #End of VariableFormats\n");
      STATE(m, MIFFile, 0, 1);
      break;

    default:
      debug(("'%s' end tag unexpected in state %d.", gi, m->state));
    }
  }
}