/* Bluefish HTML Editor
 * parsedtd.c - Parse a DTD file with definition of HTML
 *
 * Copyright (C) 2001 Santiago Capel Torres
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/* ChangeLog
   2001-03-24   Track of the document doctype
   2001-03-21   Create array with doctypes
   2001-03-21   Added two functions for syntax highlighting:
                    getDTDTagList & getDTDAttList
   2001-01-21   Parse of xhtml-strict.dtd

   2001-01-18   All these DTDs have been successfully parsed:

   HTML.DTD:                "-//IETF//DTD HTML 2.0//EN"
   HTML-S.DTD:	            "-//IETF//DTD HTML 2.0 Strict//EN"
   HTML-0.DTD:	            "-//IETF//DTD HTML Level 0//EN//2.0"
   HTML-0S.DTD:	            "-//IETF//DTD HTML Strict Level 0//EN//2.0"
   HTML-1.DTD:	            "-//IETF//DTD HTML 2.0 Level 1//EN"
   HTML-1S.DTD:             "-//IETF//DTD HTML 2.0 Strict Level 1//EN"
   html-2.1e.dtd:           "-//IETF//DTD HTML 2.1E//EN"
   HTML-3.DTD:              "-//IETF//DTD HTML 3.0//EN"
   HTML-3S.DTD:	            "-//W3O//DTD W3 HTML Strict 3.0//EN//"
   html-3.2.1996-08-15.dtd: "-//W3C//DTD HTML 3.2//EN"
   html-3.2.1996-09-09.dtd: "-//W3C//DTD HTML 3.2//EN"
   html-3.2.dtd:	    "-//W3C//DTD HTML 3.2 Final//EN"
   html-970421.dtd:         "-//W3C//DTD HTML Experimental 970421//EN"
   html-cougar.dtd:	    "-//W3C//DTD HTML Experimental 19960710//EN"
   
   HTML-HJ.DTD:	   "-//Sun Microsystems Corp.//DTD HotJava HTML//EN"
   HTML-HJS.DTD:   "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN"
   html-mcom.dtd:  "-//WebTechs//DTD Mozilla HTML 2.0//EN"
   html-mcoms.dtd: "-//Netscape Comm. Corp. Strict//DTD HTML//EN"
   IEHTML.DTD:	   "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" 
   IEHTML-S.DTD:   "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN"
   IE30.DTD:       "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" 
   IE30-S.DTD:	   "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN"
   html-3-as.dtd:  "-//AS//DTD HTML 3.0 asWedit + extensions//EN"
   oreilly-html-relaxed.dtd:  
          "-//OReilly and Associates//DTD HTML Extended Relaxed 1.0//EN"
   oreilly-rfc1866.dtd:  
          "-//OReilly and Associates//DTD HTML Extended 1.0//EN"
   

   2001-01-17   Parsing of HTML 3.0, HTML 3.0 Strict, HTML Hot Java (Strict).
                The HTML 3.0 DTD have some SGML elements for MATH which I have
                ignored:
		<!USEMAP
		<!SHORTREF
		<!NOTATION
		Parsing of IEHTML.DTD, IEHTML-S.DTD
   2001-01-15   Recursive inclusion of public file entities. Now, it isn't needed
                include by hand a .dtd or .ent file in the main .dtd file.
		Parsing of HTML 2.0, HTML 2.0 Level 1, HTML 2.0 Strict and
		HTML 2.0 Strict Level 1
   2001-01-07   Parsing of HTML 4.01 Frameset.
		To do this, a new frameset.dtd is built, in which I have 
		replaced the entity HTML4.Frameset with the content of the file
		loose.dtd (See above)
   2001-01-04   Parsing of HTML 4.01 & HTML 4.01 Transitional


  TODO List

  - Parse the entities of HTML 3.0 as <!USEMAP, <!SHORTREF, <!NOTATION

*/



/*
  How this module works.

  All the information to parse a DTD is taken from:
  http://www.w3.org/TR/html401/intro


  A DTD file is composed of three types of items: elements, attributes and entities.
  The elements are the HTML tags. <!ELEMENT .... >
  The attributes are the attributes that belong to the tags. <!ATTLIST .... >
  The entities are like macros in the DTD file.  <!ENTITY ... >
  Also contains a Feature switch that serves as a c-#define. It starts with <![ 
  followed by a entity and end with ]]>. If that entity expands to "INCLUDE", 
  the contents of this feature are processed. If that entity expands to "IGNORE", 
  the contents are not processed.

  The public interface of this module consists of:

  - filetoDTD - parses a DTD file and builds a GList of SGML_elementinfo
  with one SGML_elementinfo per HTML tag. 
  It first builds a temporary list of entities (that is finally deleted), then
  builds a list of elements (that will be definitive) and finally fills the list
  of elements with a list of attributes.

  - delete_elements -  deletes a list of elements

  - get_element_byname - returns the SGML_elementinfo from the list of
  elements for a given tag name.


  Description of the structures that contain the parsed DTD.

  The parsed DTD is stored in a GList which is passed as argument to filetoDTD.
  Each item of this list contains a SGML_elementinfo with the information of a
  HTML tag and a list of attributes (SGML_attrinfo). 
  Each attribute has a defined basic type (CDATA, URI, etc.) and possibly a known
  attribute type (COLOR, TEXT, SCRIPT, etc.). The attribute type is decided based
  on the name of its entity in the DTD file. This is some tricky and is explained
  in more detail in the code.
  Some attributes haven't got a type, but a list (rtl|ltr). In these cases, this
  string is stored as is in the field 'othertype' of the attribute.
  Each attribute has a default value, which is also stored in the field defaultvalue

  Optimization.

  Some elements attributes are defined in common, (for example, H1, H2, H3, ...).
  In these cases, the attributes are stored only once for the first element, and
  for the others the field clonedattrs is set to 1 and its glist attrs points to
  the list of the first element. There is no need for worry about this in any other
  point of the program but print_elements and delete_elements (see).
  (This has reduced the size for frameset.dtd from 70000 to 50000 bytes aprox.)
  	   
*/


#include "default_include.h"

#include <string.h>				/* strchr() */
#include <stdlib.h>				/* atoi() */
#include <sys/stat.h>			/* stat() */
#include <fcntl.h>				/* FILE* */
#include <stdio.h>				/* sprintf() */
#include <ctype.h>				/* toupper() */
#include <assert.h>

#ifdef WIN32
#define DIRSTR "\\"
#define DIRCHR 92
#else
#define DIRSTR "/"
#define DIRCHR '/'
#endif

#include "parsedtd.h"


/* Sizes, in bytes, of the fixed buffers used while parsing.  None of the
   token readers checks these limits, so they must be generous. */
#define MAXLEN_TOKEN 100			/* token[] buffers filled by getnexttoken() */
#define MAXLEN_CONTENT 500			/* content[] buffer in parse_elements() */
#define MAXLEN_ENTITY 100			/* entity name buffer in expand_token() */
#define MAXLEN_EXPANSIONBUFFER 10000	/* work buffers for entity expansion */
#define MAXLEN_TYPEATTRIBUTE 500	/* not referenced in this part of the file */

static gint nextid;				/* Id of the next element */

/* Delimiter sets for getnexttoken(): every character listed is returned as
   a one-character token of its own and also terminates the previous token */
const char *SGML_DELIMITERS = "\">';";
const char *SGML_DELIMITERS_PARENTHESIS = "\">';()";
/* NOTE(review): not referenced in this part of the file; presumably used
   when parsing attribute lists */
const char *SGML_ATTR_DELIMITERS = ">";
/* Markers searched for in the DTD text */
const char *SGML_EMBEDDEDCOMMENT = "--";
const char *SGML_COMMENTOPEN = "<!--";
const char *SGML_ENTITY = "<!ENTITY";
const char *SGML_ELEMENT = "<!ELEMENT";
const char *SGML_ATTLIST = "<!ATTLIST";
const char *SGML_FEATURESWITCHOPEN = "<![";
const char *SGML_FEATURESWITCHCLOSE = "]]>";
/* A feature switch whose keyword expands to this string is skipped */
const char *SGML_IGNORE = "IGNORE";

/* Printable names of the kinds of attribute default value
   (used by print_elements) */
const char *HTML_DEFVALUE_LITERAL_STR = "#LITERAL";
const char *HTML_DEFVALUE_IMPLIED_STR = "#IMPLIED";
const char *HTML_DEFVALUE_REQUIRED_STR = "#REQUIRED";
const char *HTML_DEFVALUE_FIXED_STR = "#FIXED";

/* 
  Arrays containing the keywords for basic types and attributes.

  getid_basictype() and getid_attrtype() compare a token against these names
  (ignoring case) and return the matching index plus SGML_BT_FIRST or
  HTML_AT_FIRST respectively, so the order of the names must not be changed.
  NOTE(review): the order presumably mirrors the ids declared in parsedtd.h;
  confirm before adding or moving an entry.
*/
static gchar *SGML_basictypenames[] = {
	"CDATA", "ID", "NAME", "IDREF", "IDREFS", "NUMBER", "BOOLEAN"
};
/* These are the names of the parameter entities that the HTML 4 DTDs use
   to describe attribute values (%Text;, %URI;, %LinkTypes;, ...) */
static gchar *HTML_attrtypenames[] = {
	"TEXT", "URI", "COLOR", "LENGTH", "PIXELS", "MULTILENGTH",
	"CONTENTTYPE", "CONTENTTYPES",
	"LANGUAGECODE",
	"CHARSET", "CHARSETS",
	/* was misspelt "LYNKTYPES", which never matched the DTD's %LinkTypes; */
	"CHARACTER", "DATETIME", "LINKTYPES", "MEDIADESC", "SCRIPT",
	"STYLESHEET",
	"FRAMETARGET"
};

/* Table pairing each doctype identifier with the name of a DTD file.
   The table ends with a {NULL} entry. */
SGML_doctype HTML_doctypes[] = {
	{DTD_HTML_L_0, "html-0.dtd"},
	{DTD_HTML_L_0S, "html-0s.dtd"},
	{DTD_HTML_L_1, "html-1.dtd"},
	{DTD_HTML_L_1S, "html-1s.dtd"},

	{DTD_HTML_2_0, "html.dtd"},
	{DTD_HTML_2_0S, "html-s.dtd"},
	{DTD_HTML_2_1E, "html-2.1e.dtd"},

	{DTD_HTML_3_0, "html-3.dtd"},
	{DTD_HTML_3_0S, "html-3s.dtd"},
	{DTD_HTML_3_2, "html-3.2.dtd"},

	{DTD_HTML_4_0S, "html-4-strict.dtd"},
	{DTD_HTML_4_0F, "html-4-frameset.dtd"},
	{DTD_HTML_4_0T, "html-4.dtd"},
	{DTD_HTML_4_01S, "strict.dtd"},
	{DTD_HTML_4_01F, "frameset.dtd"},
	{DTD_HTML_4_01T, "loose.dtd"},

	/* NOTE(review): DTD_XHTML_1_0 and DTD_XHTML_1_0S both map to the
	   strict DTD - confirm this is intended */
	{DTD_XHTML_1_0, "xhtml1-strict.dtd"},
	{DTD_XHTML_1_0S, "xhtml1-strict.dtd"},
	{DTD_XHTML_1_0F, "xhtml1-frameset.dtd"},
	{DTD_XHTML_1_0T, "xhtml1-transitional.dtd"},
	{NULL}						/* Terminator */
};

/* Definition of the structure to hold the entities.
   One record is created by parse_entities() for every parameter entity
   (<!ENTITY % name ...>) found in the DTD. */
typedef struct {
	gint basictype;				/* getid_basictype(name): basic type id, or -1 */
	gint attrtype;				/* getid_attrtype(name): attribute type id, or -1 */
	gchar *name;				/* entity name (allocated, owned by the record) */
	gchar *expandsto;			/* replacement text (allocated, owned by the record) */
} SGML_entityinfo;
static GList *entities = NULL;	/* Temporary list for the entities' definitions */


/* static definitions for helper functions to parse a file */
/*******************************************************************/
/*
   These functions can be replaced by any other way of reading characters.
   The text to parse must have been read into a gchar buffer.
   After that, getnexttoken is the only function needed to retrieve tokens.

   The functions mygetch() and myungetch() are only used in getnexttoken, 
   getnextliteral and skipuntil.
*/
static gint __charcnt;			/* index of the next char to read, -1 once EOF was hit */
static gchar *__text;			/* NUL-terminated text being parsed */

/* Returns the next character of __text and advances the read position.
   On reaching the terminating NUL the position is set to -1 and '\0' is
   returned; every later call keeps returning '\0'. */
static char mygetch()
{
	char ch;

	if (__charcnt == -1)
		return '\0';			/* Beyond EOF */

	ch = __text[__charcnt];
	if (ch == '\0')
		__charcnt = -1;
	else
		__charcnt++;
	return ch;
}
/* Steps the read position one character back.
   Does nothing once the position is negative (EOF already reached). */
static void myungetch()
{
	if (__charcnt < 0)
		return;
	__charcnt--;
}
/* Opens a gap of `size` bytes in __text at the current read position
   (needed by insert_public_file).

   The buffer is enlarged and the unread part of the text, including its
   terminating NUL, is moved `size` bytes forward.  The gap itself is NOT
   initialised: the caller has to overwrite all of it.

   When the reader already hit EOF (__charcnt == -1) the gap is opened at
   the end of the text and reading resumes from there; the old code used
   the -1 as an offset and moved memory from before the start of the buffer.

   Returns a pointer to the start of the gap, or NULL on failure. */
static gchar *realloc_mygetch(gint size)
{
	size_t textlen;

	if (__text == NULL || size < 0)
		return NULL;

	textlen = strlen(__text);
	if (__charcnt < 0 || (size_t) __charcnt > textlen)
		__charcnt = (gint) textlen;

	__text = g_realloc(__text, textlen + 1 + (size_t) size);
	if (__text == NULL)
		return NULL;

	memmove(__text + __charcnt + size, __text + __charcnt,
			textlen + 1 - (size_t) __charcnt);
	return __text + __charcnt;
}


/* Reads the next token from the input.

   Leading blanks (space, newline, carriage return, tab) are skipped.  A
   character contained in *delimiters is a token on its own; any other token
   extends up to the next blank, delimiter or end of input.  A delimiter that
   ends a token is pushed back so that the next call returns it; a blank that
   ends a token is consumed.
   The basic delimiters for a DTD file are SGML_DELIMITERS.

   The token is stored, NUL terminated, in *token.  No length check is made:
   the caller must supply a buffer big enough for the longest token.

   returns 0 if no more tokens are left (*token is then an empty string)
   returns 1 if a token was read
*/
static gint getnexttoken(gchar * token, const gchar * delimiters)
{
	static const gchar whitespace[] = " \n\r\t";
	gchar *out = token;
	gchar ch;

	/* skip the blanks in front of the token */
	do {
		ch = mygetch();
	} while (ch != '\0' && strchr(whitespace, ch) != NULL);

	*out = ch;
	if (ch == '\0')
		return 0;
	out++;

	if (strchr(delimiters, ch) == NULL) {
		/* ordinary token: collect until blank, delimiter or end of input */
		for (;;) {
			ch = mygetch();
			*out = ch;
			if (ch == '\0' || strchr(delimiters, ch) != NULL
				|| strchr(whitespace, ch) != NULL)
				break;
			out++;
		}
		/* strchr() also matches the terminating NUL, in which case the
		   unget is a no-op because the reader is already at EOF */
		if (strchr(delimiters, ch) != NULL)
			myungetch();
	}
	*out = '\0';
	return 1;
}

/* Reads a quoted literal whose opening quote has already been consumed.

   The text up to the next occurrence of `delimiter` is copied to *token,
   wrapped in a pair of `delimiter` characters and NUL terminated.  The
   closing delimiter is consumed.  No length check is made on *token.

   returns 0 if the input ended before the closing delimiter was found
   returns 1 if the literal was read completely
*/
static gint getnextliteral(gchar * token, char delimiter)
{
	gchar *out = token;
	gchar ch;

	*out++ = delimiter;
	for (;;) {
		ch = mygetch();
		if (ch == '\0' || ch == delimiter)
			break;
		*out++ = ch;
	}
	/* the literal is always closed, even when the input ran out */
	*out++ = delimiter;
	*out = '\0';
	return ch != '\0';
}


/* Reads characters until, and including, the first occurrence of untilstr.
   Quoted text gets no special treatment: untilstr is also found inside
   quotes.

   On success the read position is just behind the match.  When untilstr is
   not found the whole remaining input is consumed.

   returns 0 if the end of the input was reached without finding untilstr
   returns 1 if untilstr was found
*/
static gint skipuntil(const gchar * untilstr)
{
	char ch;
	gint matched;

	while ((ch = mygetch()) != '\0') {
		matched = 0;
		while (untilstr[matched] != '\0' && untilstr[matched] == ch) {
			ch = mygetch();
			matched++;
		}
		if (untilstr[matched] == '\0') {
			/* ch is the character behind the match: give it back */
			myungetch();
			return 1;
		}
		if (ch == '\0')
			return 0;			/* input ended in the middle of a partial match */
		/* Partial match: restart one character after the point where this
		   attempt began, otherwise a match that overlaps the failed attempt
		   (e.g. "]]>" inside "]]]>") would be missed. */
		for (; matched > 0; matched--)
			myungetch();
	}
	return 0;
}


/* Skips the rest of a comment declaration: first up to the closing "--",
   then up to the ">" that ends the declaration. */
static void skipcomment()
{
	/* when the first search fails the input is exhausted, so the second
	   search would not find or consume anything */
	if (skipuntil(SGML_EMBEDDEDCOMMENT))
		(void) skipuntil(">");
}



/* Some basic functions */
/*******************************************************************/
/* Function: getid_basictype
 * Description:
 *      Checks whether a token names one of the basic SGML types listed in
 *      SGML_basictypenames. The comparison ignores case.
 * Arguments:
 *      token - a token read from the file
 * Return value:
 *      -1 if token is not a basic SGML type, otherwise
 *      SGML_BT_FIRST plus the position of the name in SGML_basictypenames
 */
static gint getid_basictype(char *token)
{
	const gint count =
		sizeof(SGML_basictypenames) / sizeof(SGML_basictypenames[0]);
	gint idx = 0;

	while (idx < count) {
		if (strcasecmp(SGML_basictypenames[idx], token) == 0)
			return SGML_BT_FIRST + idx;
		idx++;
	}
	return -1;
}

/* Function: getid_attrtype
 * Description:
 *      Checks whether a token names one of the known attribute types listed
 *      in HTML_attrtypenames. The comparison ignores case.
 * Arguments:
 *      token - a token read from the file
 * Return value:
 *      -1 if token is not a known HTML attribute type, otherwise
 *      HTML_AT_FIRST plus the position of the name in HTML_attrtypenames
 */
static gint getid_attrtype(gchar * token)
{
	const gint count =
		sizeof(HTML_attrtypenames) / sizeof(HTML_attrtypenames[0]);
	gint idx = 0;

	while (idx < count) {
		if (strcasecmp(HTML_attrtypenames[idx], token) == 0)
			return HTML_AT_FIRST + idx;
		idx++;
	}
	return -1;
}

/* Function: getid_type
 * Description:
 *      Checks whether a token is a basic SGML type or, failing that, a known
 *      HTML attribute type (see getid_basictype and getid_attrtype).
 * Arguments:
 *      token - a token read from the file
 * Return value:
 *      -1 if token is neither a basic type nor a known attribute type,
 *      otherwise the id returned by the function that recognised it
 */
static gint getid_type(gchar * token)
{
	int basicid = getid_basictype(token);

	if (basicid != -1)
		return basicid;
	return getid_attrtype(token);
}


#ifdef DEBUG
/* Function: get_typename
 * Description:
 *      Translates a type id back into its name. Used by print_elements and
 *      print_entities.
 * Arguments:
 *      value_type - the id whose name is wanted
 * Return value:
 *      the name of the basic type or attribute type, or
 *      NULL if value_type lies in neither id range
 */
static const gchar *get_typename(gint value_type)
{
	const gchar *name = NULL;

	if (value_type >= SGML_BT_FIRST && value_type <= SGML_BT_LAST)
		name = SGML_basictypenames[value_type - SGML_BT_FIRST];
	else if (value_type >= HTML_AT_FIRST && value_type <= HTML_AT_LAST)
		name = HTML_attrtypenames[value_type - HTML_AT_FIRST];
	return name;
}
#endif

/* Parsing of entities: 
   http://www.w3.org/TR/html401/intro/sgmltut.html (3.3.2)  */
/*******************************************************************/


#ifdef DEBUG
/* Function: print_entities
 * Description:
 *      Prints every record of the entities list with g_print, followed by
 *      the number of entities and the approximate memory they use
 *      (record size plus both strings with their terminators).
 * Return value:
 *      The number of entities
 */
static gint print_entities()
{
	GList *node = g_list_first(entities);
	SGML_entityinfo *entity;
	gint bytes = 0, total = 0;

	if (node == NULL)
		g_print(_("The entities list is empty.\n"));

	for (; node != NULL; node = g_list_next(node)) {
		entity = (SGML_entityinfo *) node->data;
		total++;
		bytes += sizeof(SGML_entityinfo);

		g_print("Name=%s\n", entity->name);
		bytes += 1 + strlen(entity->name);

		g_print("Expandsto=%s\n", entity->expandsto);
		bytes += 1 + strlen(entity->expandsto);

		g_print("BasicType=(%d)%s\n", entity->basictype,
				get_typename(entity->basictype));
		if (entity->attrtype != -1)
			g_print("AttrType=(%d)%s\n", entity->attrtype,
					get_typename(entity->attrtype));
		g_print("\n");
	}
	g_print("\nEntities: %d, Total size: %d\n", total, bytes);
	return total;
}
#endif

/* Function: delete_entities
 * Description:
 *      Frees every entity record (name, expansion text and the record
 *      itself), then the list, and resets the list to empty. The entities
 *      are not needed any more once the DTD is parsed.
 */
static void delete_entities()
{
	GList *node;

	for (node = g_list_first(entities); node != NULL;
		 node = g_list_next(node)) {
		SGML_entityinfo *entity = (SGML_entityinfo *) node->data;

		g_free(entity->name);
		g_free(entity->expandsto);
		g_free(entity);
	}
	g_list_free(entities);
	entities = NULL;
}


/* Function: get_entity_byname
 * Description:
 *      Searches the entities list for an entity, ignoring case.
 * Arguments:
 *      name - the name of the entity
 * Return value:
 *      the SGML_entityinfo of the first entity with that name, or
 *      NULL if there is none
 */
static const SGML_entityinfo *get_entity_byname(gchar * name)
{
	GList *node;

	for (node = g_list_first(entities); node != NULL;
		 node = g_list_next(node)) {
		SGML_entityinfo *candidate = (SGML_entityinfo *) node->data;

		if (strcasecmp(candidate->name, name) == 0)
			return candidate;
	}
	return NULL;
}


/* Function: get_entity_expandsto
 * Description:
 *      Searches the entities list for an entity, ignoring case, and returns
 *      its replacement text (SGML_entityinfo.expandsto).
 * Arguments:
 *      name - the name of the entity
 * Return value:
 *      the string the entity expands to (still owned by the list), or
 *      NULL if there is no entity with that name
 */
static const gchar *get_entity_expandsto(gchar * name)
{
	GList *node;

	for (node = g_list_first(entities); node != NULL;
		 node = g_list_next(node)) {
		SGML_entityinfo *candidate = (SGML_entityinfo *) node->data;

		if (strcasecmp(candidate->name, name) == 0)
			return candidate->expandsto;
	}
	return NULL;
}

/* Function: get_entity_typeattr
 * Description:
 *      Searches the entities list for an entity, ignoring case, and returns
 *      its attribute type (SGML_entityinfo.attrtype).
 * Arguments:
 *      name - the name of the entity
 * Return value:
 *      the attrtype stored for the entity (which is itself -1 when the
 *      entity name is not a known attribute type), or
 *      -1 if there is no entity with that name
 */
static gint get_entity_typeattr(gchar * name)
{
	GList *node;

	for (node = g_list_first(entities); node != NULL;
		 node = g_list_next(node)) {
		SGML_entityinfo *candidate = (SGML_entityinfo *) node->data;

		if (strcasecmp(candidate->name, name) == 0)
			return candidate->attrtype;
	}
	return -1;
}


/* Function: expand_token
 * Description:
 *      Expands a string which can contain parameter entities starting with %.
 *      The replacement text is scanned again, so entities that appear in it
 *      are expanded too.
 *      Numeric character references (&#34; or %#34;) are replaced by the
 *      character with that code.
 *      The work is done in a buffer of MAXLEN_EXPANSIONBUFFER bytes. An
 *      expansion that would not fit stops the processing: the text expanded
 *      so far is returned with the remaining references left untouched.
 *      A token that is already too long for the buffer is returned as a
 *      plain copy.
 *      A single trailing space is removed from the result.
 *      NOTE: a reference to an entity that is not in the entities list
 *      terminates the program with exit(-1), as it always did.
 * Arguments:
 *      token - The string to expand
 *      includeattrtypes - When not 0, the attribute type id of every
 *         expanded entity that has one is written between {} in front of its
 *         replacement text.
 *         This is done because parse_entities guesses the type of certain
 *         attributes (TEXT, SCRIPT, URI, etc.) from the entity name, but
 *         after the expansion only the basic type (CDATA, ID, etc.) is left.
 *         For example:
 *                  title        %text;             #IMPLIED
 *         would get expanded as:
 *                  title        CDATA              #IMPLIED
 *         and the attribute type of title would be lost. With
 *         includeattrtypes=1 the expansion is:
 *                  title       {100}CDATA          #IMPLIED
 *         so that both the basic and the attribute type are known later.
 * Return value:
 *      Dynamically allocated string with the expanded text; the caller
 *      frees it with g_free
 */
static char *expand_token(char *token, int includeattrtypes)
{
	char *expanded, *expandedorig, *retval, *tmp;
	char entity[MAXLEN_ENTITY], *entitystart, *entityptr;
	char *const entitylimit = entity + sizeof(entity) - 1;
	char typetag[32];
	const char *expansion;
	size_t len, needed;
	int tmpid;

	if (strlen(token) >= MAXLEN_EXPANSIONBUFFER)
		return g_strdup(token);	/* does not fit in the work buffer */

	expandedorig = g_malloc(MAXLEN_EXPANSIONBUFFER);
	expanded = expandedorig;
	strcpy(expanded, token);
	while (*expanded) {
		if (*expanded == '&' && *(expanded + 1) == '#'
			&& isdigit((unsigned char) *(expanded + 2))) {
			/* single char entity */
			entitystart = expanded;
			expanded += 2;
			entityptr = entity;
			while (*expanded && *expanded != ';' && *expanded != ' ') {
				if (entityptr < entitylimit)
					*entityptr++ = *expanded;
				expanded++;
			}
			*entityptr = '\0';
			if (*expanded == ';')
				expanded++;
			*entitystart++ = (char) atoi(entity);
			memmove(entitystart, expanded, strlen(expanded) + 1);
			expanded = entitystart;
		} else if (*expanded++ == '%') {	/* SGML entity */
			entityptr = entity;
			entitystart = expanded - 1;
			/* some dtds reference entities without finishing them with ';' 
			   but with ')' or '|' or ',' */
			while (*expanded && *expanded != ';' && *expanded != ' '
				   && *expanded != ')' && *expanded != '|'
				   && *expanded != ',') {
				if (entityptr < entitylimit)
					*entityptr++ = *expanded;
				expanded++;
			}
			*entityptr = '\0';
			if (*expanded == ';')
				expanded++;
			if (*entity == '#') {	/* single char entity */
				/* keep the text behind the reference; a bare NUL written
				   here used to cut the result short */
				*entitystart++ = (char) atoi(entity + 1);
				memmove(entitystart, expanded, strlen(expanded) + 1);
				expanded = entitystart;
			} else {
				expansion = get_entity_expandsto(entity);
				if (expansion == NULL) {
					DEBUG_MSG("%s.expandsto=NULL\n", entity);
					exit(-1);
				}
				typetag[0] = '\0';
				if (includeattrtypes
					&& (tmpid = get_entity_typeattr(entity)) != -1)
					sprintf(typetag, "{%d}", tmpid);

				needed = (size_t) (entitystart - expandedorig)
					+ strlen(typetag) + strlen(expansion)
					+ strlen(expanded) + 1;
				if (needed > MAXLEN_EXPANSIONBUFFER) {
					/* nothing was modified yet: give up and return the
					   text as it stands */
					DEBUG_MSG("expansion of %s does not fit\n", entity);
					break;
				}
				tmp = g_strdup(expanded);
				strcpy(entitystart, typetag);
				strcat(entitystart, expansion);
				strcat(entitystart, tmp);
				/* scan the replacement text again for nested entities */
				expanded = entitystart;
				g_free(tmp);
			}
		}
	}
	len = strlen(expandedorig);
	if (len > 0 && expandedorig[len - 1] == ' ')
		expandedorig[len - 1] = '\0';	/* Remove trailing space */
	retval = g_strdup(expandedorig);
	g_free(expandedorig);
	return retval;
}

/*******************************************************************/
/* Function: insert_public_file
 * Description:
 *       Inserts the contents of a file referenced by a
 *       <!ENTITY % ... PUBLIC declaration into the __text buffer at the
 *       current read position. The __text buffer is reallocated.
 *       The file is looked for in PKGDATADIR under the names fname,
 *       fname.ent, fname.dtd, FNAME.ENT and FNAME.DTD, in that order.
 *       The inserted area is first filled with blanks, so a short or failed
 *       read leaves only whitespace behind for the parser.
 * Arguments:
 *      fname .- Filename of the PUBLIC ENTITY, without directory.
 * Return value:
 *      1 if the file was inserted
 *      0 if unsuccessful (name too long, file not found, out of memory,
 *        open or read error); a message is written to stderr
 */
gint insert_public_file(gchar * fname)
{
	gchar filename[255];
	struct stat fileinfo;
	FILE *file = NULL;
	gchar *entity, *ext;
	size_t dirlen, namelen, size, i;
	gint found, ret;

	DEBUG_MSG("insert_public_file(\"%s\")\n", fname);

	dirlen = strlen(PKGDATADIR) + strlen(DIRSTR);
	namelen = strlen(fname);
	/* directory + name + longest extension (4 chars) + NUL must fit */
	if (dirlen + namelen + 4 + 1 > sizeof(filename)) {
		fprintf(stderr, "%s: file name too long\n", fname);
		return 0;
	}

	strcpy(filename, PKGDATADIR);
	strcat(filename, DIRSTR);
	strcat(filename, fname);
	ext = filename + dirlen + namelen;

	found = (stat(filename, &fileinfo) != -1);
	if (!found) {
		strcpy(ext, ".ent");
		found = (stat(filename, &fileinfo) != -1);
	}
	if (!found) {
		strcpy(ext, ".dtd");
		found = (stat(filename, &fileinfo) != -1);
	}
	if (!found) {
		/* same again with the name in upper case */
		for (i = 0; i < namelen; i++)
			filename[dirlen + i] =
				(gchar) toupper((unsigned char) fname[i]);
		strcpy(ext, ".ENT");
		found = (stat(filename, &fileinfo) != -1);
	}
	if (!found) {
		strcpy(ext, ".DTD");
		found = (stat(filename, &fileinfo) != -1);
	}
	if (!found) {
		perror(filename);
		return 0;
	}

	size = (size_t) fileinfo.st_size;
	entity = realloc_mygetch(fileinfo.st_size);
	if (entity == NULL) {
		perror("Memory");
		return 0;
	}
	/* The gap opened in the text holds stale bytes; blank it so that
	   whatever is not overwritten by the file is harmless whitespace. */
	memset(entity, ' ', size);

	file = fopen(filename, "r");
	if (file == NULL) {
		perror(filename);
		return 0;
	}
	ret = 1;
	/* Text mode may deliver fewer bytes than st_size (line-end
	   translation); only a real read error counts as a failure. */
	if (fread(entity, 1, size, file) < size && ferror(file)) {
		perror(filename);
		ret = 0;
	}
	fclose(file);
	return ret;
}


/* Appends src to dest, which must be a buffer of MAXLEN_EXPANSIONBUFFER
   bytes; text that does not fit is dropped. */
static void append_expansion_text(gchar * dest, const gchar * src)
{
	size_t used = strlen(dest);

	if (used < MAXLEN_EXPANSIONBUFFER - 1)
		strncat(dest, src, MAXLEN_EXPANSIONBUFFER - 1 - used);
}

/* Function: parse_entity_expansion
 * Description:
 *      Parses the replacement text of an entity declaration, which is
 *      between double or single quotes, skipping the comments. Reading stops
 *      at the '>' that closes the declaration or at the end of the input
 *      (an unterminated declaration used to loop forever).
 *      The tokens are joined with single spaces in a buffer of
 *      MAXLEN_EXPANSIONBUFFER bytes; text beyond that size is dropped.
 *      Also handles the insertion of public files. If the filename is not
 *      included in the <ENTITY %entityname PUBLIC "description" "filename">,
 *      the name is taken from the %entityname; reference that follows the
 *      declaration and insert_public_file tries 'entityname.ent' and
 *      'entityname.dtd'.
 * Return value:
 *      Dynamically allocated string with the text of the macro; the caller
 *      frees it with g_free
 */
static gchar *parse_entity_expansion()
{
	gchar *tmpstring = g_malloc(MAXLEN_EXPANSIONBUFFER), *tmp;
	gchar token[MAXLEN_TOKEN];
	gint singleq = 0, doubleq = 0;

	*tmpstring = '\0';
	for (;;) {
		if (!getnexttoken(token, SGML_DELIMITERS))
			break;				/* end of input inside the declaration */
		if (strcmp(token, "\"") == 0) {
			if (!singleq) {
				doubleq = !doubleq;
			} else {
				/* double-quoted literal nested in a single-quoted text */
				getnextliteral(token, '"');
				if (*tmpstring)
					append_expansion_text(tmpstring, " ");
				append_expansion_text(tmpstring, token);
			}
		} else if (strcmp(token, "'") == 0) {
			if (!doubleq) {
				singleq = !singleq;
			} else {
				/* single-quoted literal nested in a double-quoted text */
				getnextliteral(token, '\'');
				if (*tmpstring)
					append_expansion_text(tmpstring, " ");
				append_expansion_text(tmpstring, token);
			}
		} else if (strcmp(token, SGML_EMBEDDEDCOMMENT) == 0) {
			skipuntil(SGML_EMBEDDEDCOMMENT);	/* Skip comment */
		} else if (strcasecmp(token, "PUBLIC") == 0 && !singleq
				   && !doubleq) {
			/* public entity file. Look for the filename */
			getnexttoken(token, SGML_DELIMITERS);
			if (strcmp(token, "\"") == 0) {
				skipuntil("\"");	/* skip the description */
				getnexttoken(token, SGML_DELIMITERS);
				if (strcmp(token, "\"") == 0) {
					getnexttoken(tmpstring, SGML_DELIMITERS);
				}
			}
			if (strcmp(token, ">") != 0)
				skipuntil(">");
			/* the token behind the declaration: normally %entityname */
			getnexttoken(token, SGML_DELIMITERS);
			if (*tmpstring) {	/* The entity had a filename */
				insert_public_file(tmpstring);
			} else {
				if (*token == '%') {
					if (token[strlen(token) - 1] == ';')
						token[strlen(token) - 1] = '\0';
					insert_public_file(token + 1);
				}
			}
			break;
		} else if (strcmp(token, ">") == 0 && !singleq && !doubleq) {
			break;
		} else {
			/* no space in front of the ';' that ends an entity reference */
			if (*tmpstring && strcmp(token, ";") != 0)
				append_expansion_text(tmpstring, " ");
			append_expansion_text(tmpstring, token);
		}
	}
	tmp = g_strdup(tmpstring);
	g_free(tmpstring);
	return tmp;
}


/* Function: parse_entities
 * Description:
 *      Parses the text of a DTD file and appends the definition of every
 *      parameter entity (<!ENTITY % name ...>) to the entities GList.
 *      Only the first definition of an entity is kept; later ones are read
 *      and discarded. Other entity declarations are read and ignored.
 *      Processes the feature switch of the DTD: <![ .... ]]>
 *      When the keyword of a switch expands to IGNORE, its contents are
 *      skipped.
 */
static void parse_entities()
{
	gchar token[MAXLEN_TOKEN];
	gchar *expanded_entity, *tmp;
	SGML_entityinfo *ei;

	while (getnexttoken(token, SGML_DELIMITERS)) {
		if (strncasecmp(token, SGML_COMMENTOPEN, strlen(SGML_COMMENTOPEN))
			== 0) {
			skipcomment();
		} else if (strcmp(token, SGML_EMBEDDEDCOMMENT) == 0) {
			skipuntil(SGML_EMBEDDEDCOMMENT);
		} else if (strcasecmp(token, SGML_FEATURESWITCHOPEN) == 0) {
			/* Look whether the contents expands to INCLUDE or IGNORE */
			getnexttoken(token, SGML_DELIMITERS);
			expanded_entity = expand_token(token, 0);
			if (strcasecmp(expanded_entity, SGML_IGNORE) == 0)
				skipuntil(SGML_FEATURESWITCHCLOSE);
			/* expand_token returns an allocated string; it used to leak */
			g_free(expanded_entity);
		} else if (strcasecmp(token, SGML_ENTITY) == 0) {
			getnexttoken(token, SGML_DELIMITERS);
			if (strcasecmp(token, "%") == 0) {
				/* Entity name follows */
				getnexttoken(token, SGML_DELIMITERS);
				if (get_entity_byname(token)) {
					/* If the entity already exists, skip it
					   (it has been defined before by a SGML_FEATURESWITCH) */
					tmp = parse_entity_expansion();
					g_free(tmp);
				} else {
					ei = (SGML_entityinfo *)
						g_malloc(sizeof(SGML_entityinfo));
					ei->attrtype = getid_attrtype(token);
					ei->basictype = getid_basictype(token);
					ei->name = g_strdup(token);
					ei->expandsto = parse_entity_expansion();
					entities = g_list_append(entities, ei);
				}
			} else {    /* Character entity. Ignore these */
				tmp = parse_entity_expansion();
				g_free(tmp);
			}
		}
	}
}



/* Parsing of elements:
   http://www.w3.org/TR/html401/intro/sgmltut.html (3.3.3) */
/*******************************************************************/

#ifdef DEBUG
/* Function: print_elements
 * Description:
 *      Prints with g_print every element of the list together with its
 *      attributes, followed by the number of elements and attributes and
 *      their approximate size. Attributes of elements with cloned attrs
 *      are printed and counted but not added to the size.
 * Return value:
 *      The number of elements
 */

static gint print_elements(GList * elements)
{
	GList *elnode, *attrnode;
	SGML_elementinfo *el;
	SGML_attrinfo *attr;
	const gchar *typestr, *defkind, *defvalue;
	int bytes = 0, nelements = 0, nattrs = 0;

	elnode = g_list_first(elements);
	if (elnode == NULL)
		g_print("Elements list is empty\n");

	for (; elnode != NULL; elnode = g_list_next(elnode)) {
		el = (SGML_elementinfo *) elnode->data;
		nelements++;
		bytes += sizeof(SGML_elementinfo) + strlen(el->name)
			+ strlen(el->content);

		g_print("Id=%d\n", el->id);
		g_print("Name=%s\n", el->name);
		g_print("Flags=%d ", el->flags);
		if (el->flags & HTML_STARTTAG_OPTIONAL)
			g_print("HTML_STARTTAG_OPTIONAL ");
		if (el->flags & HTML_ENDTAG_OPTIONAL)
			g_print("HTML_ENDTAG_OPTIONAL ");
		g_print("\n");
		g_print("Content=%s\n", el->content);

		attrnode = el->attrs;
		if (attrnode != NULL)
			g_print("Attrs:          Type            Default value\n");
		for (; attrnode != NULL; attrnode = g_list_next(attrnode)) {
			attr = (SGML_attrinfo *) attrnode->data;
			nattrs++;
			if (!el->clonedattrs) {
				bytes += sizeof(SGML_attrinfo) + strlen(attr->name);
				if (attr->othertype)
					bytes += strlen(attr->othertype);
				if (attr->defaultvalue)
					bytes += strlen(attr->defaultvalue);
			}

			/* type column: attribute type, else literal list, else basic type */
			if (attr->attrtype != -1)
				typestr = get_typename(attr->attrtype);
			else if (attr->othertype != NULL)
				typestr = attr->othertype;
			else
				typestr = get_typename(attr->basictype);

			/* kind of default value */
			if (attr->defaultvaluetype == HTML_DEFVALUE_REQUIRED)
				defkind = HTML_DEFVALUE_REQUIRED_STR;
			else if (attr->defaultvaluetype == HTML_DEFVALUE_IMPLIED)
				defkind = HTML_DEFVALUE_IMPLIED_STR;
			else if (attr->defaultvaluetype == HTML_DEFVALUE_FIXED)
				defkind = HTML_DEFVALUE_FIXED_STR;
			else
				defkind = HTML_DEFVALUE_LITERAL_STR;

			defvalue = attr->defaultvalue != NULL ? attr->defaultvalue : "";

			g_print("%-15s %-15s %-10s %s\n", attr->name, typestr, defkind,
					defvalue);
		}
		g_print("\n");
	}
	g_print("\nElements: %d, Attributes: %d, Total size: %d\n",
			nelements, nattrs, bytes);
	return nelements;
}
#endif


/* Function: delete_elements
 * Description:
 *      Deletes the list of elements: for every element its name, its
 *      content, its attributes and the element record itself, then the list.
 *      If an element has the attrs cloned (clonedattrs set), its attribute
 *      list belongs to another element and is not deleted here.
 * Arguments:
 *      elements - address of the list; set to NULL on return
 */
void delete_elements(GList ** elements)
{
	GList *elelist, *attrlist;
	SGML_elementinfo *ei;
	SGML_attrinfo *ai;

	for (elelist = g_list_first(*elements); elelist;
		 elelist = g_list_next(elelist)) {
		ei = (SGML_elementinfo *) elelist->data;
		g_free(ei->name);
		/* allocated by expand_token() in create_elements(); it used to
		   be leaked */
		g_free(ei->content);
		if (!ei->clonedattrs) {
			for (attrlist = g_list_first(ei->attrs); attrlist;
				 attrlist = g_list_next(attrlist)) {
				ai = (SGML_attrinfo *) attrlist->data;
				g_free(ai->name);
				g_free(ai->othertype);
				g_free(ai->defaultvalue);
				g_free(ai);
			}
			/* free the list itself; the loop variable is NULL by now, so
			   freeing that (as before) released nothing */
			g_list_free(ei->attrs);
		}
		g_free(ei);
	}
	g_list_free(*elements);
	*elements = NULL;
}


/* Function: create_elements
 * Description:
 *     Adds to the list of elements one or several elements with the given
 *       flags and body. Every element gets the next free id, its own copy
 *       of the expanded content and an empty, not cloned, attribute list.
 *     Names longer than MAXLEN_TOKEN - 1 characters are truncated.
 * Arguments:
 *     elements - GList to add elements to
 *     elnames  - name of the element, or several elements separated by |
 *                (parentheses and spaces around the names are ignored)
 *     flags    - properties of the element(s)
 *     content  - content or body of the element(s). May contain unexpanded entities
 */
static void create_elements(GList ** elements, char *elnames, int flags,
							char *content)
{
	SGML_elementinfo *ei;
	char element[MAXLEN_TOKEN], *elementptr;
	char *const elementlimit = element + sizeof(element) - 1;

	while (*elnames) {
		elementptr = element;
		/* skip the separators in front of the name */
		while (*elnames
			   && (*elnames == '(' || *elnames == '|' || *elnames == ' '
				   || *elnames == ')'))
			elnames++;
		while (*elnames && *elnames != ')' && *elnames != '|'
			   && *elnames != ' ') {
			if (elementptr < elementlimit)
				*elementptr++ = *elnames;
			elnames++;
		}
		*elementptr = '\0';
		if (*element) {
			ei = (SGML_elementinfo *) g_malloc(sizeof(SGML_elementinfo));
			ei->id = nextid++;
			/* g_strdup, because delete_elements releases it with g_free */
			ei->name = g_strdup(element);
			ei->flags = flags;
			ei->content = expand_token(content, 0);
			ei->attrs = NULL;
			/* delete_elements reads this flag, so it must not be left
			   uninitialised for elements that never get attributes */
			ei->clonedattrs = 0;
			*elements = g_list_append(*elements, ei);
		}
	}
}

/* Function: parse_elements
 * Description:
 *      Parses the text of a DTD file and extracts the definition of the elements
 *      to the element GList.
 *      Tokens are read with getnexttoken until it returns 0. Comments are
 *      skipped, feature switches whose keyword expands to SGML_IGNORE are
 *      skipped as a whole, and each SGML_ELEMENT declaration is handed to
 *      create_elements with its tag flags and content.
 *      A call to parse_entities has to be done before this function.
 * Arguments:
 *      elements - Empty GList to store the elements
 * Note:
 *      Names and content that don't fit in MAXLEN_CONTENT - 1 characters are
 *      truncated instead of overflowing the local buffer.
*/
static void parse_elements(GList ** elements)
{
	gchar token[MAXLEN_TOKEN], *expanded_element, content[MAXLEN_CONTENT];
	gchar *expanded_entity;
	gint flags;

	while (getnexttoken(token, SGML_DELIMITERS)) {
		if (strncasecmp(token, SGML_COMMENTOPEN, strlen(SGML_COMMENTOPEN))
			== 0) {
			skipcomment();
		} else if (strcmp(token, SGML_EMBEDDEDCOMMENT) == 0) {
			skipuntil(SGML_EMBEDDEDCOMMENT);
		} else if (strcasecmp(token, SGML_FEATURESWITCHOPEN) == 0) {
			/* Look whether the contents expands to INCLUDE or IGNORE */
			getnexttoken(token, SGML_DELIMITERS);
			expanded_entity = expand_token(token, 0);
			if (strcasecmp(expanded_entity, SGML_IGNORE) == 0) {
				skipuntil(SGML_FEATURESWITCHCLOSE);
			}
		} else if (strcasecmp(token, SGML_ELEMENT) == 0) {
			getnexttoken(content, SGML_DELIMITERS_PARENTHESIS);
			if (strcmp(content, "(") == 0) {
				/* Parenthesized name group: glue its tokens together */
				*content = '\0';
				getnexttoken(token, SGML_DELIMITERS_PARENTHESIS);
				while (*token && strcmp(token, ")") != 0) {
					strncat(content, token,
							sizeof(content) - strlen(content) - 1);
					getnexttoken(token, SGML_DELIMITERS_PARENTHESIS);
				}
			}
			/* The element can be one or a | separated list of elements */
			/* NOTE(review): content is reused below while expanded_element is
			   still needed; this assumes expand_token does not return a
			   pointer into its argument - confirm */
			expanded_element = expand_token(content, 0);

			flags = 0;
			getnexttoken(token, SGML_DELIMITERS);
			if (strcmp(token, "O") == 0 || strcmp(token, "-") == 0) {
				/* Get the properties of the start and end tags */
				if (strcmp(token, "O") == 0)
					flags |= HTML_STARTTAG_OPTIONAL;
				getnexttoken(token, SGML_DELIMITERS);
				if (strcmp(token, "O") == 0)
					flags |= HTML_ENDTAG_OPTIONAL;
				getnexttoken(token, SGML_DELIMITERS);
			}

			/* Get the content: every token up to ">" separated by a blank,
			   except that no blank is put in front of a ";" */
			*content = '\0';
			do {
				if (strcmp(token, ">") == 0)
					break;
				if (strcmp(token, SGML_EMBEDDEDCOMMENT) == 0)
					skipuntil(SGML_EMBEDDEDCOMMENT);
				else {
					if (*content && strcmp(token, ";") != 0)
						strncat(content, " ",
								sizeof(content) - strlen(content) - 1);
					strncat(content, token,
							sizeof(content) - strlen(content) - 1);
				}
			} while (getnexttoken(token, SGML_DELIMITERS));
			/* if the element is multiple, create all of them with  same flags & content */
			create_elements(elements, expanded_element, flags, content);
		}
	}
}


/* Parsing of attributes 
   http://www.w3.org/TR/html401/intro/sgmltut.html (3.3.4) */
/*******************************************************************/

/* Function: create_attrs
 * Description:
 *     Add to the one or several elements the given attributes.
 *     See the function expand_token for details.
 *     Each attribute has a name, a type and a default value.
 * Arguments:
 *     elements - GList of elememnts to add attributes to
 *     elnames  - name of the element, or several elements separated by |
 *     attlistorig - Original expanded string of attributes
*/
static void create_attrs(GList * elements, char *elnames,
						 char *attlistorig)
{
	SGML_elementinfo *ei;
	SGML_attrinfo *ai;
	gchar element[MAXLEN_TOKEN], *elementptr, *attlist, *sptr;
	gchar name[MAXLEN_TOKEN], type[MAXLEN_TYPEATTRIBUTE],
		defaultvalue[MAXLEN_TOKEN];
	GList *firstelementlist = NULL;

	while (*elnames) {
		attlist = attlistorig;
		elementptr = element;
		while (*elnames
			   && (*elnames == '(' || *elnames == '|' || *elnames == ' '
				   || *elnames == ')'))
			elnames++;
		while (*elnames && *elnames != ')' && *elnames != '|'
			   && *elnames != ' ')
			*elementptr++ = *elnames++;
		*elementptr = '\0';
		if (*element) {
			ei = get_element_byname(elements, element);
			if (ei == NULL) {
				DEBUG_MSG("Element not found: %s\n", element);
				return;
			}
			/* All of these elements have the same attlist, so store only once */
			ei->attrs = firstelementlist;
			if (ei->attrs == NULL) {
				ei->clonedattrs = 0;
				/* Parse the attrs */
				while (*attlist) {
					ai = (SGML_attrinfo *) g_malloc(sizeof(SGML_attrinfo));
					ai->basictype = -1;
					ai->othertype = NULL;
					ai->attrtype = -1;
					ai->defaultvaluetype = HTML_DEFVALUE_LITERAL;
					ai->defaultvalue = NULL;
					/* parse the name */
					sptr = name;
					while (*attlist && *attlist == ' ')
						attlist++;
					while (*attlist && *attlist != ' ')
						*sptr++ = *attlist++;
					*sptr = '\0';
					ai->name = strdup(name);

					/* parse the type */
					/* If the attr is of a known type, its attrtype is between {} at the start of its basictypename */
					sptr = type;
					while (*attlist == ' ')
						attlist++;
					if (*attlist == '(' && *(attlist + 1) == '{')
						attlist++;
					if (*attlist == '{') {
						attlist++;
						while (*attlist && *attlist != '}')
							*sptr++ = *attlist++;
						if (*attlist == '}')
							attlist++;
						if (*attlist == ')')
							attlist++;
						ai->attrtype = atoi(type);
						sptr = type;
					} else
						ai->attrtype = -1;
					if (*attlist == '(') {
						/* Now, it can follows a boolean value or a list of values */
						while (*attlist && *attlist != ')')
							*sptr++ = *attlist++;
						*sptr = '\0';
						if (strcmp(type + 1, name) == 0)
							ai->basictype = SGML_BT_BOOLEAN;
						*sptr++ = *attlist++;
						*sptr = '\0';
						if (ai->basictype != SGML_BT_BOOLEAN) {
							if ((ai->basictype = getid_basictype(type)) ==
								-1) ai->othertype = strdup(type);	/* The type is of the form (rtl|ltr) */
						}
					} else
						while (*attlist && *attlist != ' ')
							*sptr++ = *attlist++;

					*sptr = '\0';
					if (ai->basictype != SGML_BT_BOOLEAN) {
						if ((ai->basictype = getid_basictype(type)) == -1)
							ai->othertype = strdup(type);	/* The type is of the form (rtl|ltr) */
					}

					/* parse the default value */
					sptr = defaultvalue;
					while (*attlist && *attlist == ' ')
						attlist++;
					if (*attlist == '"') {
						attlist++;
						while (*attlist && *attlist != '"')
							*sptr++ = *attlist++;
						attlist++;
						*sptr = '\0';
						ai->defaultvalue = strdup(defaultvalue);
					} else {
						while (*attlist && *attlist != ' ')
							*sptr++ = *attlist++;
						*sptr = '\0';
						if (strcasecmp
							(defaultvalue,
							 HTML_DEFVALUE_REQUIRED_STR) ==
							0) ai->defaultvaluetype =
								HTML_DEFVALUE_REQUIRED;
						else
							if (strcasecmp
								(defaultvalue,
								 HTML_DEFVALUE_IMPLIED_STR) ==
								0) ai->defaultvaluetype =
								HTML_DEFVALUE_IMPLIED;
						else
							if (strcasecmp
								(defaultvalue,
								 HTML_DEFVALUE_FIXED_STR) == 0) {
							int singleq = 0, doubleq = 0;
							ai->defaultvaluetype = HTML_DEFVALUE_FIXED;
							/* The fixed value is stored between single or double quotes */
							sptr = defaultvalue;
							while (*attlist && *attlist != '\''
								   && *attlist != '"')
								attlist++;
							if (*attlist == '\'')
								singleq = 1;
							else if (*attlist == '"')
								doubleq = 1;
							*sptr++ = *attlist++;
							while (*attlist && (
												(*attlist != '\''
												 && singleq)
												|| (*attlist != '"'
													&& doubleq)))
									*sptr++ = *attlist++;
							*sptr++ = *attlist++;
							*sptr = '\0';
							ai->defaultvalue = strdup(defaultvalue);
						} else
							ai->defaultvalue = strdup(defaultvalue);
					}
					ei->attrs = g_list_append(ei->attrs, ai);
				}
				firstelementlist = ei->attrs;
			} else {
				ei->clonedattrs = 1;
			}
		}
	}
}


/* Function: parse_attrs
 * Description:
 *      Parses the text of a DTD file and extracts the definition of the attributes
 *      of the elements to the list of elements.
 *      Tokens are read with getnexttoken until it returns 0. Comments are
 *      skipped, feature switches whose keyword expands to SGML_IGNORE are
 *      skipped as a whole, and each SGML_ATTLIST declaration is handed to
 *      create_attrs.
 *      A call to parse_entities and parse_elements has to be done before
 * Arguments:
 *      elements - GList of elements to store the attributes
 * Note:
 *      A declaration that doesn't fit in MAXLEN_EXPANSIONBUFFER - 1 characters
 *      is truncated instead of overflowing the work buffer.
*/
static void parse_attrs(GList * elements)
{
	gchar token[MAXLEN_TOKEN], *expanded_element, *expanded_attlist,
		*attlist;
	gchar *expanded_entity;

	/* Work buffer, first for the element names and then for the attributes */
	attlist = g_malloc(MAXLEN_EXPANSIONBUFFER);
	while (getnexttoken(token, SGML_DELIMITERS)) {
		if (strncasecmp(token, SGML_COMMENTOPEN, strlen(SGML_COMMENTOPEN))
			== 0) {
			skipcomment();
		} else if (strcmp(token, SGML_EMBEDDEDCOMMENT) == 0) {
			skipuntil(SGML_EMBEDDEDCOMMENT);
		} else if (strcasecmp(token, SGML_FEATURESWITCHOPEN) == 0) {
			/* Look whether the contents expands to INCLUDE or IGNORE */
			getnexttoken(token, SGML_DELIMITERS);
			expanded_entity = expand_token(token, 0);
			if (strcasecmp(expanded_entity, SGML_IGNORE) == 0) {
				skipuntil(SGML_FEATURESWITCHCLOSE);
			}
		} else if (strcasecmp(token, SGML_ATTLIST) == 0) {
			getnexttoken(attlist, SGML_DELIMITERS_PARENTHESIS);
			if (strcmp(attlist, "(") == 0) {
				/* Parenthesized name group: glue its tokens together */
				*attlist = '\0';
				getnexttoken(token, SGML_DELIMITERS_PARENTHESIS);
				while (*token && strcmp(token, ")") != 0) {
					strncat(attlist, token,
							MAXLEN_EXPANSIONBUFFER - strlen(attlist) - 1);
					getnexttoken(token, SGML_DELIMITERS_PARENTHESIS);
				}
			}
			/* The element can be one or a | separated list of elements */
			/* NOTE(review): attlist is reused below while expanded_element is
			   still needed; this assumes expand_token does not return a
			   pointer into its argument - confirm */
			expanded_element = expand_token(attlist, 0);
			/* Get the list of attributes, skipping comments  */
			*attlist = '\0';
			while (getnexttoken(token, SGML_DELIMITERS)) {

				/* HTML-3.DTD contains an extra NOTATION type */
				if (strcasecmp(token, "NOTATION") == 0)
					getnexttoken(token, SGML_DELIMITERS);

				/* A quote starts a literal, which is read as one token */
				if (strcmp(token, "\"") == 0) {
					getnextliteral(token, '"');
				} else if (strcmp(token, "'") == 0) {
					getnextliteral(token, '\'');
				}
				if (strcmp(token, ">") == 0)
					break;
				if (strcmp(token, SGML_EMBEDDEDCOMMENT) == 0)
					skipuntil(SGML_EMBEDDEDCOMMENT);
				else {
					/* Tokens are separated by a blank, except before ";" */
					if (*attlist && strcmp(token, ";") != 0)
						strncat(attlist, " ",
								MAXLEN_EXPANSIONBUFFER - strlen(attlist) -
								1);
					strncat(attlist, token,
							MAXLEN_EXPANSIONBUFFER - strlen(attlist) - 1);
				}
			}
			/* Expands attlist preserving the type of the attr */
			expanded_attlist = expand_token(attlist, 1);
			/* if the element is multiple, create the attributes of all of them */
			create_attrs(elements, expanded_element, expanded_attlist);
		}
	}
	g_free(attlist);
}



/*******************************************************************
 * Public interface 
 *******************************************************************/

/*******************************************************************/
/* Function: get_doctype_from_text
 * Description:
 *      Copies the first <!DOCTYPE declaration of a text, without the leading
 *      '<' and without the closing '>'. The search is case sensitive.
 * Arguments:
 *      text - The text of the document
 *      doctype - Buffer that receives the declaration. It is left untouched
 *                when the text has no <!DOCTYPE
 *      maxlen - Size of doctype; at most maxlen - 1 characters are copied
 *               before the terminator
 * Return value:
 *      1 - a declaration has been found and copied
 *      0 - the text doesn't contain "<!DOCTYPE "
*/
gint get_doctype_from_text(gchar * text, gchar * doctype, int maxlen)
{
	const char *src;
	int used;

	src = strstr(text, "<!DOCTYPE ");
	if (src == NULL)
		return 0;

	/* Start after the '<'. The count starts at 1 to keep room for the
	   terminator */
	src++;
	for (used = 1; *src != '\0' && *src != '>' && used < maxlen; used++)
		*doctype++ = *src++;
	*doctype = '\0';
	return 1;
}

/*******************************************************************/
/* Function: find_dtd_index
 * Description:
 *     Finds the index of a doctype in HTML_doctypes. The comparison ignores
 *     the case and covers the whole length of both strings, so a prefix
 *     doesn't match.
 * Arguments:
 *     doctype - doctype tag with or without < and >
 * Return value:
 *     -1 - the doctype is not in HTML_doctypes (also for NULL)
 *     or the index in HTML_doctypes 
*/
gint find_dtd_index(const gchar * doctype)
{
	int i, maxl, l;

	if (doctype == NULL)
		return -1;
	l = strlen(doctype);
	/* An empty string has no last character to look at */
	if (l > 0 && doctype[l - 1] == '>')
		l--;
	if (*doctype == '<') {
		doctype++;
		l--;
	}
	for (i = 0; HTML_doctypes[i].doctype != NULL; i++) {
		/* Compare as many characters as the longer of both strings */
		maxl = strlen(HTML_doctypes[i].doctype);
		if (maxl < l)
			maxl = l;
		if (strncasecmp(HTML_doctypes[i].doctype, doctype, maxl) == 0) {
			return i;
		}
	}
	return -1;
}

/*******************************************************************/
/* Function: find_a_loaded_dtd
 * Description:
 *     Walks HTML_doctypes up to the entry with a NULL doctype and looks for
 *     the first entry with a reference count greater than 0
 * Return value:
 *     -1 - there isn't any loaded DTD
 *     or the index of the first loaded DTD
*/
gint find_a_loaded_dtd()
{
	int idx = 0;

	while (HTML_doctypes[idx].doctype != NULL) {
		if (HTML_doctypes[idx].ref > 0)
			return idx;
		idx++;
	}
	return -1;
}


/*******************************************************************/
/* Function: load_DTD
 * Description:
 *     Takes a reference to a DTD of HTML_doctypes. The file is read with
 *     filetoDTD when the DTD has no references yet; otherwise only the
 *     reference count is incremented.
 * Arguments:
 *     index - index in HTML_doctypes, or -1 when the doctype is unknown
 * Return value:
 *     0 - the DTD is loaded (just read, or already loaded before)
 *     2 - index is -1, or the entry has no filename
 *     otherwise the error returned by filetoDTD
*/
gint load_DTD(int index)
{
	int ret = 2;
	if (index != -1) {
		/* ref and elements must agree: both empty or both in use */
		assert(
			   (HTML_doctypes[index].ref == 0
				&& HTML_doctypes[index].elements == NULL)
			   ||
			   ((HTML_doctypes
				 [index].ref > 0
				 && HTML_doctypes[index].elements != NULL)));

		if (HTML_doctypes[index].ref == 0) {
			if (HTML_doctypes[index].filename) {
				if ((ret = filetoDTD(&HTML_doctypes[index].elements,
						     HTML_doctypes[index].filename)) == 0) {
					HTML_doctypes[index].ref++;
					DEBUG_MSG("load_DTD: dtd(%d) loaded %d times\n", index,
							  HTML_doctypes[index].ref);
				}
			}
		} else {
			HTML_doctypes[index].ref++;
			/* The reference has been taken, so report the same value as a
			   successful first load instead of "not found" */
			ret = 0;
			DEBUG_MSG("load_DTD: dtd(%d) loaded %d times\n", index,
				  HTML_doctypes[index].ref);
		}
	}
	return ret;
}

/*******************************************************************/
/* Function: unload_DTD
 * Description:
 *     Releases one reference of a DTD. When no references are left, the
 *     count is reset to 0 and the elements are deleted.
 * Arguments:
 *     index - DTD to unload; nothing is done for -1
 * Return value:
 *     Always 1
*/
gint unload_DTD(int index)
{
	if (index == -1)
		return 1;

	/* ref and elements must agree: both empty or both in use */
	assert(
		   (HTML_doctypes[index].ref == 0
			&& HTML_doctypes[index].elements == NULL)
		   ||
		   ((HTML_doctypes
			 [index].ref > 0
			 && HTML_doctypes[index].elements != NULL)));

	HTML_doctypes[index].ref--;
	if (HTML_doctypes[index].ref > 0) {
		/* Still in use somewhere else */
		DEBUG_MSG("unload_DTD: dtd(%d) ref= %d\n", index,
				  HTML_doctypes[index].ref);
		return 1;
	}

	/* Last reference gone (or there was none): clamp and free */
	HTML_doctypes[index].ref = 0;
	delete_elements(&HTML_doctypes[index].elements);
	DEBUG_MSG("unload_DTD: dtd(%d) deleted\n", index);
	return 1;
}


/*******************************************************************/
/* Function: filetoDTD
 * Description:
 *     Reads a DTD file and creates a GList with the definition of the tags.
 *     The file is looked for in PKGDATADIR. Its text is parsed three times:
 *     entities, elements and attributes.
 * Arguments:
 *     elementsptr - Pointer to a GList to add elements to; it is reset to
 *                   NULL before parsing
 *     fname - name of the DTD file, relative to PKGDATADIR
 * Return value:
 *     RET_DTD_LOADED - the file has been read and defines more than one element
 *     1 - the file name is too long, the file couldn't be opened or read,
 *         or it defines less than two elements
 * Note:
 *     The text is freed through __text and not through DTDstring.
 *     Presumably the parser can realloc the buffer - confirm.
*/
gint filetoDTD(GList ** elementsptr, const gchar * fname)
{
	struct stat fileinfo;
	FILE *fileDTD = NULL;
	gchar *DTDstring;
	gint ret = 1;
	gchar filename[255];
	int pathlen;

	/* Build the path without writing past the end of filename */
	pathlen = snprintf(filename, sizeof(filename), "%s%s%s", PKGDATADIR,
					   DIRSTR, fname);
	if (pathlen < 0 || (size_t) pathlen >= sizeof(filename)) {
		fprintf(stderr, "%s: DTD file name too long\n", fname);
		return ret;
	}
	if (stat(filename, &fileinfo) == -1) {
		perror(filename);
		return ret;
	}
	if ((fileDTD = fopen(filename, "r")) == NULL) {
		perror(filename);
		return ret;
	}

	DTDstring = g_malloc(fileinfo.st_size + 2);
	if (fread(DTDstring, fileinfo.st_size, 1, fileDTD) != 1) {
		/* Report first, the cleanup calls could change errno */
		perror(filename);
		fclose(fileDTD);
		g_free(DTDstring);
		return ret;
	}
	fclose(fileDTD);
	/* Terminate right after the last byte read. The spare byte is cleared
	   too, so that no undefined byte is left in the buffer */
	DTDstring[fileinfo.st_size] = '\0';
	DTDstring[fileinfo.st_size + 1] = '\0';

	/* Parse this DTD */
	*elementsptr = NULL;
	__text = DTDstring;
	__charcnt = 0;
	parse_entities();
#ifdef DEBUG
	print_entities();
#endif
	__charcnt = 0;
	nextid = HTML_EL_FIRST;
	parse_elements(elementsptr);
	__charcnt = 0;
	parse_attrs(*elementsptr);

#ifdef DEBUG
	print_elements(*elementsptr);
#endif

	delete_entities();	/* we don't need them any more */
	g_free(__text);
	/* When the DTD file exists, but
	   contains a reference to another DTD file 
	   which contains the tags, but doesn't exists,
	   we must return an error */
	if (g_list_next(*elementsptr) != NULL)
		ret = RET_DTD_LOADED;
	return ret;
}


/*******************************************************************/
/* Function: get_element_byname
 * Description:
 *     Searches the list of elements, from its first node, for an element
 *     (tag) with the given name. The case of the name is ignored.
 * Arguments:
 *     elements - List of elements got with filetoDTD
 *     name - name of the tag or element
 * Return value:
 *     pointer to the first SGML_elementinfo of the list with that name, or
 *     NULL if there is none.
*/
SGML_elementinfo *get_element_byname(GList * elements, const gchar * name)
{
	GList *node;

	for (node = g_list_first(elements); node != NULL;
		 node = g_list_next(node)) {
		SGML_elementinfo *candidate = (SGML_elementinfo *) node->data;

		if (strcasecmp(candidate->name, name) == 0)
			return candidate;
	}
	return NULL;
}


/* Function: getDTDTagList
 * Description:
 *     Appends to buffer the names of the elements, separated by sep.
 *     It stops at the first name that, with a separator, would reach maxlen.
 * Arguments:
 *     elements - List of elements got with filetoDTD
 *     buffer - string to append the names to. It has to be a valid string
 *              at entry, because the names are added with strcat
 *     sep - separator to put between two names
 *     maxlen - limit for the number of characters added
 * Return value:
 *     Number of characters added to buffer
*/
gint getDTDTagList(GList * elements, gchar * buffer, gchar * sep,
				   int maxlen)
{
	GList *node = g_list_first(elements);
	int seplen = strlen(sep);
	int consumed = 0;

	while (node) {
		SGML_elementinfo *ei = (SGML_elementinfo *) node->data;
		int taglen = strlen(ei->name);

		/* The separator is counted for the first name as well */
		if (taglen + seplen + consumed >= maxlen)
			break;
		if (consumed != 0) {
			strcat(buffer + consumed, sep);
			consumed += seplen;
		}
		strcat(buffer + consumed, ei->name);
		consumed += taglen;
		node = g_list_next(node);
	}
	DEBUG_MSG("getDTDTagList=%s\n", buffer);
	return consumed;
}

gint getDTDAttList(GList * elements, gchar * buffer, gchar * sep,
				   int maxlen)
{
	GList *tmplist, *attlist;
	SGML_elementinfo *ei;
	SGML_attrinfo *ai;
	int consumed = 0, taglen, seplen = strlen(sep);

	for (tmplist = g_list_first(elements); tmplist;
		 tmplist = g_list_next(tmplist)) {
		ei = (SGML_elementinfo *) tmplist->data;
		if (ei) {
			for (attlist = g_list_first(ei->attrs); attlist;
				 attlist = g_list_next(attlist)) {
				ai = (SGML_attrinfo *) attlist->data;
				if (strstr(buffer, ai->name) == NULL) {
					if ((taglen = strlen(ai->name)) + seplen + consumed <
						maxlen) {
						if (consumed != 0) {
							strcat(buffer + consumed, sep);
							consumed += seplen;
						}
						strcat(buffer + consumed, ai->name);
						consumed += taglen;
					} else {
						break;
					}
				}
			}
		}
	}
	DEBUG_MSG("getDTDAttList=%s\n", buffer);
	return consumed;
}

/*******************************************************************/
/* Function: get_attr_othervalues
 * Description:
 *     Get a list of other values of an attribute (ai->othertype!=NULL).
 *     The values are the | separated pieces of ai->othertype; a leading
 *     parenthesis is skipped and empty pieces are ignored. A type without
 *     parentheses gives a list with that type as only value.
 * Arguments:
 *     ai -> info of the attribute
 * Return value:
 *     A GList with a copy (g_strdup) of each value, or NULL if the
 *     attribute has no othertype
*/
GList *get_attr_othervalues(SGML_attrinfo *ai)
{
  GList *lstvalues = NULL;

  assert(ai);
  /* The values of the attrs are in ai->othertype: (val1|val2..) */
  if( ai->othertype ) {
    const gchar *possiblevalues = ai->othertype, *start;
    gchar *value;

    if (*possiblevalues == '(')
      possiblevalues++;
    while (*possiblevalues && *possiblevalues != ')') {
      start = possiblevalues;
      while (*possiblevalues && *possiblevalues != '|'
	     && *possiblevalues != ')')
	possiblevalues++;
      if (possiblevalues > start) {
	/* Copy the rest of the string and cut it at the end of the value,
	   so a value of any length fits */
	value = g_strdup(start);
	value[possiblevalues - start] = '\0';
	lstvalues = g_list_append(lstvalues, value);
      }
      /* Step over the separator, but never over the end of the string */
      if (*possiblevalues)
	possiblevalues++;
    }
  }
  return lstvalues;
}

