/* $Id: htmllex.c,v 1.27 2011/04/05 19:35:08 dperry Exp $ $Revision: 1.27 $ */
/* vim:set shiftwidth=4 ts=8: */

/*************************************************************************
 * Copyright (c) 2011 AT&T Intellectual Property 
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors: See CVS logs. Details at http://www.graphviz.org/
 *************************************************************************/


#include "render.h"
#include "htmltable.h"
#include "htmlparse.h"
#include "htmllex.h"
#include <ctype.h>
#include <stdlib.h>

#ifdef HAVE_EXPAT
#include <expat.h>
#endif

#ifndef XML_STATUS_ERROR
#define XML_STATUS_ERROR 0
#endif

/* lexstate_t:
 * All state of the HTML-label lexer. One file-scope instance, `state`,
 * is shared by the public entry points and the expat callbacks.
 * The mode field takes the values used in htmllex: 0 before the
 * artificial <HTML> has been fed to the parser, 1 while scanning the
 * input, 2 once the artificial </HTML> has been fed.
 */
typedef struct {
#ifdef HAVE_EXPAT
    XML_Parser parser;		/* expat parser instance */
#endif
    char* ptr;			/* input source */
    int tok;			/* token type   */
    agxbuf* xb;			/* buffer to gather T_string data */
    agxbuf  lb;			/* buffer for translating lexical data */
    char warn;			/* set if warning given */
    char error;			/* set if error given */
    char inCell;		/* set if in TD to allow T_string */
    char mode;			/* for handling artificial <HTML>..</HTML> */
    char *currtok;		/* for error reporting */
    char *prevtok;		/* for error reporting */
    int currtoklen;		/* number of bytes at currtok */
    int prevtoklen;		/* number of bytes at prevtok */
} lexstate_t;
static lexstate_t state;

/* error_context:
 * Print the last 2 "token"s seen.
 */
static void error_context(void)
{
    agxbclear(state.xb);
    if (state.prevtoklen > 0)
	agxbput_n(state.xb, state.prevtok, state.prevtoklen);
    agxbput_n(state.xb, state.currtok, state.currtoklen);
    agerr(AGPREV, "... %s ...\n", agxbuse(state.xb));
}

/* htmlerror:
 * yyerror - called by yacc output
 */
void htmlerror(const char *msg)
{
    if (state.error)
	return;
    state.error = 1;
    agerr(AGERR, "%s in line %d \n", msg, htmllineno());
    error_context();
}

#ifdef HAVE_EXPAT
/* lexerror:
 * called by lexer when unknown <..> is found.
 */
static void lexerror(const char *name)
{
    state.tok = T_error;
    state.error = 1;
    agerr(AGERR, "Unknown HTML element <%s> on line %d \n",
	  name, htmllineno());
}

/* attrFn: action run for a recognized attribute; receives the object
 * being built and the attribute value. The actions in this file return
 * 0 on success and 1 after warning about a rejected value.
 */
typedef int (*attrFn) (void *, char *);
/* bcmpfn: comparison function type expected by bsearch. */
typedef int (*bcmpfn) (const void *, const void *);

/* Limits used to range-check numeric attribute values. */
#define MAX_CHAR    (((unsigned char)(~0)) >> 1)	/* largest signed char value */
#define MIN_CHAR    ((signed char)(~MAX_CHAR))	/* smallest signed char value */
#define MAX_UCHAR   ((unsigned char)(~0))	/* largest unsigned char value */
#define MAX_USHORT  ((unsigned short)(~0))	/* largest unsigned short value */

/* Mechanism for automatically processing attributes:
 * each HTML element has a table of attr_item entries pairing an
 * attribute name with the function that stores its value.
 */
typedef struct {
    char *name;			/* attribute name */
    attrFn action;		/* action to perform if name matches */
} attr_item;

/* Size of one table entry; used for element counts and by bsearch. */
#define ISIZE (sizeof(attr_item))

/* icmp:
 * Compare two attr_item. Used in bsearch
 */
static int icmp(attr_item * i, attr_item * j)
{
    return strcasecmp(i->name, j->name);
}

/* bgcolorfn:
 * Action for the "bgcolor" attribute: store a heap copy of v in p.
 * Always returns 0.
 */
static int bgcolorfn(htmldata_t * p, char *v)
{
    char *copy = strdup(v);

    p->bgcolor = copy;
    return 0;
}

/* pencolorfn:
 * Action for the "color" attribute of tables and cells: store a heap
 * copy of v as the pen color of p. Always returns 0.
 */
static int pencolorfn(htmldata_t * p, char *v)
{
    char *copy = strdup(v);

    p->pencolor = copy;
    return 0;
}

/* hreffn:
 * Action for the "href" attribute: store a heap copy of v in p.
 * Always returns 0.
 */
static int hreffn(htmldata_t * p, char *v)
{
    char *copy = strdup(v);

    p->href = copy;
    return 0;
}

/* titlefn:
 * Action for both the "title" and "tooltip" attributes: store a heap
 * copy of v as the title of p. Because two attribute names map here,
 * it can run twice for one element; the previously stored copy is
 * released first so that it is not leaked.
 * NOTE(review): this assumes p->title is NULL until first set. The
 * flag setters in this file already OR into p->flags of a freshly
 * created element, which presupposes zeroed storage - confirm.
 * Always returns 0.
 */
static int titlefn(htmldata_t * p, char *v)
{
    free(p->title);
    p->title = strdup(v);
    return 0;
}

/* portfn:
 * Action for the "port" attribute: store a heap copy of v in p.
 * Always returns 0.
 */
static int portfn(htmldata_t * p, char *v)
{
    char *copy = strdup(v);

    p->port = copy;
    return 0;
}

/* stylefn:
 * Action for the "style" attribute of a table. The only accepted
 * value is ROUNDED (any letter case), which sets p->style.
 * Returns 0 on success; warns and returns 1 for any other value.
 */
static int stylefn(htmltbl_t * p, char *v)
{
    int rv = 0;
    /* ctype functions need an unsigned char value; plain char may be
     * negative for non-ASCII bytes.
     */
    char c = toupper((unsigned char) *v);
    if ((c == 'R') && !strcasecmp(v + 1, "OUNDED"))
	p->style = ROUNDED;
    else {
	agerr(AGWARN, "Illegal value %s for STYLE - ignored\n", v);
	rv = 1;
    }
    return rv;
}

/* targetfn:
 * Action for the "target" attribute: store a heap copy of v in p.
 * Always returns 0.
 */
static int targetfn(htmldata_t * p, char *v)
{
    char *copy = strdup(v);

    p->target = copy;
    return 0;
}

/* idfn:
 * Action for the "id" attribute: store a heap copy of v in p.
 * Always returns 0.
 */
static int idfn(htmldata_t * p, char *v)
{
    char *copy = strdup(v);

    p->id = copy;
    return 0;
}

/* doInt:
 * Parse the leading decimal integer of v and range-check it.
 * s is the attribute name used in warnings; the value must satisfy
 * min <= value <= max. On success the value is stored in *ul and 0 is
 * returned. If v does not start with a number, or the number is out of
 * range, a warning is issued, *ul is left untouched and 1 is returned.
 */
static int doInt(char *v, char *s, int min, int max, long *ul)
{
    char *end;
    long val = strtol(v, &end, 10);

    if (end == v) {
	agerr(AGWARN, "Improper %s value %s - ignored", s, v);
	return 1;
    }
    if (val > max) {
	agerr(AGWARN, "%s value %s > %d - too large - ignored", s, v, max);
	return 1;
    }
    if (val < min) {
	agerr(AGWARN, "%s value %s < %d - too small - ignored", s, v, min);
	return 1;
    }
    *ul = val;
    return 0;
}

/* borderfn:
 * Action for the "border" attribute: accept 0..MAX_UCHAR, store it in
 * p->border and record BORDER_SET in p->flags.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int borderfn(htmldata_t * p, char *v)
{
    long val;

    if (doInt(v, "BORDER", 0, MAX_UCHAR, &val) != 0) {
	return 1;
    }
    p->flags |= BORDER_SET;
    p->border = (unsigned char) val;
    return 0;
}

/* cellpaddingfn:
 * Action for the "cellpadding" attribute: accept 0..MAX_UCHAR, store
 * it in p->pad and record PAD_SET in p->flags.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int cellpaddingfn(htmldata_t * p, char *v)
{
    long val;

    if (doInt(v, "CELLPADDING", 0, MAX_UCHAR, &val) != 0) {
	return 1;
    }
    p->flags |= PAD_SET;
    p->pad = (unsigned char) val;
    return 0;
}

/* cellspacingfn:
 * Action for the "cellspacing" attribute: accept MIN_CHAR..MAX_CHAR
 * (negative values are allowed), store it in p->space and record
 * SPACE_SET in p->flags.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int cellspacingfn(htmldata_t * p, char *v)
{
    long val;

    if (doInt(v, "CELLSPACING", MIN_CHAR, MAX_CHAR, &val) != 0) {
	return 1;
    }
    p->flags |= SPACE_SET;
    p->space = (signed char) val;
    return 0;
}

/* cellborderfn:
 * Action for the "cellborder" attribute of a table: accept
 * 0..MAX_CHAR and store it in p->cb.
 * Warnings name the attribute CELLBORDER, matching the name under
 * which it is registered in the attribute table.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int cellborderfn(htmltbl_t * p, char *v)
{
    long u;

    if (doInt(v, "CELLBORDER", 0, MAX_CHAR, &u))
	return 1;
    p->cb = (unsigned char) u;
    return 0;
}

/* fixedsizefn:
 * Action for the "fixedsize" attribute. TRUE (any letter case) sets
 * FIXED_FLAG in p->flags; FALSE is accepted and changes nothing.
 * Returns 0 for those two values; warns and returns 1 otherwise.
 */
static int fixedsizefn(htmldata_t * p, char *v)
{
    char first = toupper(*(unsigned char *) v);

    if (first == 'T' && strcasecmp(v + 1, "RUE") == 0) {
	p->flags |= FIXED_FLAG;
	return 0;
    }
    if (first == 'F' && strcasecmp(v + 1, "ALSE") == 0) {
	return 0;
    }
    agerr(AGWARN, "Illegal value %s for FIXEDSIZE - ignored\n", v);
    return 1;
}

/* valignfn:
 * Action for the "valign" attribute. BOTTOM and TOP (any letter case)
 * set VALIGN_BOTTOM / VALIGN_TOP in p->flags; MIDDLE is accepted and
 * sets no flag.
 * Returns 0 on success; warns and returns 1 for any other value.
 */
static int valignfn(htmldata_t * p, char *v)
{
    int rv = 0;
    /* ctype functions need an unsigned char value; plain char may be
     * negative for non-ASCII bytes.
     */
    char c = toupper((unsigned char) *v);
    if ((c == 'B') && !strcasecmp(v + 1, "OTTOM"))
	p->flags |= VALIGN_BOTTOM;
    else if ((c == 'T') && !strcasecmp(v + 1, "OP"))
	p->flags |= VALIGN_TOP;
    else if ((c != 'M') || strcasecmp(v + 1, "IDDLE")) {
	agerr(AGWARN, "Illegal value %s for VALIGN - ignored\n", v);
	rv = 1;
    }
    return rv;
}

/* halignfn:
 * Action for the "align" attribute of a table. LEFT and RIGHT (any
 * letter case) set HALIGN_LEFT / HALIGN_RIGHT in p->flags; CENTER is
 * accepted and sets no flag.
 * Returns 0 on success; warns and returns 1 for any other value.
 */
static int halignfn(htmldata_t * p, char *v)
{
    int rv = 0;
    /* ctype functions need an unsigned char value; plain char may be
     * negative for non-ASCII bytes.
     */
    char c = toupper((unsigned char) *v);
    if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
	p->flags |= HALIGN_LEFT;
    else if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
	p->flags |= HALIGN_RIGHT;
    else if ((c != 'C') || strcasecmp(v + 1, "ENTER")) {
	agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n", v);
	rv = 1;
    }
    return rv;
}

/* cell_halignfn:
 * Action for the "align" attribute of a cell. LEFT, RIGHT and TEXT
 * (any letter case) set HALIGN_LEFT / HALIGN_RIGHT / HALIGN_TEXT in
 * p->flags; CENTER is accepted and sets no flag.
 * Returns 0 on success; warns and returns 1 for any other value.
 */
static int cell_halignfn(htmldata_t * p, char *v)
{
    int rv = 0;
    /* ctype functions need an unsigned char value; plain char may be
     * negative for non-ASCII bytes.
     */
    char c = toupper((unsigned char) *v);
    if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
	p->flags |= HALIGN_LEFT;
    else if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
	p->flags |= HALIGN_RIGHT;
    else if ((c == 'T') && !strcasecmp(v + 1, "EXT"))
	p->flags |= HALIGN_TEXT;
    else if ((c != 'C') || strcasecmp(v + 1, "ENTER"))
	rv = 1;
    if (rv)
	agerr(AGWARN, "Illegal value %s for ALIGN in TD - ignored\n", v);
    return rv;
}

/* balignfn:
 * Action for the "balign" attribute of a cell. LEFT and RIGHT (any
 * letter case) set BALIGN_LEFT / BALIGN_RIGHT in p->flags; CENTER is
 * accepted and sets no flag.
 * Returns 0 on success; warns and returns 1 for any other value.
 */
static int balignfn(htmldata_t * p, char *v)
{
    int rv = 0;
    /* ctype functions need an unsigned char value; plain char may be
     * negative for non-ASCII bytes.
     */
    char c = toupper((unsigned char) *v);
    if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
	p->flags |= BALIGN_LEFT;
    else if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
	p->flags |= BALIGN_RIGHT;
    else if ((c != 'C') || strcasecmp(v + 1, "ENTER"))
	rv = 1;
    if (rv)
	agerr(AGWARN, "Illegal value %s for BALIGN in TD - ignored\n", v);
    return rv;
}

/* heightfn:
 * Action for the "height" attribute: accept 0..MAX_USHORT and store
 * it in p->height.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int heightfn(htmldata_t * p, char *v)
{
    long val;

    if (doInt(v, "HEIGHT", 0, MAX_USHORT, &val) != 0) {
	return 1;
    }
    p->height = (unsigned short) val;
    return 0;
}

/* widthfn:
 * Action for the "width" attribute: accept 0..MAX_USHORT and store
 * it in p->width.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int widthfn(htmldata_t * p, char *v)
{
    long val;

    if (doInt(v, "WIDTH", 0, MAX_USHORT, &val) != 0) {
	return 1;
    }
    p->width = (unsigned short) val;
    return 0;
}

/* rowspanfn:
 * Action for the "rowspan" attribute of a cell: accept 1..MAX_USHORT
 * and store it in p->rspan. Zero gets its own warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int rowspanfn(htmlcell_t * p, char *v)
{
    long span;

    if (doInt(v, "ROWSPAN", 0, MAX_USHORT, &span) != 0) {
	return 1;
    }
    if (span != 0) {
	p->rspan = (unsigned short) span;
	return 0;
    }
    agerr(AGWARN, "ROWSPAN value cannot be 0 - ignored\n");
    return 1;
}

/* colspanfn:
 * Action for the "colspan" attribute of a cell: accept 1..MAX_USHORT
 * and store it in p->cspan. Zero gets its own warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int colspanfn(htmlcell_t * p, char *v)
{
    long span;

    if (doInt(v, "COLSPAN", 0, MAX_USHORT, &span) != 0) {
	return 1;
    }
    if (span != 0) {
	p->cspan = (unsigned short) span;
	return 0;
    }
    agerr(AGWARN, "COLSPAN value cannot be 0 - ignored\n");
    return 1;
}

/* fontcolorfn:
 * Action for the "color" attribute of FONT: store a heap copy of v
 * as the font color. Always returns 0.
 */
static int fontcolorfn(htmlfont_t * p, char *v)
{
    char *copy = strdup(v);

    p->color = copy;
    return 0;
}

/* facefn:
 * Action for the "face" attribute of FONT: store a heap copy of v
 * as the font name. Always returns 0.
 */
static int facefn(htmlfont_t * p, char *v)
{
    char *copy = strdup(v);

    p->name = copy;
    return 0;
}

/* ptsizefn:
 * Action for the "point-size" attribute of FONT: accept an integer in
 * 0..MAX_UCHAR and store it, converted to double, in p->size.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int ptsizefn(htmlfont_t * p, char *v)
{
    long pts;

    if (doInt(v, "POINT-SIZE", 0, MAX_UCHAR, &pts) != 0) {
	return 1;
    }
    p->size = (double) pts;
    return 0;
}

/* srcfn:
 * Action for the "src" attribute of IMG: store a heap copy of v in p.
 * Always returns 0.
 */
static int srcfn(htmlimg_t * p, char *v)
{
    char *copy = strdup(v);

    p->src = copy;
    return 0;
}

/* scalefn:
 * Action for the "scale" attribute of IMG: store a heap copy of v in
 * p. The value is not validated here. Always returns 0.
 */
static int scalefn(htmlimg_t * p, char *v)
{
    char *copy = strdup(v);

    p->scale = copy;
    return 0;
}

/* alignfn:
 * Action for the "align" attribute of BR. Stores 'r', 'l' or 'n' in
 * *p for RIGHT, LEFT or CENTER respectively (any letter case).
 * Each value is matched in full: the first letter AND the remainder
 * must agree. The short-circuit && also ensures v + 1 is examined
 * only when v is non-empty.
 * Returns 0 on success; warns, leaves *p untouched and returns 1 for
 * any other value.
 */
static int alignfn(int *p, char *v)
{
    int rv = 0;
    /* ctype functions need an unsigned char value; plain char may be
     * negative for non-ASCII bytes.
     */
    char c = toupper((unsigned char) *v);
    if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
	*p = 'r';
    else if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
	*p = 'l';
    else if ((c == 'C') && !strcasecmp(v + 1, "ENTER"))
	*p = 'n';
    else {
	agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n", v);
	rv = 1;
    }
    return rv;
}

/* Tables used in binary search; MUST be alphabetized.
 * Lookups compare names with strcasecmp, so the entries must be in
 * case-insensitive alphabetical order or bsearch will miss them.
 * Each action is cast to attrFn; its real first parameter is the
 * object type of the element the table belongs to.
 */

/* Attributes accepted on <TABLE>. "title" and "tooltip" share one action. */
static attr_item tbl_items[] = {
    {"align", (attrFn) halignfn},
    {"bgcolor", (attrFn) bgcolorfn},
    {"border", (attrFn) borderfn},
    {"cellborder", (attrFn) cellborderfn},
    {"cellpadding", (attrFn) cellpaddingfn},
    {"cellspacing", (attrFn) cellspacingfn},
    {"color", (attrFn) pencolorfn},
    {"fixedsize", (attrFn) fixedsizefn},
    {"height", (attrFn) heightfn},
    {"href", (attrFn) hreffn},
    {"id", (attrFn) idfn},
    {"port", (attrFn) portfn},
    {"style", (attrFn) stylefn},
    {"target", (attrFn) targetfn},
    {"title", (attrFn) titlefn},
    {"tooltip", (attrFn) titlefn},
    {"valign", (attrFn) valignfn},
    {"width", (attrFn) widthfn},
};

/* Attributes accepted on <TD>. "title" and "tooltip" share one action. */
static attr_item cell_items[] = {
    {"align", (attrFn) cell_halignfn},
    {"balign", (attrFn) balignfn},
    {"bgcolor", (attrFn) bgcolorfn},
    {"border", (attrFn) borderfn},
    {"cellpadding", (attrFn) cellpaddingfn},
    {"cellspacing", (attrFn) cellspacingfn},
    {"color", (attrFn) pencolorfn},
    {"colspan", (attrFn) colspanfn},
    {"fixedsize", (attrFn) fixedsizefn},
    {"height", (attrFn) heightfn},
    {"href", (attrFn) hreffn},
    {"id", (attrFn) idfn},
    {"port", (attrFn) portfn},
    {"rowspan", (attrFn) rowspanfn},
    {"target", (attrFn) targetfn},
    {"title", (attrFn) titlefn},
    {"tooltip", (attrFn) titlefn},
    {"valign", (attrFn) valignfn},
    {"width", (attrFn) widthfn},
};

/* Attributes accepted on <FONT>. */
static attr_item font_items[] = {
    {"color", (attrFn) fontcolorfn},
    {"face", (attrFn) facefn},
    {"point-size", (attrFn) ptsizefn},
};

/* Attributes accepted on <IMG>. */
static attr_item img_items[] = {
    {"scale", (attrFn) scalefn},
    {"src", (attrFn) srcfn},
};

/* Attributes accepted on <BR>. */
static attr_item br_items[] = {
    {"align", (attrFn) alignfn},
};

/* doAttrs:
 * Apply a null-terminated list of name/value attribute pairs to tp.
 * atts holds the pairs as consecutive entries (name, value, name,
 * value, ..., NULL). Each name is looked up by binary search in
 * items, which has nel entries; on a match the entry's action is
 * called with tp and the value, and its result is OR-ed into
 * state.warn. Unknown names produce a warning that mentions s, the
 * name of the HTML element being processed, and set state.warn.
 */
static void
doAttrs(void *tp, attr_item * items, int nel, char **atts, char *s)
{
    attr_item probe;
    attr_item *hit;
    char **pair;

    for (pair = atts; pair[0] != NULL; pair += 2) {
	probe.name = pair[0];
	hit = (attr_item *) bsearch(&probe, items, nel, ISIZE,
				    (bcmpfn) icmp);
	if (hit == NULL) {
	    agerr(AGWARN, "Illegal attribute %s in %s - ignored\n",
		  pair[0], s);
	    state.warn = 1;
	    continue;
	}
	state.warn |= hit->action(tp, pair[1]);
    }
}

/* mkBR:
 * Set the semantic value for a <BR> element: htmllval.i starts as
 * UNSET_ALIGN and is then updated from the element's attributes.
 */
static void mkBR(char **atts)
{
    int *align = &htmllval.i;

    *align = UNSET_ALIGN;
    doAttrs(align, br_items, sizeof(br_items) / ISIZE, atts, "<BR>");
}

/* mkImg:
 * Create the htmlimg_t for an <IMG> element and fill it in from the
 * element's attributes. Returns the new object.
 */
static htmlimg_t *mkImg(char **atts)
{
    htmlimg_t *image;

    image = NEW(htmlimg_t);
    doAttrs(image, img_items, sizeof(img_items) / ISIZE, atts, "<IMG>");
    return image;
}

static htmlfont_t *mkFont(char **atts, int flags, int ul)
{
    htmlfont_t *font = NEW(htmlfont_t);

    font->size = -1.0;		/* unassigned */
    font->flags = flags;
    if (atts)
	doAttrs(font, font_items, sizeof(font_items) / ISIZE, atts, "<FONT>");

    return font;
}

/* mkCell:
 * Create the htmlcell_t for a <TD> element. Row and column span
 * default to 1 before the element's attributes are applied.
 * Returns the new object.
 */
static htmlcell_t *mkCell(char **atts)
{
    htmlcell_t *td;

    td = NEW(htmlcell_t);
    td->rspan = 1;
    td->cspan = 1;
    doAttrs(td, cell_items, sizeof(cell_items) / ISIZE, atts, "<TD>");
    return td;
}

/* mkTbl:
 * Create the htmltbl_t for a <TABLE> element. rc and cb are preset to
 * -1 before the element's attributes are applied.
 * Returns the new object.
 */
static htmltbl_t *mkTbl(char **atts)
{
    htmltbl_t *table;

    table = NEW(htmltbl_t);
    table->cb = -1;		/* unset cell border attribute */
    table->rc = -1;		/* flag that table is a raw, parsed table */
    doAttrs(table, tbl_items, sizeof(tbl_items) / ISIZE, atts, "<TABLE>");
    return table;
}

/* startElement:
 * expat callback for an opening tag. Maps the element name (compared
 * without regard to case) to a token in state.tok, builds the
 * semantic value in htmllval where the element has one, and updates
 * state.inCell for TABLE, TR/TH and TD. Unknown names are reported
 * through lexerror. The user argument is not used.
 */
static void startElement(void *user, const char *name, char **atts)
{
    if (!strcasecmp(name, "TABLE")) {
	htmllval.tbl = mkTbl(atts);
	state.inCell = 0;
	state.tok = T_table;
	return;
    }
    if (!strcasecmp(name, "TR") || !strcasecmp(name, "TH")) {
	state.inCell = 0;
	state.tok = T_row;
	return;
    }
    if (!strcasecmp(name, "TD")) {
	state.inCell = 1;
	htmllval.cell = mkCell(atts);
	state.tok = T_cell;
	return;
    }
    if (!strcasecmp(name, "FONT")) {
	htmllval.font = mkFont(atts, 0, 0);
	state.tok = T_font;
	return;
    }
    if (!strcasecmp(name, "B")) {
	htmllval.font = mkFont(0, HTML_BF, 0);
	state.tok = T_bold;
	return;
    }
    if (!strcasecmp(name, "U")) {
	htmllval.font = mkFont(0, HTML_UL, 1);
	state.tok = T_underline;
	return;
    }
    if (!strcasecmp(name, "I")) {
	htmllval.font = mkFont(0, HTML_IF, 0);
	state.tok = T_italic;
	return;
    }
    if (!strcasecmp(name, "SUP")) {
	htmllval.font = mkFont(0, HTML_SUP, 0);
	state.tok = T_sup;
	return;
    }
    if (!strcasecmp(name, "SUB")) {
	htmllval.font = mkFont(0, HTML_SUB, 0);
	state.tok = T_sub;
	return;
    }
    if (!strcasecmp(name, "BR")) {
	mkBR(atts);
	state.tok = T_br;
	return;
    }
    if (!strcasecmp(name, "IMG")) {
	htmllval.img = mkImg(atts);
	state.tok = T_img;
	return;
    }
    if (!strcasecmp(name, "HTML")) {
	state.tok = T_html;
	return;
    }
    lexerror(name);
}

/* endElement:
 * expat callback for a closing tag. Maps the element name (compared
 * without regard to case) to the matching end token in state.tok and
 * updates state.inCell for TABLE and TD. For BR and IMG, if the
 * current token is still the element's own start token, the pair is
 * collapsed into the single token T_BR / T_IMG. Unknown names are
 * reported through lexerror. The user argument is not used.
 */
static void endElement(void *user, const char *name)
{
    if (!strcasecmp(name, "TABLE")) {
	state.tok = T_end_table;
	state.inCell = 1;
	return;
    }
    if (!strcasecmp(name, "TR") || !strcasecmp(name, "TH")) {
	state.tok = T_end_row;
	return;
    }
    if (!strcasecmp(name, "TD")) {
	state.tok = T_end_cell;
	state.inCell = 0;
	return;
    }
    if (!strcasecmp(name, "HTML")) {
	state.tok = T_end_html;
	return;
    }
    if (!strcasecmp(name, "FONT")) {
	state.tok = T_end_font;
	return;
    }
    if (!strcasecmp(name, "B")) {
	state.tok = T_n_bold;
	return;
    }
    if (!strcasecmp(name, "U")) {
	state.tok = T_n_underline;
	return;
    }
    if (!strcasecmp(name, "I")) {
	state.tok = T_n_italic;
	return;
    }
    if (!strcasecmp(name, "SUP")) {
	state.tok = T_n_sup;
	return;
    }
    if (!strcasecmp(name, "SUB")) {
	state.tok = T_n_sub;
	return;
    }
    if (!strcasecmp(name, "BR")) {
	state.tok = (state.tok == T_br) ? T_BR : T_end_br;
	return;
    }
    if (!strcasecmp(name, "IMG")) {
	state.tok = (state.tok == T_img) ? T_IMG : T_end_img;
	return;
    }
    lexerror(name);
}

/* characterData:
 * expat callback for text. Text is only collected while
 * state.inCell is set, i.e. immediately inside <TD>..</TD> or
 * <HTML>..</HTML>. Bytes below ' ' (control/formatting characters)
 * are dropped; everything else, spaces included, is appended to
 * state.xb. If at least one byte was kept, the current token becomes
 * T_string. The user argument is not used.
 */
static void characterData(void *user, const char *s, int length)
{
    int left;
    int kept = 0;

    if (!state.inCell)
	return;

    left = length;
    while (left) {
	unsigned char c = *s++;
	if (c >= ' ') {
	    kept++;
	    (void) agxbputc(state.xb, c);
	}
	left--;
    }
    if (kept != 0)
	state.tok = T_string;
}
#endif

/* initHTMLlexer:
 * Prepare the lexer to scan src. xb is the caller's buffer in which
 * T_string text is gathered; charset selects the encoding name given
 * to the XML parser.
 * With expat: resets the lexer state, creates the parser, installs
 * the element and character-data callbacks and returns 0.
 * Without expat: warns once per process that table formatting is
 * unavailable and returns 1.
 */
int initHTMLlexer(char *src, agxbuf * xb, int charset)
{
#ifdef HAVE_EXPAT
    state.ptr = src;
    state.xb = xb;
    agxbinit (&state.lb, SMALLBUF, NULL);
    state.error = 0;
    state.warn = 0;
    state.mode = 0;
    state.inCell = 1;
    state.prevtoklen = 0;
    state.currtoklen = 0;
    state.parser = XML_ParserCreate(charsetToStr(charset));
    XML_SetElementHandler(state.parser,
			  (XML_StartElementHandler) startElement,
			  endElement);
    XML_SetCharacterDataHandler(state.parser, characterData);
    return 0;
#else
    static int warned;
    if (warned == 0) {
	agerr(AGWARN,
	      "Not built with libexpat. Table formatting is not available.\n");
	warned = 1;
    }
    return 1;
#endif
}

/* clearHTMLlexer:
 * Release the resources acquired by initHTMLlexer.
 * With expat: frees the parser and the translation buffer and returns
 * non-zero if any warning or error was recorded while lexing.
 * Without expat: returns 1.
 */
int clearHTMLlexer()
{
#ifdef HAVE_EXPAT
    int status = state.error | state.warn;

    agxbfree (&state.lb);
    XML_ParserFree(state.parser);
    return status;
#else
    return 1;
#endif
}

#ifdef HAVE_EXPAT
/* eatComment:
 * p is the first character after the comment opener. Skip forward to
 * the '>' that closes the comment, counting nested '<'..'>' pairs on
 * the way (HTML strings are relied upon to have matched <>).
 * Returns a pointer to that '>', or to the terminating null character
 * if the input ends first. If a '>' is found but is not preceded by
 * "--" within the comment body, an "Unclosed comment" warning is
 * issued and state.warn is set.
 */
static char *eatComment(char *p)
{
    char *s;
    int depth = 1;

    for (s = p; *s != '\0'; s++) {
	if (*s == '<')
	    depth++;
	else if (*s == '>' && --depth == 0)
	    break;
    }
    /* s is now at the closing '>' or at '\0' */
    if (*s != '\0') {
	char *dashes = s - 2;
	if ((dashes < p) || strncmp(dashes, "--", 2) != 0) {
	    agerr(AGWARN, "Unclosed comment\n");
	    state.warn = 1;
	}
    }
    return s;
}

/* findNext:
 * Return a pointer just past the next XML unit starting at s. A unit
 * is either <..>, an HTML comment <!-- ... -->, or a run of
 * characters up to the next <.
 * For a character run, the text is also copied into xb; a '&' that is
 * not followed by '#' is handed to scanEntity, which continues the
 * scan from the pointer it returns.
 * For a tag or comment with no closing '>', a warning is issued,
 * state.warn is set and the pointer to the end of input is returned.
 */
static char *findNext(char *s, agxbuf* xb)
{
    char *t;
    char c;

    if (*s != '<') {
	/* character data: copy up to the next '<' or end of input */
	t = s;
	while ((c = *t) != '\0' && c != '<') {
	    if (c == '&' && t[1] != '#')
		t = scanEntity(t + 1, xb);
	    else {
		(void) agxbputc(xb, c);
		t++;
	    }
	}
	return t;
    }

    /* markup: a comment or an ordinary tag */
    t = s + 1;
    if (t[0] == '!' && strncmp(t + 1, "--", 2) == 0)
	t = eatComment(t + 3);
    else {
	while (*t != '\0' && *t != '>')
	    t++;
    }
    if (*t == '>')
	t++;
    else {
	agerr(AGWARN, "Label closed before end of HTML element\n");
	state.warn = 1;
    }
    return t;
}
#endif

/* htmllineno:
 * Return the line number the XML parser is currently at, or 0 when
 * built without expat.
 */
int htmllineno()
{
#ifdef HAVE_EXPAT
    int line = XML_GetCurrentLineNumber(state.parser);

    return line;
#else
    return 0;
#endif
}

#ifdef DEBUG
static void printTok(int tok)
{
    char *s;

    switch (tok) {
    case T_BR:
	s = "T_BR";
	break;
    case T_br:
	s = "T_br";
	break;
    case T_end_br:
	s = "T_end_br";
	break;
    case T_end_table:
	s = "T_end_table";
	break;
    case T_row:
	s = "T_row";
	break;
    case T_end_row:
	s = "T_end_row";
	break;
    case T_end_cell:
	s = "T_end_cell";
	break;
    case T_html:
	s = "T_html";
	break;
    case T_end_html:
	s = "T_end_html";
	break;
    case T_string:
	s = "T_string";
	break;
    case T_error:
	s = "T_error";
	break;
    case T_table:
	s = "T_table";
	break;
    case T_cell:
	s = "T_cell";
	break;
    case T_img:
	s = "T_img";
	break;
    case T_end_img:
	s = "T_end_img";
	break;
    case T_IMG:
	s = "T_IMG";
	break;
    case T_underline:
	s = "T_underline";
	break;
    case T_n_underline:
	s = "T_underline";
	break;
    case T_italic:
	s = "T_italic";
	break;
    case T_n_italic:
	s = "T_italic";
	break;
    case T_bold:
	s = "T_bold";
	break;
    case T_n_bold:
	s = "T_bold";
	break;
    default:
	s = "<unknown>";
    }
    if (tok == T_string) {
	fprintf(stderr, "%s \"", s);
	fwrite(agxbstart(state.xb), 1, agxblen(state.xb), stderr);
	fprintf(stderr, "\"\n");
    } else
	fprintf(stderr, "%s\n", s);
}

#endif

/* htmllex:
 * Scanner entry point: return the next token, or EOF once the input
 * is exhausted (and always EOF when built without expat).
 * The input is fed to the XML parser one unit at a time; the parser
 * callbacks set state.tok, and feeding continues until some unit
 * yields a token. The real input is bracketed by an artificial
 * <HTML> ... </HTML> pair, tracked in state.mode:
 *   0 - nothing fed yet; feed "<HTML>"
 *   1 - feeding units of the input; at its end feed "</HTML>"
 *   2 - "</HTML>" already fed; the next call returns EOF
 * A parse error is reported once, with input context, and yields
 * T_error.
 */
int htmllex()
{
#ifdef HAVE_EXPAT
    static char *begin_html = "<HTML>";
    static char *end_html = "</HTML>";

    char *s;
    char *endp = 0;
    int len, llen;
    int rv;

    state.tok = 0;
    do {
	if (state.mode == 2)
	    return EOF;
	if (state.mode == 0) {
	    /* first call: open the artificial wrapper */
	    state.mode = 1;
	    s = begin_html;
	    len = strlen(s);
	    endp = 0;
	} else {
	    s = state.ptr;
	    if (*s == '\0') {
		/* input consumed: close the artificial wrapper */
		state.mode = 2;
		s = end_html;
		len = strlen(s);
	    } else {
		/* next unit; character data is also copied into state.lb */
		endp = findNext(s,&state.lb);
		len = endp - s;
	    }
	}
	/* remember the last two units for error_context */
	state.prevtok = state.currtok;
	state.prevtoklen = state.currtoklen;
	state.currtok = s;
	state.currtoklen = len;
	/* if findNext put text in state.lb, parse that instead of the raw input */
	if ((llen = agxblen(&state.lb)))
	    rv = XML_Parse(state.parser, agxbuse(&state.lb),llen, 0);
	else
	    rv = XML_Parse(state.parser, s, len, (len ? 0 : 1));
	if (rv == XML_STATUS_ERROR) {
	    if (!state.error) {
		agerr(AGERR, "%s in line %d \n",
		      XML_ErrorString(XML_GetErrorCode(state.parser)),
		      htmllineno());
		error_context();
		state.error = 1;
		state.tok = T_error;
	    }
	}
	/* advance past the unit just parsed (endp is 0 for the artificial <HTML>) */
	if (endp)
	    state.ptr = endp;
    } while (state.tok == 0);
    return state.tok;
#else
    return EOF;
#endif
}