Sophie

Sophie

distrib > Fedora > 13 > i386 > by-pkgid > b7d4776776c8e4296a0951083113f920 > files > 36

nickle-2.69-2.fc13.i686.rpm

/*
 * Parse a simple SGML-style grammar
 *
 * Copyright © 2001 Keith Packard and Carl Worth
 * All Rights Reserved.  See the file COPYING in this directory
 * for licensing information.
 */

library "context.5c"

public namespace Lexc {
    /*
     * Character classes stored in the id member of a Lexc::char
     */
    public global int Eof = 0;
    public global int Printable = 1;
    public global int White = 2;
    public global int Left = 3;
    public global int Right = 4;
    public global int Slash = 5;

    /*
     * One classified character: c is the character code as read,
     * id is one of the class values above
     */
    public typedef struct {
	int id;
	int c;
    } char;

    /*
     * Raised by get for a character code at or below ' ' that is
     * neither white space nor -1
     */
    public exception InvalidChar (int c);

    /*
     * Read the next character from 'in' and classify it.
     *
     * Yields { c = 0, id = Eof } when File::end reports the end of
     * the file, without reading.  Otherwise the character code is
     * printed on standard output as "got <code>" and then classified:
     * '<', '>' and '/' get their own classes, space, tab, newline and
     * carriage return are White, -1 is Eof, anything above ' ' is
     * Printable, and every remaining code raises InvalidChar.
     */
    public char function get (file in)
    {
	int	code;
	int	kind;

	if (File::end (in))
	    return (char) { .c = 0, .id = Eof };

	code = File::getc (in);
	printf ("got %d\n", code);
	switch (code) {
	case '<':
	    kind = Left;
	    break;
	case '>':
	    kind = Right;
	    break;
	case '/':
	    kind = Slash;
	    break;
	case ' ':
	case '\t':
	case '\n':
	case '\r':
	    kind = White;
	    break;
	case -1:
	    kind = Eof;
	    break;
	default:
	    /* whatever is left at or below ' ' is not acceptable input */
	    if (code <= ' ')
		raise InvalidChar (code);
	    kind = Printable;
	    break;
	}
	return (char) { .c = code, .id = kind };
    }

    /*
     * Push character code 'c' back onto 'in' using File::ungetc
     */
    public void function unget (int c, file in)
    {
	File::ungetc (c, in);
    }
}
	    
public namespace Lex {

    /*
     * Token kinds stored in the id member of a token
     */
    public global int Eof = 0;
    public global int Text = 1;
    public global int Space = 2;
    public global int Start = 3;
    public global int End = 4;

    /*
     * One token: id is a token kind from the list above, value is
     * the text collected for it (the tag name for Start and End,
     * with the '<', '/' and '>' delimiters left out)
     */
    public typedef struct {
	int	id;
	string	value;
    } token;
    
    /*
     * Raised with the offending character code when a character
     * cannot be accepted in the current lexer state
     */
    public exception Syntax (int c);

    /*
     * Lexer states used by get
     */
    global int	StateStart = 0;	    /* nothing collected yet */
    global int	StateSeenLeft = 1;  /* '<' read, tag kind still unknown */
    global int	StateStartTag = 2;  /* collecting the name of <tag> */
    global int	StateEndTag = 3;    /* collecting the name of </tag> */
    global int	StateString = 4;    /* collecting a run of text */
    global int	StateSpace = 5;	    /* collecting a run of white space */

    /*
     * Read the next token from 'in'.
     *
     * Returns an Eof token (value "") when the input is exhausted
     * before any character of a token has been read.  A Text token is
     * a run of Printable and Slash characters; a Space token is a run
     * of White characters.  Either run ends at the first character
     * that does not belong to it, which is pushed back for the next
     * call, or at the end of the input.  Start and End tokens carry
     * the tag name found between '<' (or '</') and '>'.
     *
     * Raises Syntax with the character code for any character that is
     * not valid in the current state, e.g. '>' outside of a tag or
     * anything but Printable characters inside one.
     */
    public token function get (file in)
    {
	Lexc::char  c;
	token	    t;
	int	    state = StateStart;
	
	for (;;)
	{
	    c = Lexc::get (in);
	    switch (state) {
	    case StateStart:
		switch (c.id) {
		case Lexc::Eof:
		    return (token) { .id = Eof, .value = "" };
		case Lexc::Printable:
		case Lexc::Slash:
		    state = StateString;
		    t = (token) { .id = Text, .value = String::new (c.c) };
		    continue;
		case Lexc::White:
		    state = StateSpace;
		    t = (token) { .id = Space, .value = String::new (c.c) };
		    continue;
		case Lexc::Left:
		    state = StateSeenLeft;
		    continue;
		}
		break;
	    case StateSeenLeft:
		switch (c.id) {
		case Lexc::Printable:
		    state = StateStartTag;
		    t = (token) { .id = Start, .value = String::new (c.c) };
		    continue;
		case Lexc::Slash:
		    /* the '/' of a closing tag is not part of its name */
		    state = StateEndTag;
		    t = (token) { .id = End, .value = "" };
		    continue;
		}
		break;
	    case StateStartTag:
	    case StateEndTag:
		switch (c.id) {
		case Lexc::Printable:
		    t.value += String::new (c.c);
		    continue;
		case Lexc::Right:
		    return t;
		}
		break;
	    case StateString:
		switch (c.id) {
		case Lexc::Printable:
		case Lexc::Slash:
		    /*
		     * A '/' may start a text run (see StateStart), so it
		     * has to be accepted in the middle of one as well
		     */
		    t.value += String::new (c.c);
		    continue;
		case Lexc::White:
		case Lexc::Left:
		    Lexc::unget (c.c, in);
		    /* fall through: the run is complete */
		case Lexc::Eof:
		    return t;
		}
		break;
	    case StateSpace:
		switch (c.id) {
		case Lexc::White:
		    t.value += String::new (c.c);
		    continue;
		case Lexc::Printable:
		case Lexc::Slash:
		case Lexc::Left:
		    /*
		     * Anything that can begin the next token ends the
		     * white space run; '/' begins a text run
		     */
		    Lexc::unget (c.c, in);
		    /* fall through: the run is complete */
		case Lexc::Eof:
		    return t;
		}
		break;
	    }
	    /* no case accepted the character in this state */
	    raise Syntax (c.c);
	}
    }
}

public namespace Parse {

    /*
     * Raised by get with a description of the problem when the
     * tags in the input do not nest properly
     */
    public exception Syntax (string token);

    public typedef element;
    
    /*
     * One node of the list built by get: a piece of text or
     * white space plus the context that was current when it was read
     */
    public typedef struct {
	*element    next;
	string	    data;
	*context    ctxt;
    } element;

    /* Sentinel terminating every element list */
    public *element element_end = reference ((element){});
    
    public typedef c_list;

    /*
     * One entry of the stack of open tags kept by get: the tag
     * name, the context in effect inside it and the enclosing entry
     */
    public typedef struct {
	string   active_tag;
	*context ctxt;
	*c_list  prev;
    } c_list;

    /* Sentinel terminating every c_list chain */
    public *c_list   c_list_end = reference ((c_list) {});

    /*
     * Print the context of each entry of 'c', innermost first,
     * stopping at c_list_end or at an entry whose context is no_context
     */
    function clist_show(*c_list c) {
	printf ("clist show...\n");
	for (; c != c_list_end && c->ctxt != no_context; c = c->prev) {
	    context_dump(*c->ctxt);
	    printf("\n");
	}
    }

    /*
     * Read tokens from 'in' until Eof and return the list of Text
     * and Space elements found, each tagged with the context current
     * at that point.  Every token is printed as "lexed: <token>" as it
     * is read.
     *
     * A Start token pushes a copy of the current context, updated by
     * context_set with the tag name; an End token pops it again.
     * Raises Syntax when a closing tag has no open tag, when it does
     * not match the innermost open tag, or when the input ends while
     * tags are still open.
     */
    public *element function get (file in)
    {
	*element    head = element_end;
	* *element  tail = &head;
	*c_list	    open = reference ( (c_list) { .ctxt=&root_context, .prev=c_list_end});

	for (;;)
	{
	    Lex::token  t = Lex::get (in);

	    printf ("lexed: %v\n", t);
	    switch (t.id) {
	    case Lex::Eof:
		if (open->ctxt != &root_context)
		    raise Syntax ("At Eof but not back to root context");
		return head;
	    case Lex::Text:
	    case Lex::Space:
		/* text and white space are stored the same way */
		*tail = reference ((element) {
		    .next = element_end,
		    .data = t.value,
		    .ctxt = open->ctxt });
		tail = &(*tail)->next;
		break;
	    case Lex::Start:
		/* the new entry starts from a copy of the enclosing context */
		open = reference ((c_list) {
		    .ctxt = reference (*open->ctxt),
		    .prev = open,
		    .active_tag = t.value });
		context_set (open->ctxt, t.value);
		break;
	    case Lex::End:
		if (open->ctxt == &root_context)
		    raise Syntax ("Closing tag </"+t.value+"> encountered with no remaining open tags");
		if (t.value != open->active_tag)
		    raise Syntax ("Illegal closing tag </"+t.value+"> (expected </"+open->active_tag+">)");
		open = open->prev;
		break;
	    }
	}
    }

    /*
     * Print every element from 'e' up to element_end: its context
     * via context_dump followed by its data on the same line
     */
    public void function dump (*element e)
    {
	while (e != element_end)
	{
	    context_dump (*e->ctxt);
	    printf ("%v\n", e->data);
	    e = e->next;
	}
    }
}