Sophie

Sophie

distrib > Mandriva > 9.1 > ppc > by-pkgid > b9ba69a436161613d8fb030c8c726a8e > files > 580

spirit-1.5.1-2mdk.noarch.rpm

/*=============================================================================
    Lexer

    Spirit V1.3
    Copyright (c) 2001, Daniel C. Nuffer

    This software is provided 'as-is', without any express or implied
    warranty. In no event will the copyright holder be held liable for
    any damages arising from the use of this software.

    Permission is granted to anyone to use this software for any purpose,
    including commercial applications, and to alter it and redistribute
    it freely, subject to the following restrictions:

    1.  The origin of this software must not be misrepresented; you must
        not claim that you wrote the original software. If you use this
        software in a product, an acknowledgment in the product documentation
        would be appreciated but is not required.

    2.  Altered source versions must be plainly marked as such, and must
        not be misrepresented as being the original software.

    3.  This notice may not be removed or altered from any source
        distribution.

    Acknowledgements:

        Special thanks to Dan Nuffer, John (EBo) David, Chris Uzdavinis,
        and Doug Gregor. These people are most instrumental in steering
        Spirit in the right direction.

        Special thanks also to people who have contributed to the code base
        and sample code, ported Spirit to various platforms and compilers,
        gave suggestions, reported and provided bug fixes. Alexander
        Hirner, Andy Elvey, Bogdan Kushnir, Brett Calcott, Bruce Florman,
        Changzhe Han, Colin McPhail, Hakki Dogusan, Jan Bares, Joseph
        Smith, Martijn W. van der Lee, Raghavendra Satish, Remi Delcos, Tom
        Spilman, Vladimir Prus, W. Scott Dillman, David A. Greene, Bob
        Bailey, Hartmut Kaiser.

        Finally special thanks also to people who gave feedback and
        valuable comments, particularly members of Spirit's Source Forge
        mailing list and boost.org.

    URL: http://spirit.sourceforge.net/


    TODO List:
        X callback objects (called when a match is made.)
        X    callback passed first & last iterator, and
                a reference to a lexercontrol object that supports some
                operations on the lexer.
                    set state
                    terminate
                    state stack (push, pop, top)
                    set new token return value
                    ignore the current token
                    yymore
                    get length of matched token
                    get current lexer state
            
        lexer states.
        organize the file into hpp and ipp. arrange the classes in a logical order.
        use buffering - iterator only needs be an input iterator,
           lexer & callback are not templatized on iterator type, but char type.
           next_token is templatized on iterator type. 
        beginning/ending contexts.
        ^ and $
        DFA minimization.
        DFA table compression.
        DFA serialization to save recomputing the DFA.

=============================================================================*/
#ifndef SPIRIT_LEXER_HPP
#define SPIRIT_LEXER_HPP

///////////////////////////////////////////////////////////////////////////////
#include "boost/spirit/core.hpp"
#include "boost/spirit/symbols/symbols.hpp"
#include "boost/spirit/utility/chset.hpp"
#include "boost/spirit/utility/escape_char.hpp"

#include <set>
#include <map>
#include <vector>
#include <stack>
#include <utility> // for pair
#include <iostream>
#include <fstream>
#include "boost/limits.hpp"

#if defined(BOOST_MSVC)
#include "boost/spirit/core/impl/msvc.hpp"
#endif

#if defined(BOOST_MSVC) && (BOOST_MSVC <= 1300)
#define SPIRIT_IT_NS impl
#else
#define SPIRIT_IT_NS std
#endif 

///////////////////////////////////////////////////////////////////////////////
namespace spirit
{

typedef unsigned char uchar;
typedef unsigned int node_id_t;
const node_id_t invalid_node = node_id_t(-1);
typedef std::set<node_id_t> node_set;
typedef std::vector<uchar> uchar_vector;
typedef std::map<node_id_t, node_set> followpos_t;
typedef std::vector<uchar_vector> state_match_t;

template <typename TokenT>
class lexer_control;

class bad_regex : public std::exception
{
};

namespace lexerimpl
{

class node
{
public:

    virtual ~node() {}

    virtual node* clone() const = 0;
    virtual bool nullable() const = 0;
    virtual node_set firstpos() const = 0;
    virtual node_set lastpos() const = 0;
    virtual void compute_followpos(followpos_t& followpos) const = 0;
    virtual void compute_state_match(state_match_t& state_match) const = 0;
    virtual void get_eof_ids(node_set& eof_set) const = 0;
    virtual void assign_node_ids(node_id_t& node_count) = 0;
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const = 0;
#endif

};

class char_node : public node
{

public:

    char_node(const uchar c);
    char_node(const char_node& x);
    virtual ~char_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    uchar m_char;
    node_id_t m_node_num;
};

inline
char_node::char_node(const uchar c)
    : node()
    , m_char(c)
    , m_node_num(0)
{
}

inline
char_node::char_node(const char_node& x)
    : node(x)
    , m_char(x.m_char)
    , m_node_num(x.m_node_num)
{
}

inline node * 
char_node::clone() const
{
    return new char_node(*this);
}

inline bool 
char_node::nullable() const
{
    return false;
}

inline node_set 
char_node::firstpos() const
{
    node_set rval;
    rval.insert(m_node_num);
    return rval;
}

inline node_set 
char_node::lastpos() const
{
    return firstpos();
}

inline void 
char_node::compute_followpos(followpos_t&) const
{
    return;
}

inline void
char_node::compute_state_match(state_match_t& state_match) const
{
    if (state_match.size() < m_node_num + 1)
        state_match.resize(m_node_num + 1);
    state_match[m_node_num].resize(256);
    state_match[m_node_num][m_char] = 1;
}

inline void 
char_node::get_eof_ids(node_set&) const
{
    return;
}

inline void 
char_node::assign_node_ids(node_id_t& node_count)
{
    m_node_num = node_count++;
}

#if defined(SPIRIT_DEBUG)
inline void 
char_node::dump(std::ostream& out) const
{
    out << "\nchar_node m_char = " << m_char;
    out << " m_node_num = " << m_node_num;
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));
}
#endif


class epsilon_node : public node
{

public:

    epsilon_node();
    epsilon_node(const epsilon_node& x);
    virtual ~epsilon_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    node_id_t m_node_num;
};

inline
epsilon_node::epsilon_node()
    : node()
    , m_node_num(0)
{
}

inline 
epsilon_node::epsilon_node(const epsilon_node& x)
    : node(x)
    , m_node_num(x.m_node_num)
{
}

inline node * 
epsilon_node::clone() const
{
    return new epsilon_node(*this);
}

inline bool 
epsilon_node::nullable() const
{
    return true;
}

inline node_set 
epsilon_node::firstpos() const
{
    return node_set();
}

inline node_set 
epsilon_node::lastpos() const
{
    return node_set();
}

inline void 
epsilon_node::compute_followpos(followpos_t&) const
{
    return;
}

inline void
epsilon_node::compute_state_match(state_match_t& state_match) const
{
    if (state_match.size() < m_node_num + 1)
        state_match.resize(m_node_num + 1);
    state_match[m_node_num].resize(256, 1);
}

inline void 
epsilon_node::get_eof_ids(node_set&) const
{
    return;
}

inline void 
epsilon_node::assign_node_ids(node_id_t& node_count)
{
    m_node_num = node_count++;
}

#if defined(SPIRIT_DEBUG)
inline void 
epsilon_node::dump(std::ostream& out) const
{
    out << "\nepsilon_node";
    out << " m_node_num = " << m_node_num;
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));
}
#endif


class or_node : public node
{

public:

    or_node(node* left, node* right);
    or_node(const or_node& x);
    virtual ~or_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    std::auto_ptr<node> m_left;
    std::auto_ptr<node> m_right;    
};

inline
or_node::or_node(node* left, node* right)
    : node()
    , m_left(left)
    , m_right(right)
{
}

inline 
or_node::or_node(const or_node& x)
    : node(x)
    , m_left(x.m_left->clone())
    , m_right(x.m_right->clone())
{
}

inline node * 
or_node::clone() const
{
    return new or_node(m_left->clone(), m_right->clone());
}

inline bool 
or_node::nullable() const
{
    return m_left->nullable() || m_right->nullable();
}

inline node_set 
or_node::firstpos() const
{
    node_set rval;
    node_set l = m_left->firstpos();
    node_set r = m_right->firstpos();
    std::set_union(l.begin(), l.end(), r.begin(), r.end(),
            std::inserter(rval, rval.begin()));
    return rval;
}

inline node_set 
or_node::lastpos() const
{
    node_set rval;
    node_set l = m_left->lastpos();
    node_set r = m_right->lastpos();
    std::set_union(l.begin(), l.end(), r.begin(), r.end(),
            std::inserter(rval, rval.begin()));
    return rval;
}

inline void 
or_node::compute_followpos(followpos_t& followpos) const
{
    m_left->compute_followpos(followpos);
    m_right->compute_followpos(followpos);
}

inline void
or_node::compute_state_match(state_match_t& state_match) const
{
    m_left->compute_state_match(state_match);
    m_right->compute_state_match(state_match);
}

inline void 
or_node::get_eof_ids(node_set& eof_nodes) const
{
    m_left->get_eof_ids(eof_nodes);
    m_right->get_eof_ids(eof_nodes);
}

inline void 
or_node::assign_node_ids(node_id_t& node_count)
{
    m_left->assign_node_ids(node_count);
    m_right->assign_node_ids(node_count);
}

#if defined(SPIRIT_DEBUG)
inline void 
or_node::dump(std::ostream& out) const
{
    m_left->dump(out);

    out << "\nor_node";
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    m_right->dump(out);
}
#endif


class cat_node : public node
{

public:

    cat_node(node* left, node* right);
    cat_node(const cat_node& x);
    virtual ~cat_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    std::auto_ptr<node> m_left;
    std::auto_ptr<node> m_right;    
};

inline
cat_node::cat_node(node* left, node* right)
    : node()
    , m_left(left)
    , m_right(right)
{
}

inline 
cat_node::cat_node(const cat_node& x)
    : node(x)
    , m_left(x.m_left->clone())
    , m_right(x.m_right->clone())
{
}

inline node * 
cat_node::clone() const
{
    return new cat_node(m_left->clone(), m_right->clone());
}

inline bool 
cat_node::nullable() const
{
    return m_left->nullable() && m_right->nullable();
}

inline node_set 
cat_node::firstpos() const
{
    if (m_left->nullable())
    {
        node_set rval;
        node_set l = m_left->firstpos();
        node_set r = m_right->firstpos();
        std::set_union(l.begin(), l.end(), r.begin(), r.end(),
                std::inserter(rval, rval.begin()));
        return rval;
    }
    else
    {
        return m_left->firstpos();
    }
}

inline node_set 
cat_node::lastpos() const
{
    if (m_right->nullable())
    {
        node_set rval;
        node_set l = m_left->lastpos();
        node_set r = m_right->lastpos();
        std::set_union(l.begin(), l.end(), r.begin(), r.end(),
                std::inserter(rval, rval.begin()));
        return rval;
    }
    else
    {
        return m_right->lastpos();
    }
}

inline void 
cat_node::compute_followpos(followpos_t& followpos) const
{
    node_set l = m_left->lastpos();
    for (node_set::iterator i = l.begin();
            i != l.end();
            ++i)
    {
        node_set rf = m_right->firstpos();
        followpos[*i].insert(rf.begin(), rf.end());
    }

    m_left->compute_followpos(followpos);
    m_right->compute_followpos(followpos);
}

inline void
cat_node::compute_state_match(state_match_t& state_match) const
{
    m_left->compute_state_match(state_match);
    m_right->compute_state_match(state_match);
}

inline void 
cat_node::get_eof_ids(node_set& eof_nodes) const
{
    m_left->get_eof_ids(eof_nodes);
    m_right->get_eof_ids(eof_nodes);
}

inline void 
cat_node::assign_node_ids(node_id_t& node_count)
{
    m_left->assign_node_ids(node_count);
    m_right->assign_node_ids(node_count);
}

#if defined(SPIRIT_DEBUG)
inline void 
cat_node::dump(std::ostream& out) const
{
    m_left->dump(out);

    out << "\ncat_node";
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    m_right->dump(out);
}
#endif


class star_node : public node
{

public:

    star_node(node* left);
    star_node(const star_node& x);
    virtual ~star_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    std::auto_ptr<node> m_left;
};

inline
star_node::star_node(node* left)
    : node()
    , m_left(left)
{
}

inline
star_node::star_node(const star_node& x)
    : node(x)
    , m_left(x.m_left->clone())
{
}

inline node * 
star_node::clone() const
{
    return new star_node(m_left->clone());
}

inline bool 
star_node::nullable() const
{
    return true;
}

inline node_set 
star_node::firstpos() const
{
    return m_left->firstpos();
}

inline node_set 
star_node::lastpos() const
{
    return m_left->lastpos();
}

inline void 
star_node::compute_followpos(followpos_t& followpos) const
{
    node_set lp = this->lastpos();
    for (node_set::iterator i = lp.begin();
            i != lp.end();
            ++i)
    {
        node_set fp = this->firstpos();
        followpos[*i].insert(fp.begin(), fp.end());
    }

    m_left->compute_followpos(followpos);
}

inline void
star_node::compute_state_match(state_match_t& state_match) const
{
    m_left->compute_state_match(state_match);
}

inline void 
star_node::get_eof_ids(node_set& eof_nodes) const
{
    m_left->get_eof_ids(eof_nodes);
}

inline void 
star_node::assign_node_ids(node_id_t& node_count)
{
    m_left->assign_node_ids(node_count);
}

#if defined(SPIRIT_DEBUG)
inline void 
star_node::dump(std::ostream& out) const
{
    m_left->dump(out);

    out << "\nstar_node";
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

}
#endif


class eof_node : public node
{

public:

    eof_node();
    eof_node(const eof_node& x);
    virtual ~eof_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    node_id_t m_node_num;
};

inline
eof_node::eof_node()
    : node()
    , m_node_num(0)
{
}

inline 
eof_node::eof_node(const eof_node& x)
    : node(x)
    , m_node_num(x.m_node_num)
{
}

inline node * 
eof_node::clone() const
{
    return new eof_node(*this);
}

inline bool 
eof_node::nullable() const
{
    return false;
}

inline node_set 
eof_node::firstpos() const
{
    node_set rval;
    rval.insert(m_node_num);
    return rval;
}

inline node_set 
eof_node::lastpos() const
{
    node_set rval;
    rval.insert(m_node_num);
    return rval;
}

inline void 
eof_node::compute_followpos(followpos_t&) const
{
    return;
}

inline void
eof_node::compute_state_match(state_match_t& state_match) const
{
    if (state_match.size() < m_node_num + 1)
        state_match.resize(m_node_num + 1);
    state_match[m_node_num].resize(256, 0);
}

inline void 
eof_node::get_eof_ids(node_set& eof_nodes) const
{
    eof_nodes.insert(m_node_num);
}

inline void 
eof_node::assign_node_ids(node_id_t& node_count)
{
    m_node_num = node_count++;
}

#if defined(SPIRIT_DEBUG)
inline void 
eof_node::dump(std::ostream& out) const
{
    out << "\neof_node";
    out << " m_node_num = " << m_node_num;
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));
}
#endif

class ccl_node : public node
{

public:

    ccl_node(const std::vector<uchar>& v);
    ccl_node(const uchar c1, const uchar c2);
    ccl_node(const ccl_node& x);
    virtual ~ccl_node(){}

    virtual node* clone() const;
    virtual bool nullable() const;
    virtual node_set firstpos() const;
    virtual node_set lastpos() const;
    virtual void compute_followpos(followpos_t& followpos) const;
    virtual void compute_state_match(state_match_t& state_match ) const;
    virtual void get_eof_ids(node_set& eof_set) const;
    virtual void assign_node_ids(node_id_t& node_count);
#if defined(SPIRIT_DEBUG)
    virtual void dump(std::ostream& out) const;
#endif

private:

    std::vector<uchar> m_match;
    node_id_t m_node_num;
};

inline 
ccl_node::ccl_node(const std::vector<uchar>& v)
    : node()
    , m_match(v)
    , m_node_num(0)
{
    m_match.resize(256); // make sure it's the right size
}

inline 
ccl_node::ccl_node(const uchar c1, const uchar c2)
    : node()
    , m_match(256, 0)
    , m_node_num(0)
{
    SPIRIT_ASSERT(c1 < c2);
    for (size_t i = c1; i <= size_t(c2); ++i)
    {
        m_match[i] = 1;
    }
}

inline 
ccl_node::ccl_node(const ccl_node& x)
    : node(x)
    , m_match(x.m_match)
    , m_node_num(x.m_node_num)
{
}

inline node * 
ccl_node::clone() const
{
    return new ccl_node(*this);
}

inline bool 
ccl_node::nullable() const
{
    return false;
}

inline node_set 
ccl_node::firstpos() const
{
    node_set rval;
    rval.insert(m_node_num);
    return rval;
}

inline node_set 
ccl_node::lastpos() const
{
    return firstpos();
}

inline void 
ccl_node::compute_followpos(followpos_t&) const
{
    return;
}

inline void
ccl_node::compute_state_match(state_match_t& state_match) const
{
    if (state_match.size() < m_node_num + 1)
        state_match.resize(m_node_num + 1);
    state_match[m_node_num] = m_match;
}

inline void 
ccl_node::get_eof_ids(node_set&) const
{
    return;
}

inline void 
ccl_node::assign_node_ids(node_id_t& node_count)
{
    m_node_num = node_count++;
}

#if defined(SPIRIT_DEBUG)
inline void 
ccl_node::dump(std::ostream& out) const
{
    out << "\nccl_node m_match = ";
    for (size_t i = 0; i < m_match.size(); ++i)
    {
        if (m_match[i])
            out << i << ", ";
    }
    out << " m_node_num = " << m_node_num;
    out << " nullable() = " << (nullable() ? "true" : "false");
    out << " firstpos() = ";
    node_set fp = firstpos();
    std::copy(fp.begin(), fp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));

    out << " lastpos() = ";
    node_set lp = lastpos();
    std::copy(lp.begin(), lp.end(), 
            std::ostream_iterator<node_id_t>(out, ","));
}
#endif

template <typename ScannerT>
class make_concat
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:

    make_concat(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const &, iterator_t const &) const
    {
        node* right = m_stack.top();
        m_stack.pop();
        node* left = m_stack.top();
        m_stack.pop();
        node* newnode = new cat_node(left, right);
        m_stack.push(newnode);
    }

    std::stack<node*>& m_stack;
};

template <int CharTSize>
struct get_byte_aux;

template<>
struct get_byte_aux<1>
{
    template <typename CharT>
    unsigned char operator()(CharT c, unsigned int byte)
    {
        SPIRIT_ASSERT(byte == 0);
        return c;
    }
};

template<>
struct get_byte_aux<2>
{
    template <typename CharT>
    unsigned char operator()(CharT c, unsigned int byte)
    {
        static unsigned long mask[] =
        {
            0xFF00,
            0x00FF
        };

        SPIRIT_ASSERT(byte < 2);
        return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
    }
};

template<>
struct get_byte_aux<4>
{
    template <typename CharT>
    unsigned char operator()(CharT c, unsigned int byte)
    {
        static unsigned long mask[] =
        {
            0xFF000000,
            0x00FF0000,
            0x0000FF00,
            0x000000FF
        };

        SPIRIT_ASSERT(byte < 4);
        return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
    }
};

template <typename CharT>
inline unsigned char 
get_byte(CharT c, unsigned int byte)
{
    return get_byte_aux<sizeof(c)>()(c, byte);
}

template <typename ScannerT>
class make_star
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    make_star(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(const char_t) const
    {
        node* left = m_stack.top();
        m_stack.pop();
        node* newnode = new star_node(left);
        m_stack.push(newnode);
    }

    std::stack<node*>& m_stack;
};

template <typename ScannerT>
class make_or
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:

    make_or(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const&, iterator_t const&) const
    {
        node* right = m_stack.top();
        m_stack.pop();
        node* left = m_stack.top();
        m_stack.pop();
        node* newnode = new or_node(left, right);
        m_stack.push(newnode);
    }

    std::stack<node*>& m_stack;
};

template <typename ScannerT>
class make_plus
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    make_plus(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(const char_t) const
    {
        node* left = m_stack.top();
        m_stack.pop();

        node* copy = left->clone();

        node* new_star = new star_node(copy);
        node* new_cat = new cat_node(left, new_star);
        m_stack.push(new_cat);
    }

    std::stack<node*>& m_stack;
};

template <typename ScannerT>
class make_optional
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    make_optional(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(const char_t) const
    {
        node* left = m_stack.top();
        m_stack.pop();

        node* new_or = new or_node(left, new epsilon_node());
        m_stack.push(new_or);
    }

    std::stack<node*>& m_stack;
};

///////////////////////////////////////////////////////////////////////////////
//  utility function
template <typename CharT>
inline impl::range<CharT> const&
full_range()
{
    static impl::range<CharT> full(std::numeric_limits<CharT>::min(),
        std::numeric_limits<CharT>::max());
    return full;
}

namespace ccl_utils
{
    template <typename char_t>
    inline impl::range_run<char_t> 
    negate_range_run(
            const impl::range_run<char_t>& rr)
    {
        impl::range_run<char_t> newrr;
        newrr.set(full_range<char_t>());
        for (typename impl::range_run<char_t>::const_iterator iter = rr.begin(); 
                iter != rr.end(); ++iter)
            newrr.clear(*iter);
        return newrr;
    }

    template <typename char_t>
    inline node* 
    create_mb_node_seq(char_t c)
    {
        node* newnode = new char_node(get_byte(c, 0));
        for (unsigned int i = 1; i < sizeof(c); ++i)
        {
            node* cnode = new char_node(get_byte(c, i));
            node* top_node = new cat_node(newnode, cnode);
            newnode = top_node;
        }
        return newnode;
    }

    template <typename char_t>
    inline void 
    handle_mb_char(char_t c, bool first_time,
            std::stack<node*>& stack)
    {
        node* newnode = create_mb_node_seq(c);

        if (first_time)
        {
            stack.push(newnode);
        }
        else
        {
            node* top = stack.top();
            stack.pop();

            node* newtop = new or_node(top, newnode);
            stack.push(newtop);
        }
    }

    // forward decl only
    template <typename char_t>
    inline void 
    handle_mb_range(char_t c1, char_t c2, bool first_time,
            std::stack<node*>& stack);

    template <typename char_t>
    inline void 
    create_nodes(const impl::range_run<char_t>& rr, 
            std::stack<node*>& stack) 
    {

        if (sizeof(char_t) == 1)
        {
            std::vector<uchar> ccl;
            ccl.resize(256);
            for (typename impl::range_run<char_t>::const_iterator iter = rr.begin(); 
                    iter != rr.end(); ++iter)
            {
                for (int i = iter->first; i <= iter->last; ++i)
                {
                    SPIRIT_ASSERT(uchar(i) < 256 && ccl.size() == 256);
                    ccl[uchar(i)] = 1;
                }
            }

            node* new_ccl = new ccl_node(ccl);
            stack.push(new_ccl);
        }
        else
        {
            bool mb_first_time = true;
            for (typename impl::range_run<char_t>::const_iterator iter = rr.begin(); 
                    iter != rr.end(); ++iter)
            {
                if (iter->first == iter->last)
                {
                    handle_mb_char(iter->first, mb_first_time, stack);
                }
                else
                {
                    handle_mb_range(iter->first, iter->last, mb_first_time, stack);
                }
                mb_first_time = false;
            }
        }
    }

    template <typename char_t>
    inline size_t 
    compute_differing_byte(char_t c1, char_t c2)
    {
        size_t rval = 0;
        while (rval < sizeof(c1) && get_byte(c1, rval) == get_byte(c2, rval))
           ++rval;

        return rval; 
    }

    template <typename char_t>
    inline node* 
    create_mb_node_type1(size_t j, char_t c1, char_t c2)
    {
        size_t diff = get_byte(c2, j) - get_byte(c1, j);
        if (diff == 1)
            return 0;
        else if (diff == 2)
            return new char_node(get_byte(c1, j)+1);
        else 
            return new ccl_node(get_byte(c1, j)+1, get_byte(c2, j)-1);
    }

    template <typename char_t>
    inline node * 
    create_mb_node_for_byte(size_t i, size_t j, size_t sizem1,
            size_t differing_byte, char_t c1, char_t c2, node* newnode)
    {
        node* cnode;
        if (i == sizem1 && j == differing_byte && j != sizem1)
        {
            node* tmp = create_mb_node_type1(j, c1, c2);
            if (tmp == 0)
            {
                delete newnode;
                return 0;
            }
            else
                cnode = tmp;
        }

        else if (i == differing_byte && j == sizem1)
        {
            if (i != sizem1)
                cnode = new ccl_node(get_byte(c1, j), 0xFF);
            else
                cnode = new ccl_node(get_byte(c1, j), get_byte(c2, j));
        }

        else if (i != differing_byte && i != sizem1 && 
                j == (sizem1 - i + differing_byte))
            cnode = new ccl_node(get_byte(c1, j)+1, 0xFF);

        else if (i + j - differing_byte > sizem1)
            cnode = new ccl_node(0, 0xFF);

        else //if (is plain)
            cnode = new char_node(get_byte(c1, j));


        node* top_node = new cat_node(newnode, cnode);
        return top_node;
    }

// On platforms, where wchar_t is a typedef for unsigned short, the 
// comparision for a negative value is pointless
    template <bool is_signed>
    struct correct_char_aux {
    };
        
    template <>
    struct correct_char_aux<true> {
    
        template <typename char_t>
        static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
    };

    template <>
    struct correct_char_aux<false> {
    
        template <typename char_t>
        static char_t correct(char_t c) { return c; }
    };

    template <typename char_t>
    struct correct_char
    {
        static char_t correct(char_t c)
        {
            return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
                correct(c);
        }
    };

    template <typename char_t>
    inline void 
    handle_mb_range(char_t c1, char_t c2, bool first_time,
            std::stack<node*>& stack)
    {
        // The algorithm can't handle negative value chars, which don't make
        // much sense anyway. This comparision is pointless for wchar_t's on 
        // platforms, where wchar_t is a typedef for unsigned short

        c1 = correct_char<char_t>::correct(c1);
        //if (c1 < 0)
        //    c1 = 0;
        
        SPIRIT_ASSERT(c1 < c2);
        node* newnode = 0;
        node* savednode = 0;
        const size_t differing_byte = compute_differing_byte(c1, c2);
        const size_t sizem1 = sizeof(c1) - 1;
        const size_t ndb = sizem1 - differing_byte;
        for (size_t i = differing_byte; i < sizeof(c1); ++i)
        {
            // generate node for the first byte
            if (differing_byte == 0 && i == ndb)
            {
                node* tmp = create_mb_node_type1(0, c1, c2);
                if (tmp == 0)
                    continue;
                else
                    newnode = tmp;
            }
            else
            {
                newnode = new char_node(get_byte(c1, 0));
            }
            for (size_t j = 1; j < sizeof(c1); ++j)
            {
                newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
                        c1, c2, newnode);
                if (newnode == 0)
                    goto end_outer_for;
            }

            // or together the various parts
            if (savednode)
            {
                node* top_node = new or_node(savednode, newnode);
                savednode = top_node;
            }
            else
            {
                savednode = newnode;
            }
end_outer_for:
            continue;
        }

        for (size_t k = 0; k < ndb; ++k)
        {
            newnode = new char_node(get_byte(c2, 0));
            for (size_t j = 1; j < sizeof(c2); ++j)
            {
                node* cnode;
                if (k == differing_byte && j == sizem1 && k != sizem1)
                    cnode = new ccl_node(0, get_byte(c2, j));

                else if (k != differing_byte && k != sizem1 && 
                        j == (sizem1 - k + differing_byte))
                    cnode = new ccl_node(0, get_byte(c2, j)-1);

                else if (k + j - differing_byte > sizem1)
                    cnode = new ccl_node(0, 0xFF);

                else //if (is plain)
                    cnode = new char_node(get_byte(c2, j));


                node* top_node = new cat_node(newnode, cnode);
                newnode = top_node;
            }

            // or together the various parts
            if (savednode)
            {
                node* top_node = new or_node(savednode, newnode);
                savednode = top_node;
            }
            else
            {
                savednode = newnode;
            }
        }

        
        if (first_time)
        {
            stack.push(savednode);
        }
        else
        {
            node* top = stack.top();
            stack.pop();

            node* newtop = new or_node(top, savednode);
            stack.push(newtop);
        }
    }
} // namespace ccl_utils

template <typename ScannerT>
class make_char
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    make_char(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const& first, iterator_t const& last) const
    {
        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
            escape_char_parser<lex_escapes, char_t>();
        char_t the_char;
        iterator_t first_ = first;
        ScannerT scan(first_, last);
        lex_escape_ch[assign(the_char)].parse(scan);
        node* newnode = ccl_utils::create_mb_node_seq(the_char);
        m_stack.push(newnode);
    }

    std::stack<node*>& m_stack;
};


template <typename ScannerT>
class make_ccl
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    make_ccl(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    static bool is_equal_to_string(iterator_t first, 
        iterator_t const & last, const char* str)
    {
        while (first != last &&*str &&*first ==*str)
        {
            ++first;
            ++str;
        }
        return*str == 0;
    }

    template <typename ParserT>
    static void fill_ccl(impl::range_run<char_t>& rr, const ParserT& parser)
    {
        for (int i = 0; i < 256; ++i)
        {
            if (parser.test(uchar(i)))
                rr.set(impl::range<char_t>(i, i));
        }
    }

    void operator()(iterator_t const& first_, iterator_t const& last) const
    {
        SPIRIT_ASSERT(*first_ == '[');

        iterator_t first = first_;
        ++first; // skip over '['
        bool negated_ccl = false;
        if (*first == '^')
        {
            negated_ccl = true;
            ++first;
        }

        impl::range_run<char_t> rr;
        while (first != last &&*first != ']')
        {
            if (*first == '[') // it's a ccl_expr like [:space:]
            {
                // check for [:space:], etc.
                if (is_equal_to_string(first, last, "[:alnum:]"))
                {
                    fill_ccl(rr, alnum_p);
                }
                else if (is_equal_to_string(first, last, "[:alpha:]"))
                {
                    fill_ccl(rr, alpha_p);
                }
                else if (is_equal_to_string(first, last, "[:blank:]"))
                {
                    fill_ccl(rr, blank_p);
                }
                else if (is_equal_to_string(first, last, "[:cntrl:]"))
                {
                    fill_ccl(rr, cntrl_p);
                }
                else if (is_equal_to_string(first, last, "[:digit:]"))
                {
                    fill_ccl(rr, digit_p);
                }
                else if (is_equal_to_string(first, last, "[:graph:]"))
                {
                    fill_ccl(rr, graph_p);
                }
                else if (is_equal_to_string(first, last, "[:lower:]"))
                {
                    fill_ccl(rr, lower_p);
                }
                else if (is_equal_to_string(first, last, "[:print:]"))
                {
                    fill_ccl(rr, print_p);
                }
                else if (is_equal_to_string(first, last, "[:punct:]"))
                {
                    fill_ccl(rr, punct_p);
                }
                else if (is_equal_to_string(first, last, "[:space:]"))
                {
                    fill_ccl(rr, space_p);
                }
                else if (is_equal_to_string(first, last, "[:upper:]"))
                {
                    fill_ccl(rr, upper_p);
                }
                else if (is_equal_to_string(first, last, "[:xdigit:]"))
                {
                    fill_ccl(rr, xdigit_p);
                }
                // this can't happen, because it's parsed before we get here.
                //else
                //    throw bad_regex();

                // Advance past the character class expression
                while (first != last &&*first != ']')
                    ++first;
                SPIRIT_ASSERT(*first == ']');
                ++first;
            }
            else {
                const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
                    escape_char_parser<lex_escapes, char_t>();
                
                char_t c1;
                ScannerT scan(first, last);
                lex_escape_ch[assign(c1)].parse(scan);
                if (*scan.first == '-') // insert a range
                {
                    ++scan.first;
                    char_t c2;
                    lex_escape_ch[assign(c2)].parse(scan);
                    SPIRIT_ASSERT(c1 < c2); // Throw exception?
                    rr.set(impl::range<char_t>(c1, c2));
                }
                else // insert 1 char
                {
                    rr.set(impl::range<char_t>(c1, c1));
                }
            }
        }

        if (negated_ccl)
        {
            rr = ccl_utils::negate_range_run(rr);
        }

        ccl_utils::create_nodes(rr, m_stack);
    }

    std::stack<node*>& m_stack;
};

template <typename ScannerT>
class make_any_char
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    std::stack<node*>& m_stack;

    make_any_char(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(const char_t c) const
    {
        SPIRIT_ASSERT(c == '.');
        do_any_char();
    }

    void do_any_char() const
    {
        static impl::range_run<char_t> rr;
        rr.set(full_range<char_t>());
        char_t newline = '\n';
        rr.clear(impl::range<char_t>(newline, newline));

        ccl_utils::create_nodes(rr, m_stack);
    }
};

template <typename ScannerT>
class make_string
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    std::stack<node*>& m_stack;

    make_string(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const& first, iterator_t const& last) const
    {
        SPIRIT_ASSERT(*first == '"');

        iterator_t first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '"'

        // empty string not allowed
        if (*scan.first == '"')
        {
            throw bad_regex();
        }

        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
            escape_char_parser<lex_escapes, char_t>();

        char_t c;
        lex_escape_ch[assign(c)].parse(scan);
        node* top_node = ccl_utils::create_mb_node_seq(c);

        while (*scan.first != '"' && scan.first != scan.last)
        {
            lex_escape_ch[assign(c)].parse(scan);
            node* cur_node = ccl_utils::create_mb_node_seq(c);
            top_node = new cat_node(top_node, cur_node);
        }
        m_stack.push(top_node);
    }
};

inline 
node* repeat_node(node* n, int num)
{
    node* list_of_nodes = n;
    for (int i = 1; i < num; ++i)
    {
        list_of_nodes = new cat_node(list_of_nodes, n->clone());
    }
    return list_of_nodes;
}

inline 
node* optional_node(node* n)
{
    return new or_node(n, new epsilon_node());
}

template <typename ScannerT>
class make_rep1
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    std::stack<node*>& m_stack;

    make_rep1(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const& first, iterator_t const& last) const
    {
        SPIRIT_ASSERT(*first == '{');

        iterator_t first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '{'

        unsigned int count;
        uint_p[assign(count)].parse(scan);
        if (count == 0)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        top_node = repeat_node(top_node, count);
        m_stack.push(top_node);
    }
};

template <typename ScannerT>
class make_rep2
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    std::stack<node*>& m_stack;

    make_rep2(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const& first, iterator_t const& last) const
    {
        SPIRIT_ASSERT(*first == '{');

        iterator_t first_ = first;
        ScannerT scan (first_, last);
        ++scan.first; // skip over '{'

        unsigned int count;
        uint_p[assign(count)].parse(scan);
        if (count == 0)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        top_node = new cat_node(repeat_node(top_node, count), 
                new star_node(top_node->clone()));
        m_stack.push(top_node);

    }
};

template <typename ScannerT>
class make_rep3
{
    typedef typename ScannerT::iterator_t iterator_t;
    
public:
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
        char_t;

    std::stack<node*>& m_stack;

    make_rep3(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_t const& first, iterator_t const& last) const
    {
        SPIRIT_ASSERT(*first == '{');

        iterator_t first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '{'
        
        unsigned int count1, count2;
        uint_p[assign(count1)].parse(scan);
        if (count1 == 0)
            throw bad_regex();

        ++scan.first; // skip over ','

        uint_p[assign(count2)].parse(scan);
        if (count2 <= count1)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        node* repeats = repeat_node(top_node, count1);
        top_node = new cat_node(repeats, 
                repeat_node(optional_node(top_node->clone()), 
                    count2 - count1));

        m_stack.push(top_node);
    }
};

///////////////////////////////////////////////////////////////////////////////
//  
//  Lexer grammar
//
//      Defines the grammar, which mandates the syntax of the understood
//      lexeme definitions passed to lexer::register_regex.
//
///////////////////////////////////////////////////////////////////////////////
class lexer_grammar : public spirit::grammar<lexer_grammar>
{
public:
    lexer_grammar(std::stack<node*> &node_stack_) 
    : node_stack(node_stack_) {}
    
    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;
        typedef typename ScannerT::iterator_t iterator_t;
        typedef 
            typename SPIRIT_IT_NS::iterator_traits<iterator_t>::value_type 
            char_t;
        
        rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string, 
            escseq, ccl_char;
        symbols<> ccl_expr;

        definition(lexer_grammar const &self)
        {
            regex = 
                    re >> !('/' >> re) >> !ch_p('$')
                ;

            re =
                    series 
                >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
                ;

            series =
                    singleton 
                >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
                ;

            singleton = 
                    ch_p('.')[make_any_char<ScannerT>(self.node_stack)] 
                    >>  singleton2
                |   fullccl 
                    >>  singleton2
                |   ('"' >> string >> '"')
                    [
                        make_string<ScannerT>(self.node_stack)
                    ] 
                    >>  singleton2
                |   '(' >> re >> ')' 
                    >>  singleton2
                |   ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
                    [
                        make_char<ScannerT>(self.node_stack)
                    ] 
                    >>  singleton2
                ;

            singleton2 =
                    ch_p('*')[make_star<ScannerT>(self.node_stack)] 
                    >> singleton2
                |   ch_p('+')[make_plus<ScannerT>(self.node_stack)] 
                    >> singleton2
                |   ch_p('?')[make_optional<ScannerT>(self.node_stack)] 
                    >> singleton2
                |   ('{' >> uint_p >> '}')
                    [
                        make_rep1<ScannerT>(self.node_stack)
                    ] 
                    >>  singleton2
                |   ('{' >> uint_p >> ',' >> '}')
                    [
                        make_rep2<ScannerT>(self.node_stack)
                    ] 
                    >>  singleton2
                |   ('{' >> uint_p >> ',' >> uint_p >> '}')
                    [
                        make_rep3<ScannerT>(self.node_stack)
                    ] 
                    >> singleton2
                |   epsilon_p
                ;

            fullccl =
                    ('[' >> !ch_p('^') >> ccl >> ']')
                    [
                        make_ccl<ScannerT>(self.node_stack)
                    ]
                ;

            ccl =
                   *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
                ;

            ccl_char =
                    ( (anychar_p - chset<>("\\\n]")) | escseq )
                ;

            ccl_expr =
                    "[:alnum:]",
                    "[:alpha:]",
                    "[:blank:]",
                    "[:cntrl:]",
                    "[:digit:]",
                    "[:graph:]",
                    "[:lower:]",
                    "[:print:]",
                    "[:punct:]",
                    "[:space:]",
                    "[:upper:]",
                    "[:xdigit:]"
                ;
                
            string =
                    +( (anychar_p - chset<>("\"\\")) | escseq )
                ;

            typedef 
                uint_parser<char_t, 8, 1, 
                    std::numeric_limits<char_t>::digits / 3 + 1
                > oct_parser_t;
            typedef 
                uint_parser<char_t, 16, 1, 
                    std::numeric_limits<char_t>::digits / 4 + 1
                > hex_parser_t;

            escseq =
                    ch_p('\\')
                    >>  (
                            oct_parser_t()
                        |   nocase_d['x'] >> hex_parser_t()
                        |   (anychar_p - chset<>('\n'))
                        )
                ;

    #if defined(SPIRIT_DEBUG)
            SPIRIT_DEBUG_RULE(regex);
            SPIRIT_DEBUG_RULE(re);
            SPIRIT_DEBUG_RULE(series);
            SPIRIT_DEBUG_RULE(singleton);
            SPIRIT_DEBUG_RULE(singleton2);
            SPIRIT_DEBUG_RULE(fullccl);
            SPIRIT_DEBUG_RULE(ccl);
            SPIRIT_DEBUG_RULE(string);
            SPIRIT_DEBUG_RULE(escseq);
            SPIRIT_DEBUG_RULE(ccl_char);
    #endif 
        }

        rule<ScannerT> const&
        start() const { return regex; }
    };
    
    std::stack<node*> &node_stack;

}; // class lexer_grammar

template <typename StringT>
inline node * 
parse(lexer_grammar& g, StringT const& str)
{
    typedef 
        scanner<typename StringT::const_iterator, scanner_policies<> > 
        scanner_t;
    
    typename StringT::const_iterator first = str.begin();
    typename StringT::const_iterator last = str.end();
    
    scanner_t scan(first, last);
    typename rule<scanner_t>::result_t hit = g.parse(scan);
    if (!hit || !scan.at_end())
    {
        while (g.node_stack.size())
        {
            delete g.node_stack.top();
            g.node_stack.pop();
        }
        return 0;
    }

    SPIRIT_ASSERT(g.node_stack.size() == 1);
    node* rval = g.node_stack.top();
    g.node_stack.pop();
    node* an_eof_node = new eof_node();
    rval = new cat_node(rval, an_eof_node);
    return rval;
}

inline
void make_case_insensitive(state_match_t& state_match)
{
    // TODO: Fix this.
    // This doesn't take into account foreign languages, figure out how to
    // do that. Also this approach is broken for this implementation of
    // wide chars.
    for (state_match_t::iterator iter = state_match.begin();
            iter != state_match.end(); ++iter)
    {
        int i, j;
        for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
        {
            // if either is set, turn them both on
            (*iter)[i] = (*iter)[j] = (*iter)[i] | (*iter)[j];
        }
    }
}

template<bool wide_char>
struct regex_match_helper;

template<>
struct regex_match_helper<false> // single byte char
{
    template <typename DfaT, typename IteratorT>
    static bool 
    do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last, 
        int& regex_index)
    {
        node_id_t s = 0;
        node_id_t last_accepting_index = invalid_node;
        IteratorT p = first;
        IteratorT last_accepting_cpos = first;
        while (p != last)
        {
            s = dfa.transition_table[s][(uchar)*p];
            if (s == invalid_node)
                break;
            ++p;
            if (dfa.acceptance_index[s] != invalid_node)
            {
                last_accepting_index = s;
                last_accepting_cpos = p;
            }
        }
        if (last_accepting_index != invalid_node)
        {
#if defined(SPIRIT_DEBUG)
            std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " << 
                dfa.acceptance_index[last_accepting_index] << '\n';
#endif

            first = last_accepting_cpos;
            regex_index = dfa.acceptance_index[last_accepting_index];
            return true;
        }
        else
            return false;
    }
};

template<>
struct regex_match_helper<true> // wide char
{
    template <typename DfaT, typename IteratorT>
    static bool 
    do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last, 
        int& regex_index)
    {
        typedef 
            typename SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type 
            char_t;

        node_id_t s = 0;
        node_id_t last_accepting_index = invalid_node;
        IteratorT wp = first;
        IteratorT last_accepting_cpos = first;
        
        while (wp != last)
        {
            for (unsigned int i = 0;  i < sizeof(char_t); ++i)
            {
                s = dfa.transition_table[s][get_byte(*wp,i)];
                if (s == invalid_node)
                {
                    goto break_while;
                }
            }
            ++wp;
            if (dfa.acceptance_index[s] != invalid_node)
            {
                last_accepting_index = s;
                last_accepting_cpos = wp;
            }

        }

    break_while:
        if (last_accepting_index != invalid_node)
        {
#if defined(SPIRIT_DEBUG)
            std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " << 
                dfa.acceptance_index[last_accepting_index] << '\n';
#endif
            first = last_accepting_cpos;
            regex_index = dfa.acceptance_index[last_accepting_index];

            return true;
        }
        else
            return false;
    }
};

template <typename DfaT, typename IteratorT, bool wide_char>
struct regex_match
{
    static bool 
    do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last, 
        int& regex_index)
    {
        return regex_match_helper<wide_char>::do_match(
            dfa, first, last, regex_index);
    }
};

} // namespace lexerimpl

///////////////////////////////////////////////////////////////////////////////
//  
template <typename IteratorT = char const*, typename TokenT = int, 
          typename CallbackT = void(*)(IteratorT const &, 
                                       IteratorT &,
                                       IteratorT const&, 
                                       TokenT const&,
                                       lexer_control<TokenT> &)>
class lexer
{
public:
    typedef CallbackT callback_t;
    typedef 
        typename SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type 
        char_t;

    struct regex_info
    {
        std::basic_string<char_t>   str;
        TokenT                      token;
        CallbackT                   callback;

        regex_info(const std::basic_string<char_t>& _str, 
                const TokenT& _token,
                const CallbackT& _callback)
            : str(_str)
            , token(_token)
            , callback(_callback)
            {}

    };

    struct dfa_table
    {
        std::vector<std::vector<node_id_t> >    transition_table;
        std::vector<node_id_t>                  acceptance_index;
    };
    typedef std::vector<node_id_t> node_table_t;
    typedef std::vector<node_table_t> transition_table_t;
    typedef std::vector<dfa_table> dfa_t;


    lexer(unsigned int states = 1);

    void register_regex(const std::basic_string<char_t>& regex, 
            const TokenT& id, const CallbackT& cb = CallbackT(),
            unsigned int state = 0);

    TokenT next_token(IteratorT &first, IteratorT const &last);

    void create_dfa();

    void set_case_insensitive(bool insensitive);

#if defined(SPIRIT_DEBUG)
    void dump(std::ostream& out);
#endif
    typedef std::vector<std::vector<regex_info> > regex_list_t;

    bool load (std::ifstream &in, long unique_id = 0);
    bool save (std::ofstream &out, long unique_id = 0);
    enum {
        SLEX_SIGNATURE = 0x58454C53,    // "SLEX"
        SLEX_VERSION_100 = 0x0100,      // persistance version
        SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
        SLEX_MINOR_VERSION_MASK = 0xFF
    };

private:

    void create_dfa_for_state(int state);

    static bool regex_match(const dfa_t& dfa, IteratorT& first, 
        IteratorT& last, int& regex_index);

    mutable std::stack<lexerimpl::node*> node_stack;
    lexerimpl::lexer_grammar g;
    
    mutable bool m_compiled_dfa;
    mutable dfa_t m_dfa;

    regex_list_t m_regex_list;
    bool m_case_insensitive;

    unsigned int m_state;
    std::stack<unsigned int> m_state_stack;
    unsigned int m_num_states;
};


template <typename IteratorT, typename TokenT, typename CallbackT>
inline
lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
    : g(node_stack)
    , m_compiled_dfa(false)
    , m_regex_list(states)
    , m_case_insensitive(false)
    , m_state(0)
    , m_state_stack()
    , m_num_states(states)
{
}

template <typename IteratorT, typename TokenT, typename CallbackT>
inline void
lexer<IteratorT, TokenT, CallbackT>::register_regex(
        const std::basic_string<char_t>& regex, const TokenT& id, 
        const CallbackT& callback, unsigned int state)
{
    if (state > m_num_states) {
        m_regex_list.resize(state);
        m_num_states = state;
    }
    m_regex_list[state].push_back(regex_info(regex, id, callback));
}

template <typename IteratorT, typename TokenT, typename CallbackT>
inline TokenT
lexer<IteratorT, TokenT, CallbackT>::next_token(
    IteratorT &first, IteratorT const& last)
{
    if (!m_compiled_dfa)
    {
        create_dfa();
    }

    IteratorT saved = first;
    int regex_index;
    if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
            do_match(m_dfa[m_state], first, last, regex_index))
        return -1;  // TODO: can't return -1, need to return some invalid token.
    // how to figure this out?  We can use traits I guess.
    else
    {
        regex_info regex = m_regex_list[m_state][regex_index];
        TokenT rval = regex.token;
        if (regex.callback)
        {
            // execute corresponding callback
            lexer_control<TokenT> controller(rval, m_state, m_state_stack);
            regex.callback(saved, first, last, regex.token, controller);
            if (controller.ignore_current_token_set())
                return next_token(first, last);
        }
        return rval;
    }
}

namespace lexerimpl
{

inline
bool find_acceptance_state(const node_set& eof_node_ids, 
        const node_set& current_set,
        node_id_t& acceptance_node_id)
{
    for(node_set::const_iterator nsi = eof_node_ids.begin();
            nsi != eof_node_ids.end(); ++nsi)
    {
        node_id_t eof_node_id =*nsi;
        if (current_set.end() != current_set.find(eof_node_id))
        {
            // store the first one we come to as the
            // matched pattern
            acceptance_node_id = eof_node_id;
            // don't bother searching for more
            return true;
        }
    }
    return false;
}
 
template <typename RegexListT, typename GrammarT>
inline std::auto_ptr<node> 
parse_regexes(const RegexListT& regex_list, GrammarT& g)
{
    // parse the expressions into a tree
    if (regex_list.begin() == regex_list.end())
        throw bad_regex();

    typename RegexListT::const_iterator ri = regex_list.begin();
    std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
    if (tree.get() == 0)
        throw bad_regex();

    ++ri;
    for (/**/; ri != regex_list.end(); ++ri)
    {
        std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
        if (next_tree.get() == 0)
            throw bad_regex();
        std::auto_ptr<node> newnode(new or_node(tree.release(), next_tree.release()));
        tree = newnode;
    }
    return tree;
}

} //namespace lexerimpl

template <typename IteratorT, typename TokenT, typename CallbackT>
inline void 
lexer<IteratorT, TokenT, CallbackT>::create_dfa()
{
    m_dfa.resize(m_num_states);
    for (unsigned int i = 0; i < m_num_states; ++i)
        create_dfa_for_state(i);
}

// Algorithm from Compilers: Principles, Techniques, and Tools p. 141
template <typename IteratorT, typename TokenT, typename CallbackT>
inline void 
lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
{
    using lexerimpl::node;
    std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
    node_id_t dummy = 0;
    tree->assign_node_ids(dummy);

#if defined(SPIRIT_DEBUG)
    tree->dump(std::cout);
#endif

    // compute followpos(root)
    followpos_t followpos;
    tree->compute_followpos(followpos);

    // the dfa states <-> nfa state groups
    std::map<node_set, node_id_t> dstates1;
    std::map<node_id_t, node_set> dstates2;

    // the dfa transitions
    m_dfa[state].transition_table.push_back(
            std::vector<node_id_t>(256, invalid_node));
    m_dfa[state].acceptance_index.push_back(invalid_node);

    // whether the dfa state has been processed yet
    std::vector<node_id_t> marked;

    // used to give a unique id to each dfa state
    node_id_t num_states = 0;

    // initially, the only unmarked state in Dstates is firstpos(root).
    marked.push_back(0);
    node_set fpr = tree->firstpos();
    dstates1[fpr] = 0;
    dstates2[0] = fpr;
    state_match_t state_match;
    tree->compute_state_match(state_match);

    if (m_case_insensitive)
        lexerimpl::make_case_insensitive(state_match);

    node_set eof_node_ids;
    tree->get_eof_ids(eof_node_ids);
    // translate the eof_node_ids into a 0-based index
    std::map<node_id_t, node_id_t> eof_node_id_map;
    unsigned int x = 0;
    for (node_set::iterator node_id_it = eof_node_ids.begin();
            node_id_it != eof_node_ids.end();
            ++node_id_it)
    {
        eof_node_id_map[*node_id_it] = x++;
    }
    
    // figure out if this is an acceptance state
    node_id_t eof_node_id;
    if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
    {
        m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
    }

    std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(), 
            node_id_t(0));
    while (marked.end() != i)
    {
       *i = 1;
        node_id_t T = node_id_t(std::distance(marked.begin(), i));
        SPIRIT_ASSERT(T < dstates2.size());
        node_set Tstates = dstates2[T];
        for (node_id_t j = 0; j < 256; ++j)
        {
            node_set U;
            for (node_set::iterator k = Tstates.begin();
                    k != Tstates.end(); ++k)
            {
                node_id_t p =*k;
                SPIRIT_ASSERT(p < state_match.size());
                SPIRIT_ASSERT(j < state_match[p].size());
                if (state_match[p][j])
                {
                    node_set fpp = followpos[p];
                    U.insert(fpp.begin(), fpp.end());
                }
            }
            if (U.size() > 0)
            {
                std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
                node_id_t target_state;
                if (l == dstates1.end()) // not in the states yet
                {
                    ++num_states;
                    dstates1[U] = target_state = num_states;
                    dstates2[target_state] = U;
                    marked.push_back(0);
                    m_dfa[state].transition_table.push_back(
                            std::vector<node_id_t>(256, invalid_node));
                    m_dfa[state].acceptance_index.push_back(invalid_node);
                    // figure out if this is an acceptance state
                    node_id_t eof_node_id;
                    if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
                    {
                        m_dfa[state].acceptance_index[target_state] = 
                            eof_node_id_map[eof_node_id];
                    }
                }
                else
                {
                    target_state = dstates1[U];
                }

                SPIRIT_ASSERT(T < m_dfa[state].transition_table.size());
                SPIRIT_ASSERT(j < m_dfa[state].transition_table[T].size());
                m_dfa[state].transition_table[T][j] = target_state;
            }
            
        }

        i = std::find(marked.begin(), marked.end(), node_id_t(0));
    }
    m_compiled_dfa = true;
}

template <typename IteratorT, typename TokenT, typename CallbackT>
inline void 
lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
{
    m_case_insensitive = insensitive;
}


#if defined(SPIRIT_DEBUG)
template <typename IteratorT, typename TokenT, typename CallbackT>
inline void
lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
{
    for (unsigned x = 0; x < m_dfa.size(); ++x)
    {
        out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
        for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
        {
            out << "state " << i << ":";
            for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
            {
                if (m_dfa[x].transition_table[i][j] != invalid_node)
                    out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
            }
            out << "\n";
        }
        out << "acceptance states: ";
        for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
        {
            if (m_dfa[x].acceptance_index[k] != invalid_node)
                out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
        }
        out << endl;
    }
}
#endif

///////////////////////////////////////////////////////////////////////////////
// load the lexer tables 
#define slex_in(strm, val) \
    strm.read((char*)&val, sizeof(val)); \
    if (std::ios::goodbit != strm.rdstate()) return false

template <typename IteratorT, typename TokenT, typename CallbackT>
inline bool
lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
{
// ensure correct signature and version
long signature = 0;

    slex_in (in, signature);
    if (signature != SLEX_SIGNATURE) 
        return false;       // not for us

long version = 0;

    slex_in (in, version);
    if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
        return false;       // to new for us

long uid = 0;

    slex_in (in, uid);
    if (uid != unique_id)
        return false;       // not saved by us

// load auxiliary members
int num_states = 0;

    slex_in (in, num_states);

// load the dfa tables
dfa_t in_dfa;
size_t dfa_size = 0;

    slex_in (in, dfa_size);
    in_dfa.resize(dfa_size);
    for (size_t dfa = 0; dfa < dfa_size; ++dfa)
    {
    // load the transition tables
    size_t tt_size = 0;
    transition_table_t &tt_table = in_dfa[dfa].transition_table;

        slex_in (in, tt_size);
        tt_table.resize(tt_size);
        for (size_t tt = 0; tt < tt_size; ++tt)
        {
        size_t nt_size = 0;
        node_table_t &nt_table = tt_table[tt];

            slex_in (in, nt_size);
            nt_table.resize(nt_size);
            for (size_t nt = 0; nt < nt_size; ++nt)
            {
                slex_in (in, nt_table[nt]);
            }
        }

    // load the acceptance index table
    size_t ai_size = 0;
    node_table_t &ai_table = in_dfa[dfa].acceptance_index;

        slex_in (in, ai_size);
        ai_table.resize(ai_size);
        for (size_t ai = 0; ai < ai_size; ++ai)
        {
            slex_in (in, ai_table[ai]);
        }
    }

    m_dfa.swap(in_dfa);         // success, swap in the read values
    m_num_states = num_states;

    m_compiled_dfa = true;
    return true;
}

#undef slex_in

///////////////////////////////////////////////////////////////////////////////
// save the lexer tables
#define slex_out(strm, val) \
    strm.write((char*)&val, sizeof(val)); \
    if (std::ios::goodbit != strm.rdstate()) return false

template <typename IteratorT, typename TokenT, typename CallbackT>
inline bool
lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
{
// save signature and version information
long out_long = SLEX_SIGNATURE;

    slex_out(out, out_long);
    out_long = SLEX_VERSION_100;
    slex_out(out, out_long);
    slex_out(out, unique_id);

// save auxiliary members
    slex_out(out, m_num_states);

// save the dfa tables
    typedef typename dfa_t::const_iterator dfa_iter_t;
    typedef transition_table_t::const_iterator transition_table_iter_t;
    typedef node_table_t::const_iterator node_table_iter_t;

    out_long = m_dfa.size();
    slex_out(out, out_long);

    dfa_iter_t end = m_dfa.end();
    for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
    {
    // save the transition table
        out_long = (*it).transition_table.size();
        slex_out(out, out_long);

        transition_table_iter_t tt_end = (*it).transition_table.end();
        for (transition_table_iter_t tt_it = (*it).transition_table.begin();
             tt_it != tt_end; 
             ++tt_it)
        {
            out_long = (*tt_it).size();
            slex_out(out, out_long);

            node_table_iter_t nt_end = (*tt_it).end();
            for (node_table_iter_t nt_it = (*tt_it).begin(); 
                 nt_it != nt_end; 
                 ++nt_it)
            {
                slex_out(out, (*nt_it));
            }
        }

    // save the acceptance index table
        out_long = (*it).acceptance_index.size();
        slex_out(out, out_long);

        node_table_iter_t nt_end = (*it).acceptance_index.end();
        for (node_table_iter_t nt_it = (*it).acceptance_index.begin(); 
             nt_it != nt_end; 
             ++nt_it)
        {
            slex_out(out, (*nt_it));
        }
    }
    return true;
}

#undef slex_out

/*
a lexer_control object supports some operations on the lexer.
    get current lexer state
    set state
    terminate
    state stack (push, pop, top)
    set new token return value
    ignore the current token
    yymore
    get length of matched token
*/
template <typename TokenT>
class lexer_control
{
public:

    lexer_control(TokenT& token, unsigned int& current_state,
        std::stack<unsigned int>& state_stack);
    // functions dealing with the lexer state

    // set the state to state
    void set_state(unsigned int state);

    // get the current state
    unsigned int get_state();

    // pushes the current state onto the top of the state stack and
    // switches to new_state
    void push_state(unsigned int new_state);

    // pops the top of the state stack and switches to it.
    void pop_state();

    // returns the top of the stack without altering the stack's contents.
    unsigned int top_state();

    // functions dealing with the token returned.
    
    // set a new TokenT return value, overriding that one that was
    // registered via. register_regex()
    void set_token(const TokenT& token);

    // tell the lexer to return an invalid token, signifying termination.
    void terminate();

    // ignore the current token, and move on to the next one.  The current
    // token will NOT be returned from next_token()
    void ignore_current_token();

    // returns true if ignore_current_token() has been called, 
    // false otherwise.
    bool ignore_current_token_set();

private:
    TokenT& m_token;
    bool m_ignore_current_token;
    unsigned int& m_current_state;
    std::stack<unsigned int>& m_state_stack;
};

template <typename TokenT>
inline 
lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
        std::stack<unsigned int>& state_stack)
    : m_token(token)
    , m_ignore_current_token(false)
    , m_current_state(current_state)
    , m_state_stack(state_stack)
{
}

template <typename TokenT>
inline void
lexer_control<TokenT>::set_state(unsigned int state)
{
    m_current_state = state;
}

template <typename TokenT>
inline unsigned int
lexer_control<TokenT>::get_state()
{
    return m_current_state;
}

template <typename TokenT>
inline void
lexer_control<TokenT>::push_state(unsigned int new_state)
{
    m_state_stack.push(m_current_state);
    m_current_state = new_state;
}

template <typename TokenT>
inline void
lexer_control<TokenT>::pop_state()
{
    m_current_state = m_state_stack.top();
    m_state_stack.pop();
}

template <typename TokenT>
inline unsigned int
lexer_control<TokenT>::top_state()
{
    return m_state_stack.top();
}

template <typename TokenT>
inline void
lexer_control<TokenT>::set_token(const TokenT& token)
{
    m_token = token;
}

template <typename TokenT>
inline void
lexer_control<TokenT>::terminate()
{
    m_token = -1; // TOOD: fix this, need to be determined by traits
}

template <typename TokenT>
inline void
lexer_control<TokenT>::ignore_current_token()
{
    m_ignore_current_token = true;
}

template <typename TokenT>
inline bool
lexer_control<TokenT>::ignore_current_token_set()
{
    return m_ignore_current_token;
}

} // namespace spirit

#undef SPIRIT_IT_NS

#endif