Sophie

Sophie

distrib > Mandriva > 9.1 > ppc > by-pkgid > b9ba69a436161613d8fb030c8c726a8e > files > 578

spirit-1.5.1-2mdk.noarch.rpm

//#define SPIRIT_DEBUG
#include "lexer.hpp"
#include <boost/function.hpp>
#include <iostream>

using namespace spirit;
using namespace std;

ostream& operator<<(ostream& out, wchar_t c)
{
    if (spirit::impl::isgraph_(char(c)) || spirit::impl::isspace_(char(c)))
	    out << char(c);
    else
        out << '.';
    return out;
}

ostream& operator<<(ostream& out, wchar_t const* c)
{
    while (*c)
	    out << char(*c++);
    return out;
}

template <typename LexerT, typename CharT>
void test_match(LexerT& lexer, const CharT* str)
{
    int token;
    const CharT* end = str;
    while (*end)
        ++end;

    cout << "testing string: " << str << endl;
    while ((token = lexer.next_token(str, end)) != -1 && str != end)
    {
        cout << "matched token #" << token << endl;
    }
    if (token == -1)
    {
        cout << "lexing failed at: " << str << endl;
    }
    else
    {
        cout << "matched token #" << token << endl;
    }

}

void lex_callback(const char* const& tok_begin, const char*& tok_end,
        const char* const& /*last*/, const int& token, lexer_control<int>&)
{
    cout << "matched token #" << token << " token = " << std::string(tok_begin, tok_end) << endl;

}

void ignore_callback(const char* const& tok_begin, const char*& tok_end,
        const char* const& /*last*/, const int& token, lexer_control<int>& ctl)
{
    cout << "ignoring token #" << token << " token = " << std::string(tok_begin, tok_end) << endl;

    ctl.ignore_current_token();
}

void change_callback(const char* const& tok_begin, const char*& tok_end,
        const char* const& /*last*/, const int& token, lexer_control<int>& ctl)
{
    cout << "changing token #" << token << " to 150. token = " << 
        std::string(tok_begin, tok_end) << endl;

    ctl.set_token(150);
}

void term_callback(const char* const& tok_begin, const char*& tok_end,
        const char* const& /*last*/, const int& token, lexer_control<int>& ctl)
{
    cout << "matched token #" << token << " token = " << 
        std::string(tok_begin, tok_end) << endl;
    cout << "terminating lexer" << endl;

    ctl.terminate();
}

struct term_callback_ftr
{
    void operator()(const char* const& tok_begin, const char*& tok_end,
        const char* const& /*last*/, const int& token, lexer_control<int>& ctl)
    {
        cout << "matched token #" << token << " token = " << 
            std::string(tok_begin, tok_end) << endl;
        cout << "terminating lexer" << endl;

        ctl.terminate();
    }
};

int main(int , char** )
{

    {
        lexer<const char*> lex;
        lex.register_regex("(a|b)*abb+c?(ab(c|d)*)?", 99, &lex_callback);
        lex.register_regex("[\\a-zA-\\x5a][a-zA-Z0-9_]*", 100, &lex_callback); // identifier
        lex.register_regex("1zz", 101, &change_callback);
        lex.register_regex("2zz", 102, &term_callback);
        lex.register_regex("[0-9]{3}", 300, &ignore_callback); // integer
        lex.register_regex("[0-9]*", 200, &lex_callback); // integer
        test_match(lex, "aaabb");
        test_match(lex, "aaabbd");
        test_match(lex, "aaabbabd");
        test_match(lex, "aaabbde");
        test_match(lex, "baaabcabd");
        test_match(lex, "baaabcabde");
        test_match(lex, "abc123");
        test_match(lex, "123abc123");
        test_match(lex, "1234");
        test_match(lex, "12a45");
        test_match(lex, "1zz");
        test_match(lex, "2zzbdef23");
    }

    {
        typedef boost::function<void, 
                        const char* const&, 
                        const char*&, 
                        const char* const&, 
                        const int&, 
                        lexer_control<int>&> func_callback_t;

        func_callback_t func_lex_cb = &lex_callback;
        func_callback_t func_ignore_cb = &ignore_callback;
        func_callback_t func_change_cb = &change_callback;
        func_callback_t func_term_cb = term_callback_ftr();

        lexer<const char*, int, func_callback_t> lex;
        lex.register_regex("(a|b)*abb+c?(ab(c|d)*)?", 99, &lex_callback);
        lex.register_regex("[\\a-zA-\\x5a][a-zA-Z0-9_]*", 100, func_lex_cb); // identifier
        lex.register_regex("1zz", 101, func_change_cb);
        lex.register_regex("2zz", 102, func_term_cb);
        lex.register_regex("[0-9]{3}", 300, func_ignore_cb); // integer
        lex.register_regex("[0-9]*", 200, func_lex_cb); // integer
        test_match(lex, "aaabb");
        test_match(lex, "aaabbd");
        test_match(lex, "aaabbabd");
        test_match(lex, "aaabbde");
        test_match(lex, "baaabcabd");
        test_match(lex, "baaabcabde");
        test_match(lex, "abc123");
        test_match(lex, "123abc123");
        test_match(lex, "1234");
        test_match(lex, "12a45");
        test_match(lex, "1zz");
        test_match(lex, "2zzbdef23");
    }

    lexer<const char*> lex2;
    lex2.register_regex("do", 1);
    lex2.register_regex("do1", 2);
    lex2.register_regex("do_more", 3);
    lex2.register_regex("_more", 4);
    lex2.register_regex("some_more", 5);
    lex2.register_regex("some_", 6);
    test_match(lex2, "do");
    test_match(lex2, "do1");
    test_match(lex2, "do_more");
    test_match(lex2, "_more");
    test_match(lex2, "some_more");
    test_match(lex2, "some_");
    test_match(lex2, "dodo1");
    test_match(lex2, "do_more_more");
    test_match(lex2, "some_more_moresome__more");

}