Boost Spirit and Lex parser problem

Question

I've been struggling to try and (incrementally) modify example code from the documentation but with not much different I am not getting the behavior I expect. Specifically, the "if" statement fails when (my intent is that) it should be passing (there was an "else" but that part of the parser was removed during debugging). The assignment statement works fine. I had a "while" statement as well which had the same problem as the "if" statement so I am sure if I can get help to figure out why one is not working it should be easy to get the other going. It must be kind of subtle because this is almost verbatim what is in one of the examples.

#include <iostream>
#include <fstream>
#include <string>

#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

namespace qi  = boost::spirit::qi;
namespace lex = boost::spirit::lex;

inline std::string read_from_file( const char* infile )
{
    std::ifstream instream( infile );
    if( !instream.is_open() )
    {
        std::cerr << "Could not open file: \"" << infile << "\"" << std::endl;
        exit( -1 );
    }
    instream.unsetf( std::ios::skipws );
    return( std::string(
                std::istreambuf_iterator< char >( instream.rdbuf() ),
                std::istreambuf_iterator< char >()
          ) );
}

template< typename Lexer >
struct LangLexer : lex::lexer< Lexer >
{
    LangLexer()
    {
        identifier = "[a-zA-Z][a-zA-Z0-9_]*";
        number = "[-+]?(\d*\.)?\d+([eE][-+]?\d+)?";

        if_ = "if";
        else_ = "else";

        this->self = lex::token_def<> ( '(' ) | ')' | '{' | '}' | '=' | ';';
        this->self += identifier | number | if_ | else_;

        this->self( "WS" ) = lex::token_def<>( "[ \t\n]+" );

    }

    lex::token_def<> if_, else_;
    lex::token_def< std::string > identifier;
    lex::token_def< double > number;
};

template< typename Iterator, typename Lexer >
struct LangGrammar : qi::grammar< Iterator, qi::in_state_skipper< Lexer > >
{
    template< typename TokenDef >
    LangGrammar( const TokenDef& tok ) : LangGrammar::base_type( program )
    {
        using boost::phoenix::val;
        using boost::phoenix::ref;
        using boost::phoenix::size;

        program = +block;
        block = '{' >> *statement >> '}';
        statement = assignment | if_stmt;
        assignment = ( tok.identifier >> '=' >> expression >> ';' );
        if_stmt = ( tok.if_ >> '(' >> expression >> ')' >> block );
        expression = ( tok.identifier[ qi::_val = qi::_1 ] | tok.number[ qi::_val = qi::_1 ] );

        BOOST_SPIRIT_DEBUG_NODE( program );
        BOOST_SPIRIT_DEBUG_NODE( block );
        BOOST_SPIRIT_DEBUG_NODE( statement );
        BOOST_SPIRIT_DEBUG_NODE( assignment );
        BOOST_SPIRIT_DEBUG_NODE( if_stmt );
        BOOST_SPIRIT_DEBUG_NODE( expression );
    }

    qi::rule< Iterator, qi::in_state_skipper< Lexer > > program, block, statement;
    qi::rule< Iterator, qi::in_state_skipper< Lexer > > assignment, if_stmt;

    typedef boost::variant< double, std::string > expression_type;
    qi::rule< Iterator, expression_type(), qi::in_state_skipper< Lexer > > expression;
};

int main( int argc, char** argv )
{
    typedef std::string::iterator base_iterator_type;
    typedef lex::lexertl::token< base_iterator_type, boost::mpl::vector< double, std::string > > token_type;
    typedef lex::lexertl::lexer< token_type > lexer_type;
    typedef LangLexer< lexer_type > LangLexer;
    typedef LangLexer::iterator_type iterator_type; 
    typedef LangGrammar< iterator_type, LangLexer::lexer_def > LangGrammar;

    LangLexer lexer;
    LangGrammar grammar( lexer );

    std::string str( read_from_file( 1 == argc ? "boostLexTest.dat" : argv[1] ) );

    base_iterator_type strBegin = str.begin();
    iterator_type tokenItor = lexer.begin( strBegin, str.end() );
    iterator_type tokenItorEnd = lexer.end(); 

    std::cout << std::setfill( '*' ) << std::setw(20) << '*' << std::endl <<
        str
        << std::endl << std::setfill( '*' ) << std::setw(20) << '*' << std::endl;

    bool result = qi::phrase_parse( tokenItor, tokenItorEnd, grammar, qi::in_state( "WS" )[ lexer.self ] );

    if( result )
    {
        std::cout << "Parsing successful" << std::endl;
    }
    else
    {
        std::cout << "Parsing error" << std::endl;
    }

    return( 0 );
}

Here is the output of running this (the file read into the string is dumped out first in main)

********************
{
    a = 5;
    if( a ){ b = 2; }
}


********************
<program>
  <try>{</try>
  <block>
    <try>{</try>
    <statement>
      <try></try>
      <assignment>
        <try></try>
<expression>
  <try></try>
  <success>;</success>
  <attributes>(5)</attributes>
</expression>
        <success></success>
        <attributes>()</attributes>
      </assignment>
      <success></success>
      <attributes>()</attributes>
    </statement>
    <statement>
      <try></try>
      <assignment>
        <try></try>
        <fail/>
      </assignment>
      <if_stmt>
        <try>
    if(</try>
        <fail/>
      </if_stmt>
      <fail/>
    </statement>
    <fail/>
  </block>
  <fail/>
</program>
Parsing error

hkaiser · Accepted Answer

The problem is the sequence you added the token definitions to the lexer. Your code

this->self += identifier | number | if_ | else_;

first adds the identifier token, which will perfectly match the 'if' (and any other keyword) as well. If you change that to

this->self += if_ | else_ | identifier | number;

everythings starts to work as it should.

This is nothing specific to Spirit.Lex. Any tokenizer respects the order the tokens are defined in to prioritize the matching.

Boost Spirit and Lex parser problem

Tags:

c++

boost

boost-spirit

boost-spirit-qi

bpw1621

1 Answers

hkaiser

Recent Activity

Donate For Us

Boost Spirit and Lex parser problem

Tags:

c++

boost

boost-spirit

boost-spirit-qi

bpw1621

1 Answers

hkaiser

Related questions

Recent Activity

Donate For Us