I've been struggling to incrementally modify example code from the documentation, but even though my version is not much different, I am not getting the behavior I expect. Specifically, the "if" statement fails to parse when (as I intend it) it should succeed (there was an "else" as well, but that part of the parser was removed during debugging). The assignment statement works fine. I had a "while" statement too, which had the same problem as the "if" statement, so I am sure that if I can get help figuring out why one is not working, it should be easy to get the other going. It must be something subtle, because this is almost verbatim what is in one of the examples.
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
/// Loads the complete contents of a file into a string.
///
/// @param infile  Path of the file to read.
/// @return        Every character read from the file, whitespace included.
///
/// If the file cannot be opened, a message is written to std::cerr and the
/// process is terminated with exit( -1 ); the function does not return in
/// that case.
inline std::string read_from_file( const char* infile )
{
    typedef std::istreambuf_iterator< char > char_iterator;

    std::ifstream input( infile );
    if( input.is_open() )
    {
        // Keep whitespace intact on the stream before draining its buffer.
        input.unsetf( std::ios::skipws );

        char_iterator first( input.rdbuf() );
        char_iterator last;
        return std::string( first, last );
    }

    std::cerr << "Could not open file: \"" << infile << "\"" << std::endl;
    exit( -1 );
}
// Token definitions for the small language parsed below: single-character
// punctuation, identifiers, numbers and the `if` / `else` keywords.
// Whitespace is defined in a separate lexer state named "WS" rather than in
// the default token set.
//
// Template parameter Lexer: the underlying lexer type this definition derives
// from through lex::lexer< Lexer >.
template< typename Lexer >
struct LangLexer : lex::lexer< Lexer >
{
// Assigns a pattern to every token definition and registers them with the lexer.
LangLexer()
{
// A letter followed by any number of letters, digits or underscores.
identifier = "[a-zA-Z][a-zA-Z0-9_]*";
// Optional sign, optional "digits." prefix, required digits, optional exponent.
number = "[-+]?(\\d*\\.)?\\d+([eE][-+]?\\d+)?";
if_ = "if";
else_ = "else";
// Single-character tokens of the default state.
this->self = lex::token_def<> ( '(' ) | ')' | '{' | '}' | '=' | ';';
// NOTE(review): `identifier` is registered ahead of `if_` and `else_`. The
// identifier pattern also matches the text "if" and "else", and the order of
// registration decides which definition wins, so keywords come out of the
// lexer as identifiers and a rule expecting `if_` cannot match. Registering
// `if_ | else_ | identifier | number` instead resolves this.
this->self += identifier | number | if_ | else_;
// Runs of spaces, tabs and newlines live in the "WS" state only.
this->self( "WS" ) = lex::token_def<>( "[ \\t\\n]+" );
}
// Keyword tokens; declared without an attribute type.
lex::token_def<> if_, else_;
// Identifier token, declared with a std::string attribute.
lex::token_def< std::string > identifier;
// Numeric token, declared with a double attribute.
lex::token_def< double > number;
};
// Qi grammar over the token stream. The start rule is `program`, and every
// rule uses qi::in_state_skipper< Lexer > as its skipper.
//
// Template parameters:
//   Iterator - iterator type the grammar parses over.
//   Lexer    - type handed to qi::in_state_skipper to form the skipper.
template< typename Iterator, typename Lexer >
struct LangGrammar : qi::grammar< Iterator, qi::in_state_skipper< Lexer > >
{
// Builds the rules from the token definitions exposed by `tok`; the rules
// read tok.identifier, tok.number and tok.if_.
template< typename TokenDef >
LangGrammar( const TokenDef& tok ) : LangGrammar::base_type( program )
{
// NOTE(review): none of these three using-declarations is referenced in
// this constructor.
using boost::phoenix::val;
using boost::phoenix::ref;
using boost::phoenix::size;
// A program is one or more blocks.
program = +block;
// A block is zero or more statements enclosed in braces.
block = '{' >> *statement >> '}';
// A statement is an assignment or, failing that, an if statement.
statement = assignment | if_stmt;
// identifier '=' expression ';'
assignment = ( tok.identifier >> '=' >> expression >> ';' );
// if '(' expression ')' block
if_stmt = ( tok.if_ >> '(' >> expression >> ')' >> block );
// An expression is a single identifier or number; the semantic action
// assigns the matched token's attribute to the rule's attribute.
expression = ( tok.identifier[ qi::_val = qi::_1 ] | tok.number[ qi::_val = qi::_1 ] );
// Register each rule with Spirit's debug support (BOOST_SPIRIT_DEBUG is
// defined before the Spirit headers are included).
BOOST_SPIRIT_DEBUG_NODE( program );
BOOST_SPIRIT_DEBUG_NODE( block );
BOOST_SPIRIT_DEBUG_NODE( statement );
BOOST_SPIRIT_DEBUG_NODE( assignment );
BOOST_SPIRIT_DEBUG_NODE( if_stmt );
BOOST_SPIRIT_DEBUG_NODE( expression );
}
// Rules declared without an attribute.
qi::rule< Iterator, qi::in_state_skipper< Lexer > > program, block, statement;
qi::rule< Iterator, qi::in_state_skipper< Lexer > > assignment, if_stmt;
// Attribute of `expression`: either a number or an identifier string.
typedef boost::variant< double, std::string > expression_type;
qi::rule< Iterator, expression_type(), qi::in_state_skipper< Lexer > > expression;
};
int main( int argc, char** argv )
{
typedef std::string::iterator base_iterator_type;
typedef lex::lexertl::token< base_iterator_type, boost::mpl::vector< double, std::string > > token_type;
typedef lex::lexertl::lexer< token_type > lexer_type;
typedef LangLexer< lexer_type > LangLexer;
typedef LangLexer::iterator_type iterator_type;
typedef LangGrammar< iterator_type, LangLexer::lexer_def > LangGrammar;
LangLexer lexer;
LangGrammar grammar( lexer );
std::string str( read_from_file( 1 == argc ? "boostLexTest.dat" : argv[1] ) );
base_iterator_type strBegin = str.begin();
iterator_type tokenItor = lexer.begin( strBegin, str.end() );
iterator_type tokenItorEnd = lexer.end();
std::cout << std::setfill( '*' ) << std::setw(20) << '*' << std::endl <<
str
<< std::endl << std::setfill( '*' ) << std::setw(20) << '*' << std::endl;
bool result = qi::phrase_parse( tokenItor, tokenItorEnd, grammar, qi::in_state( "WS" )[ lexer.self ] );
if( result )
{
std::cout << "Parsing successful" << std::endl;
}
else
{
std::cout << "Parsing error" << std::endl;
}
return( 0 );
}
Here is the output of running this (the file read into the string is dumped out first in main):
********************
{
a = 5;
if( a ){ b = 2; }
}
********************
<program>
<try>{</try>
<block>
<try>{</try>
<statement>
<try></try>
<assignment>
<try></try>
<expression>
<try></try>
<success>;</success>
<attributes>(5)</attributes>
</expression>
<success></success>
<attributes>()</attributes>
</assignment>
<success></success>
<attributes>()</attributes>
</statement>
<statement>
<try></try>
<assignment>
<try></try>
<fail/>
</assignment>
<if_stmt>
<try>
if(</try>
<fail/>
</if_stmt>
<fail/>
</statement>
<fail/>
</block>
<fail/>
</program>
Parsing error
The problem is the order in which you added the token definitions to the lexer. Your code
this->self += identifier | number | if_ | else_;
first adds the identifier token, whose pattern also matches 'if' (and any other keyword). If you change that to
this->self += if_ | else_ | identifier | number;
everything starts to work as it should.
This is not specific to Spirit.Lex. Any tokenizer uses the order in which the tokens are defined to prioritize the matching.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With