Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

parse enum using boost spirit qi parser

I am trying to parse a char to fill in a C++11 strongly typed enum. I need help writing a parser for the enums; it needs to be high performance as well.

I have a string with the following format

Category | Type | Attributes 

Example:

// Sample inputs in the "Category | Type | Attributes" format described above.
std::string str1 = "A|D|name=tim, address=3 infinite loop";
std::string str2 = "A|C|name=poc, address=5 overflow street";

I am representing Category and Type as follows:

 // Category code; each enumerator's value is the character that denotes it in the input.
 enum class CATEGORY : char
 {
     Animal = 'A', Bird = 'B'
 };

 // Type code; each enumerator's value is the character that denotes it in the input.
 enum class TYPE : char
 {
     Dog = 'D', Bird = 'B'
 };

 struct Zoo
 {
      Category category; 
      Type     type; 
      std::string name;
      std::string address;
 }; 

namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;
// Skeleton Qi grammar from the question (skipper: ascii::space_type, no attribute).
// NOTE(review): incomplete sketch - the semantic actions that should store the
// enums are empty placeholders, so this does not compile as written.
template <typename Iterator>
struct ZooBuilderGrammar :  qi::grammar<Iterator, ascii::space_type>
{
 ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
 {
    using qi::char_; 
    using qi::_1;
    // NOTE(review): the next using-declaration is missing its terminating ';'.
    using qi::lit 
    using boost::phoenix::ref; 

    //need help here 
    // Intended shape: <category char> '|' <type char> '|' <rest>; the trailing
    // bare `lit` is a placeholder for the attributes part.
    start_=char_[/*how to assign enum */ ]>>'|'
         >>char_[ /*how to assign enum */ ]>>'|'
         >>lit;
 } 
 // Top-level rule handed to the grammar base class.
 qi::rule<Iterator, ascii::space_type> start_;
};   

I am having trouble creating a parser, similar to the built-in qi::char_, that parses the enums CATEGORY and TYPE.

Thanks in advance for the help.

like image 529
Sanjit Avatar asked Sep 17 '25 22:09

Sanjit


2 Answers

As usual, there are several approaches:

  1. The semantic action way (ad-hoc)
  2. The customization points way
  3. The qi::symbols way

Which is the most appropriate depends. All three approaches should be equally efficient. The symbols<> approach seems to be the safest (not involving casts) and the most flexible: you can e.g. use it with variable-length enum members, use it inside no_case[] etc.

Case by case:

  1. The semantic action way (ad-hoc):

    // Approach 1: semantic actions cast the matched character to the enum type.
    template <typename Iterator>
    struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
    {
        ZooBuilderGrammar()
          : ZooBuilderGrammar::base_type(start_)
        {
            using qi::char_;
            using qi::_val;
            using qi::_1;

            // Match one of "AB" and store it, cast to Category, as the rule's value.
            category_ = char_("AB")[_val = phx::static_cast_<Category>(_1)];

            // Match one of "DB" and store it, cast to Type, as the rule's value.
            type_ = char_("DB")[_val = phx::static_cast_<Type>(_1)];

            // '>' is an expectation point: once "<category>|" has matched, a
            // failing type_ raises qi::expectation_failure.
            start_ = category_ >> '|' > type_;
        }

    private:
        qi::rule<Iterator, Category(), ascii::space_type> category_;
        qi::rule<Iterator, Type(), ascii::space_type>     type_;
        qi::rule<Iterator, ascii::space_type>             start_;
    };
    

    You can see it Live On Coliru printing:

    Parse success: [A, D]
    Remaining unparsed input '|name=tim, address=3 infinite loop'
    ---------------------------
    expected: tag: char-set
    got: "C|name=poc, address=5 overflow street"
    Expectation failure: boost::spirit::qi::expectation_failure at 'C|name=poc, address=5 overflow street'
    ---------------------------
    

  2. The customization points way:

    // Approach 2: Spirit customization point that stores a parsed raw value
    // into any enum attribute.
    namespace boost {
    namespace spirit {
    namespace traits {

        template <typename Enum, typename RawValue>
        struct assign_to_attribute_from_value<
            Enum, RawValue,
            typename enable_if<is_enum<Enum>>::type>
        {
            // Plain static_cast from the raw value; the value is not validated here.
            static void call(RawValue const& raw, Enum& attr)
            {
                attr = static_cast<Enum>(raw);
            }
        };

    } // namespace traits
    } // namespace spirit
    } // namespace boost
    
    // Approach 2 grammar: plain char_ parsers synthesizing a Zoo attribute.
    template <typename Iterator>
    struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
    {
        ZooBuilderGrammar()
          : ZooBuilderGrammar::base_type(start_)
        {
            using qi::char_;

            // <one of "AB"> '|' <one of "DB">, with expectation points in between.
            start_ = char_("AB") > '|' > char_("DB");
        }

    private:
        qi::rule<Iterator, Zoo(), ascii::space_type> start_;
    };
    

    See it Live On Coliru too, with the same output (obviously)


  3. The qi::symbols way:

    // Approach 3: qi::symbols tables map the input codes directly to enum values.
    template <typename Iterator>
    struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
    {
        ZooBuilderGrammar()
          : ZooBuilderGrammar::base_type(start_)
        {
            // <category symbol> '|' <type symbol>, with expectation points in between.
            start_ = category_ > '|' > type_;
        }

    private:
        // Symbol table: category code -> Category value.
        struct Category_ : qi::symbols<char, Category> {
            Category_() {
                this->add("A", Category::Animal);
                this->add("B", Category::Bird);
            }
        } category_;

        // Symbol table: type code -> Type value.
        struct Type_ : qi::symbols<char, Type> {
            Type_() {
                this->add("D", Type::Dog);
                this->add("B", Type::Bird);
            }
        } type_;

        qi::rule<Iterator, Zoo(), ascii::space_type> start_;
    };
    

    See it Live On Coliru


Full Demo

This happens to be the traits approach, but you can reuse the skeleton with both other grammars:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/struct.hpp>

// Category codes; each enumerator's value is its one-character input code.
enum class Category : char {
    Animal = 'A',
    Bird   = 'B'
};
// Type codes; each enumerator's value is its one-character input code.
enum class Type : char {
    Dog  = 'D',
    Bird = 'B'
};

// Parse result: the category and type codes read from one input line.
struct Zoo
{
    Category category;  // first field of the input
    Type type;          // second field of the input
};

// Adapts Zoo as a Boost.Fusion sequence of its two members (category, type);
// the grammar below uses Zoo() as its attribute type.
BOOST_FUSION_ADAPT_STRUCT(Zoo, (Category,category)(Type,type))

namespace qi    = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phx   = boost::phoenix;

// Spirit customization point that stores a parsed raw value into any enum
// attribute.
namespace boost {
namespace spirit {
namespace traits {

    template <typename Enum, typename RawValue>
    struct assign_to_attribute_from_value<
        Enum, RawValue,
        typename enable_if<is_enum<Enum>>::type>
    {
        // Plain static_cast from the raw value; the value is not validated here.
        static void call(RawValue const& raw, Enum& attr)
        {
            attr = static_cast<Enum>(raw);
        }
    };

} // namespace traits
} // namespace spirit
} // namespace boost

// Grammar for "<category>|<type>" that synthesizes a Zoo attribute.
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
    ZooBuilderGrammar()
      : ZooBuilderGrammar::base_type(start_)
    {
        using qi::char_;

        // <one of "AB"> '|' <one of "DB">, with expectation points in between.
        start_ = char_("AB") > '|' > char_("DB");
    }

private:
    qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};

/////////////////////////////////////////////////
// For exception output
struct printer {
    typedef boost::spirit::utf8_string string;

    void element(string const& tag, string const& value, int depth) const {
        for (int i = 0; i < (depth*4); ++i) std::cout << ' '; // indent to depth

        std::cout << "tag: " << tag;
        if (value != "") std::cout << ", value: " << value;
        std::cout << std::endl;
    }
};

// Prints a Spirit `info` object to std::cout by walking it with `printer`.
void print_info(boost::spirit::info const& what) {
    typedef boost::spirit::basic_info_walker<printer> walker_type;

    printer sink;
    walker_type visitor(sink, what.tag, 0);
    boost::apply_visitor(visitor, what.value);
}
//
/////////////////////////////////////////////////

int main()
{
    typedef std::string::const_iterator It;
    static const ZooBuilderGrammar<It> p;

    for (std::string const str1 : { 
            "A|D|name=tim, address=3 infinite loop",
            "A|C|name=poc, address=5 overflow street" })
    {
        It f(str1.begin()), l(str1.end());

        try {
            Zoo zoo;
            bool ok = qi::phrase_parse(f,l,p,ascii::space,zoo);

            if (ok)
                std::cout << "Parse success: [" << static_cast<char>(zoo.category) << ", " << static_cast<char>(zoo.type) << "]\n";
            else
                std::cout << "Failed to parse '" << str1 << "'\n";

            if (f!=l)
                std::cout << "Remaining unparsed input '" << std::string(f,l) << "'\n";
        } catch(qi::expectation_failure<It> const& x)
        {
            std::cout << "expected: "; print_info(x.what_);
            std::cout << "got: \"" << std::string(x.first, x.last) << '"' << std::endl;
        }
        std::cout << "---------------------------\n";
    }
}
like image 78
sehe Avatar answered Sep 19 '25 14:09

sehe


I'd use the qi::symbols way as suggested by sehe, but written this way to improve code readability:

// Grammar for "<category>|<type>" built on two qi::symbols tables that are
// filled in the constructor; synthesizes a Zoo attribute.
template <typename Iterator>
struct ZooBuilderGrammar :  qi::grammar<Iterator, Zoo(), ascii::space_type>
{
    ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
    {
        // Category code -> Category value.
        category_.add
            ("A", Category::Animal)
            ("B", Category::Bird)
            ;
        // Type code -> Type value.
        type_.add
            ("D", Type::Dog)
            ("B", Type::Bird)
            ;
        // <category symbol> '|' <type symbol>, with expectation points in between.
        start_ = category_ > '|' > type_;
    } 
private:
    // Each table's value type must match the enum stored in it: category_
    // holds Category values and type_ holds Type values.
    qi::symbols<char,Category> category_;
    qi::symbols<char,Type> type_;
    qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
like image 39
nihil Avatar answered Sep 19 '25 12:09

nihil