Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

boost::spirit parsing into a fusion adapted structure optional but exclusive

If there's a structure:

// Result of parsing one `record { ... }` declaration, e.g.
// `record { string(5) Alex; }` or `record { string("|") Alex; }`.
struct record
{
    std::string     type;       // leading word, e.g. "string"
    std::string     delimiter;  // quoted text inside the parentheses, if given
    uint32_t        length;     // number inside the parentheses, if given
    std::string     name;       // identifier preceding the ';'

    // Starts out empty: every string is "" and length is 0.
    record()
        : type()
        , delimiter()
        , length(0)
        , name()
    {
    }
};

Which is adapted using boost::fusion, and the below grammar:

// Qi grammar for one `record { ... }` declaration. It synthesizes a `record`
// and uses ascii::space as the skipper between tokens.
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
    // Defines every sub-rule and registers `start` as the grammar's entry rule.
    record_parser() : record_parser::base_type(start)
    {
        using qi::lit;
        using qi::uint_;
        using qi::lexeme;

        using ascii::char_;
        using ascii::blank;
        using ascii::string;
        using qi::attr;

        using qi::eps;

            // Note: `char('x')` below is an ordinary cast producing a char
            // value (not the `char_` parser); the rules use it as a literal
            // character to match.

            // type: one or more characters, stopping at a blank or '('.
            type %= lexeme[+(char_ - (blank|char('(')))];
            // delimiter: parenthesised text wrapped in double or single quotes.
            delimiter_double_quote %= char('(') >> lexeme[char('"')  >> +(char_ - char('"'))  >> char('"') ] >> char(')');
            delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
            delimiter %= (delimiter_double_quote | delimiter_single_quote);
            // name: one or more characters up to a blank or ';', then the ';'.
            name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
            // length: an unsigned integer in parentheses.
            length %= (char('(') >> uint_ >> char(')'));

        // Entry rule. As written, delimiter and length are each optional on
        // their own (falling back to "" and 0 through attr), so this rule does
        // not by itself forbid a delimiter followed by a length, or neither.
        // NOTE(review): `eps > ...` is the expectation operator, so a mismatch
        // after `eps` is presumably reported by throwing rather than by
        // returning false - confirm that callers handle this.
        start %=
            eps >
            lit("record")
            >> char('{')
            >>  type
            >>  (delimiter | attr("")) >> (length | attr(0))
            >>  name
            >>  char('}')
            ;
    }

    // One rule per field of `record`, plus the two quoted-delimiter helpers.
    qi::rule<Iterator, std::string(), ascii::space_type> type;
    qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
    qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
    qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
    qi::rule<Iterator, uint32_t(), ascii::space_type> length;
    qi::rule<Iterator, std::string(), ascii::space_type> name;
    qi::rule<Iterator, record(), ascii::space_type> start;
};

I want 'delimiter' and 'length' to be individually optional but mutually exclusive: exactly one of them must be present, and when one is present the other must not appear.

For Example:

record { string(5) Alex; }
record { string("|") Alex; }

But Not:

record { string(5)("|") Alex; }
record { string Alex; }

I have attempted to do it this way, but compilation fails:

// Attempted exclusive form: either a delimiter (with length filled in as 0)
// or a length (with delimiter filled in as ""). This is the variant that is
// reported as failing to compile.
start %=
            eps >
            lit("record")
            >> char('{')
            >>  type
            >> ((delimiter >> attr(0)) | (attr("") >> length))
            >>  name
            >>  char('}')
            ;

Thank you for your help in advance. Below is the full source code:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>

#include <string>

namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    // Result of parsing one `record { ... }` declaration, e.g.
    // `record { string(5) Alex; }`.
    struct record
    {
        std::string     type;       // leading word, e.g. "string"
        std::string     delimiter;  // quoted text inside the parentheses, if given
        uint32_t        length;     // number inside the parentheses, if given
        std::string     name;       // identifier preceding the ';'

        // Starts out empty: every string is "" and length is 0.
        record()
            : type()
            , delimiter()
            , length(0)
            , name()
        {
        }
    };
}

// Adapts client::record as a Boost.Fusion sequence so the grammar can fill it
// directly. The members are listed in the order type, delimiter, length, name,
// which is the order in which the `start` rule produces its attributes.
BOOST_FUSION_ADAPT_STRUCT(
    client::record,
    (std::string, type)
    (std::string, delimiter)
    (uint32_t, length)
    (std::string, name)
)

namespace client
{
    // Qi grammar for one `record { ... }` declaration. It synthesizes a
    // `record` and uses ascii::space as the skipper between tokens.
    template <typename Iterator>
    struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
    {
        // Defines every sub-rule and registers `start` as the entry rule.
        record_parser() : record_parser::base_type(start)
        {
            using qi::lit;
            using qi::uint_;
            using qi::lexeme;

            using ascii::char_;
            using ascii::blank;
            using ascii::string;
            using qi::attr;

            using qi::eps;

                // Note: `char('x')` below is an ordinary cast producing a char
                // value (not the `char_` parser); the rules use it as a
                // literal character to match.

                // type: one or more characters, stopping at a blank or '('.
                type %= lexeme[+(char_ - (blank|char('(')))];
                // delimiter: parenthesised text in double or single quotes.
                delimiter_double_quote %= char('(') >> lexeme[char('"')  >> +(char_ - char('"'))  >> char('"') ] >> char(')');
                delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
                delimiter %= (delimiter_double_quote | delimiter_single_quote);
                // name: characters up to a blank or ';', then the ';' itself.
                name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
                // length: an unsigned integer in parentheses.
                length %= (char('(') >> uint_ >> char(')'));

            // Entry rule. delimiter and length are each optional on their own
            // (falling back to "" and 0 through attr), so this rule does not
            // by itself forbid a delimiter followed by a length, or neither.
            // NOTE(review): `eps > ...` is the expectation operator, so a
            // mismatch after `eps` is presumably reported by throwing rather
            // than by returning false - confirm that callers handle this.
            start %=
                eps >
                lit("record")
                >> char('{')
                >>  type
                >>  (delimiter | attr("")) >> (length | attr(0))
                >>  name
                >>  char('}')
                ;
        }

        // One rule per field of `record`, plus the quoted-delimiter helpers.
        qi::rule<Iterator, std::string(), ascii::space_type> type;
        qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
        qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
        qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
        qi::rule<Iterator, uint32_t(), ascii::space_type> length;
        qi::rule<Iterator, std::string(), ascii::space_type> name;
        qi::rule<Iterator, record(), ascii::space_type> start;
    };
}

////////////////////////////////////////////////////////////////////////////
//  Main program
////////////////////////////////////////////////////////////////////////////
// Parses one hard-coded sample record with the grammar and prints either the
// parsed fields or the unparsed remainder of the input. Always returns 0.
int main()
{
    const std::string storage = "record { string(5) Alex; }";

    using boost::spirit::ascii::space;
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;

    record_parser g; // Our grammar

    client::record rec;
    iterator_type iter = storage.begin();
    const iterator_type end = storage.end();
    const bool r = phrase_parse(iter, end, g, space, rec);

    // Success requires both a match and full consumption of the input.
    if (r && iter == end)
    {
        // Print the adapted struct as "[a, b, c, d]".
        std::cout << boost::fusion::tuple_open('[');
        std::cout << boost::fusion::tuple_close(']');
        std::cout << boost::fusion::tuple_delimiter(", ");

        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "got: " << boost::fusion::as_vector(rec) << std::endl;
        std::cout << "\n-------------------------\n";
    }
    else
    {
        // Show at most 30 characters of whatever was left unparsed. The
        // window is clamped BEFORE advancing: the sample input is shorter
        // than 30 characters, and forming iter + 30 beyond end() is undefined
        // behaviour for std::string iterators.
        const std::string::difference_type window = 30;
        const iterator_type some = (end - iter > window) ? iter + window : end;
        const std::string context(iter, some);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at -->" << context << "...\n";
        std::cout << "-------------------------\n";
    }

    return 0;
}
like image 357
Alex Avatar asked Mar 12 '23 20:03

Alex


1 Answer

You can just write out the combinations:

    // Each branch supplies both values (delimiter, length): a parsed delimiter
    // with length 0, a parsed length with delimiter "", or both defaults.
    >> (
            delimiter >> attr(0)
         |  attr("")  >> length
         |  attr("")  >> attr(0)
    )

The best way to make it work with automatic attribute propagation is to use an AST structure that is similar:

namespace client {

    // Parsed form of one `record { ... }` declaration. The two parenthesised
    // alternatives are grouped in `param` so that the (delimiter, length)
    // pair produced by the grammar maps onto a single member.
    struct record {
        std::string type;

        // Delimiter/length pair; length defaults to 0.
        struct param_t {
            std::string delimiter;
            uint32_t    length = 0;
        } param;

        std::string name;
    };
}

// Adapt both structs as Fusion sequences: param_t as (delimiter, length) and
// record as (type, param, name).
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)

Full Demo Live On Coliru

Note how much simpler the grammar has been made (all those char(' ') things are unnecessary; use lexemes only if you declare a skipper; use ~char_ instead of character set subtraction; use graph instead of char_ - space etc.).

// type: one or more printable characters up to '('.
type                   = +(graph - '(');
// Quoted delimiter bodies: any characters other than the closing quote.
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter              = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// name: one or more printable characters up to ';'.
name                   = +(graph - ';');
// length: an unsigned integer in parentheses.
length                 = '(' >> uint_ >> ')';

// Every branch of the alternative yields a (delimiter, length) pair.
// NOTE(review): the last branch matches when neither parameter is present;
// the question lists `record { string Alex; }` as input that should be
// rejected, so drop that branch if exactly one parameter is required.
start = eps > lit("record") >> '{' 
    >> type
    >> (
            delimiter >> attr(0)
         |  attr("")  >> length
         |  attr("")  >> attr(0)
    )
    >>  name >> ';' >> '}'
    ;

Full code:

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>

#include <string>

namespace qi = boost::spirit::qi;

namespace client {

    // Parsed form of one `record { ... }` declaration. The two parenthesised
    // alternatives are grouped in `param` so that the (delimiter, length)
    // pair produced by the grammar maps onto a single member.
    struct record {
        std::string type;

        // Delimiter/length pair; length defaults to 0.
        struct param_t {
            std::string delimiter;
            uint32_t    length = 0;
        } param;

        std::string name;
    };
}

// Adapt both structs as Fusion sequences: param_t as (delimiter, length) and
// record as (type, param, name).
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)

namespace client {
    // Stream a param_t by printing its Fusion vector view.
    std::ostream& operator<<(std::ostream& os, record::param_t const& v)
    {
        return os << boost::fusion::as_vector(v);
    }

    // Stream a record by printing its Fusion vector view.
    std::ostream& operator<<(std::ostream& os, record const& v)
    {
        return os << boost::fusion::as_vector(v);
    }
}

namespace client
{
    // Qi grammar for one `record { ... }` declaration, synthesizing a
    // `record`. The skipper defaults to qi::ascii::space_type.
    template <typename Iterator, typename Skipper = qi::ascii::space_type>
    struct record_parser : qi::grammar<Iterator, record(), Skipper>
    {
        // Defines every sub-rule and registers `start` as the entry rule.
        record_parser() : record_parser::base_type(start)
        {
            using namespace qi;

            // type: one or more printable characters up to '('.
            type                   = +(graph - '(');
            // Quoted delimiter bodies: any characters except the closing quote.
            delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
            delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
            delimiter              = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
            // name: one or more printable characters up to ';'.
            name                   = +(graph - ';');
            // length: an unsigned integer in parentheses.
            length                 = '(' >> uint_ >> ')';

            // Every branch of the alternative yields a (delimiter, length)
            // pair, matching record::param_t.
            // NOTE(review): the last branch matches when neither parameter is
            // present; the question lists `record { string Alex; }` as input
            // that should be rejected, so drop that branch if exactly one
            // parameter is required.
            start = eps > lit("record") >> '{' 
                >> type
                >> (
                        delimiter >> attr(0)
                     |  attr("")  >> length
                     |  attr("")  >> attr(0)
                )
                >>  name >> ';' >> '}'
                ;
        }
      private: 
        // Rules that take the skipper.
        qi::rule<Iterator, record(),      Skipper> start;
        qi::rule<Iterator, uint32_t(),    Skipper> length;
        qi::rule<Iterator, std::string(), Skipper> delimiter;
        // lexemes (declared without a skipper)
        qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
    };
}

int main()
{
    for (std::string const storage : {
                "record { string(5) Alex; }",
                "record { string(\"|\") Alex; }",
            })
    {
        typedef std::string::const_iterator iterator_type;
        typedef client::record_parser<iterator_type> record_parser;

        record_parser g; // Our grammar

        client::record rec;
        auto iter = storage.begin(), end = storage.end();
        bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);

        if (r) {
            std::cout << "Parsing succeeded: " << rec << std::endl;
        } else {
            std::cout << "Parsing failed\n";
        }

        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
        }
    }
}

Prints:

Parsing succeeded: (string ( 5) Alex)
Parsing succeeded: (string (| 0) Alex)
like image 96
sehe Avatar answered Apr 26 '23 14:04

sehe