Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Implementing recursive grammars with Boost.Qi

I am using Boost.Spirit Qi to construct a rather complex structure from some text data. The data structure may be defined recursively, so I need two of my grammars to reference each other, and that is where the problems emerge.

For example, I have a grammar:

element = line | text | circle | box | composite_element
composite_element = 'C', int, int, '[', +element, ']'

Obviously, I need something like that:

#include <boost/config/warning_disable.hpp>

#include <iostream>
#include <string>
#include <tuple>
#include <vector>

#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_eol.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Attribute record for an 'L' entry: ten ints, declared in the same order in
// which they are listed in BOOST_FUSION_ADAPT_STRUCT(line, ...).
struct line {
    int x1, y1;
    int x2, y2;
    int color;
    int width;
    int capstyle;
    int dashstyle, dashlength, dashspace;
};

// Attribute record for a box entry: sixteen ints, declared in the same order
// in which they are listed in BOOST_FUSION_ADAPT_STRUCT(box, ...).
struct box {
    int x, y;
    int width, height;
    int color;
    int line_width;
    int capstyle;
    int dashstyle, dashlength, dashspace;
    int filltype, fillwidth;
    int angle1, pitch1;
    int angle2, pitch2;
};

// Attribute record for a circle entry: eight ints, declared in the same order
// in which they are listed in BOOST_FUSION_ADAPT_STRUCT(circle, ...).
struct circle {
    int x, y;
    int radius;
    int color;
    int line_width;
    int capstyle;
    int dashstyle, dashlength;
};

// Attribute record for a text entry: nine ints followed by the text lines,
// declared in the same order as in BOOST_FUSION_ADAPT_STRUCT(text, ...).
struct text {
    int x, y;
    int color;
    int size;
    int visibility;
    int show_name_value;
    int angle;
    int alignment;
    int num_lines;
    std::vector<std::string> lines;
};

// Declared early because element_t has to name composite_component before
// that struct is defined (the struct itself stores a vector of element_t).
struct composite_component;
// One parsed shape. The composite alternative is held through
// boost::recursive_wrapper because its type is still incomplete at this point.
using element_t = boost::variant<line, box, circle, text, boost::recursive_wrapper<composite_component>>;

// Attribute record for a 'C' entry. Member order matches
// BOOST_FUSION_ADAPT_STRUCT(composite_component, x, y, basename, elements).
struct composite_component {
    int x, y;
    std::string basename;
    // Filled only for an embedded component, i.e. one that stores its
    // definition within the schematic file.
    std::vector<element_t> elements;
};

// Wrapper around a single parsed shape; this is the attribute type of
// element_parser and the value type of document::elements.
struct element {
    // some other fields
    // ...
    // The shape itself (one alternative of the element_t variant).
    element_t element;
};

// Top-level parse result: the elements in the order document_parser appended them.
struct document {
    std::vector<element> elements;
};

// Register every attribute struct with Boost.Fusion. The members are listed in
// declaration order; the grammars below index into them with at_c<N>, so the
// order of these lists is significant.
BOOST_FUSION_ADAPT_STRUCT(line, x1, y1, x2, y2, color, width, capstyle, dashstyle, dashlength, dashspace)
BOOST_FUSION_ADAPT_STRUCT(box, x, y, width, height, color, line_width, capstyle, dashstyle, dashlength, dashspace, filltype, fillwidth, angle1, pitch1, angle2, pitch2)
BOOST_FUSION_ADAPT_STRUCT(circle, x, y, radius, color, line_width, capstyle, dashstyle, dashlength)
BOOST_FUSION_ADAPT_STRUCT(text, x, y, color, size, visibility, show_name_value, angle, alignment, num_lines, lines)
BOOST_FUSION_ADAPT_STRUCT(composite_component, x, y, basename, elements)
BOOST_FUSION_ADAPT_STRUCT(element, element)
BOOST_FUSION_ADAPT_STRUCT(document, elements)

// Shorthand for a qi::rule that uses the qi::blank_type skipper, so the
// grammars below only have to spell out the iterator and attribute signature.
template <typename Iterator, typename Attribute>
using rule = qi::rule<Iterator, Attribute, qi::blank_type>;

// Forward declaration; the class template is defined after element_parser.
template <typename Iterator>
class composite_element_parser;

/// Grammar for a single element that occupies one input line.
///
/// Synthesizes an `element` and uses the qi::blank_type skipper. Only the 'L'
/// (line) alternative is defined; box, circle and text are left out.
template <typename Iterator>
class element_parser : public qi::grammar<Iterator, element(), qi::blank_type> {
public:
    element_parser(): element_parser::base_type{start_rule_}
    {
        using qi::int_;

        /* definitions other than 'line' are omitted for the sake of simplicity */
        // 'L' followed by ten integers, one per member of `line`.
        line_ = 'L' >> int_ >> int_ >> int_ >> int_ >> int_ >>
                       int_ >> int_ >> int_ >> int_ >> int_;
        // box = ...
        // circle = ...
        // text = ...

        // Every element is terminated by an end-of-line. There is deliberately
        // no qi::eoi here: this grammar is applied repeatedly (document_parser
        // uses +element_ and checks qi::eoi itself), so demanding end-of-input
        // after each element would reject any input with more than one element.
        start_rule_ = (line_ /* | embedded_component_ */) >> qi::eol;
    }
private:
    rule<Iterator, element()> start_rule_;
    rule<Iterator, line()> line_;
    // here comes the problem - CIRCULAR REFERENCE to incompletely defined template
    // composite_element_parser<Iterator> embedded_component_;
};

/// Grammar for a 'C' entry: two ints, a basename, and an optional bracketed
/// list of embedded elements. Synthesizes a `composite_component`.
///
/// Each field is stored by an explicit semantic action. A rule assigned with
/// `=` whose right-hand side contains semantic actions does not propagate
/// attributes automatically, so every member needs its own action.
template <typename Iterator>
class composite_element_parser : public qi::grammar<Iterator, composite_component(), qi::blank_type> {
    public:
    composite_element_parser() : composite_element_parser::base_type{start_rule_}
    {
        using phoenix::at_c;
        using phoenix::push_back;
        using qi::int_;
        using qi::_val;
        using qi::_1;

        // NOTE(review): the blank skipper does not skip newlines, so a '['
        // that starts on the line after the basename is presumably not
        // matched here - confirm against the real input format.
        start_rule_ = "C"
            >> int_[at_c<0>(_val) = _1]                      // x
            >> int_[at_c<1>(_val) = _1]                      // y
            // basename: one or more printable characters, appended one at a
            // time (a bare char_ would capture a single character only)
            >> qi::lexeme[+qi::graph[at_c<2>(_val) += _1]]
            >> -(
                   "[" >>
                   // element_ yields an `element`; the vector stores
                   // element_t, so unwrap its only member before appending
                   *(element_)[push_back(at_c<3>(_val), at_c<0>(_1))] >>
                   "]"
               );
    }
    private:
    rule<Iterator, composite_component()> start_rule_;
    element_parser<Iterator> element_;
};

/// Top-level grammar: one or more elements followed by end of input.
/// Synthesizes a `document` and uses the qi::blank_type skipper.
template <typename Iterator>
class document_parser : public qi::grammar<Iterator, document(), qi::blank_type> {
public:
    document_parser() : document_parser::base_type{start_rule_}
    {
        using phoenix::at_c;
        using phoenix::push_back;
        using qi::_val;
        using qi::_1;

        // Append each parsed element to document::elements (member 0 of the
        // adapted struct); the whole input must be consumed.
        start_rule_ = +(element_)[push_back(at_c<0>(_val), _1)] >> qi::eoi;
    }
    private:
    rule<Iterator, document()> start_rule_;
    element_parser<Iterator> element_;
};

// Demo driver: parses a hard-coded schematic snippet and prints "OK" or "FAIL".
int main(int , char **) {
    document_parser<std::string::const_iterator> parser;
    document doc;
    // Named `input` rather than `text` so the local does not hide struct text.
    // NOTE(review): no rule visible in this listing consumes the leading
    // "v ..." header line, so this input is presumably rejected - confirm.
    const std::string input = "v 20180904 2\n"
                              "L 1 2 3 4 5 6 7 8 9 10\n"
                              "C 10 10 FOO\n"
                              "[ "
                              "L 1 2 3 4 5 6 7 8 9 10\n"
                              "]\n";
    const bool r = qi::phrase_parse(input.cbegin(), input.cend(), parser, qi::blank, doc);
    // '\n' instead of std::endl: no explicit flush is needed right before exit.
    std::cout << (r ? "OK" : "FAIL") << '\n';

    return 0;
}

Definitions of rules for 'text', 'circle' and 'box' are omitted, though. Note the comment in private section of element_parser definition - compiler will be unable to instantiate an incomplete class template composite_element_parser<Iterator>. What am I supposed to do with that? Obviously, I can't have element_parser and composite_element_parser as members of a top-level grammar (document_parser in my case) and pass them references/pointers to each other in the constructors initializer list, because they are uninitialized at the moment.

UPDATE: this thread may well be recognized as a duplicate of "Deeply-recursive qi grammars (parsers) with synthesized and inherited attributes", but I really can't grasp the accepted answer.

like image 476
nvevg Avatar asked Dec 18 '22 16:12

nvevg


2 Answers

Based on my earlier answer where I showed the approach passing references around, I simplified that answer to this:

// Single grammar holding every rule, so the mutually recursive rules
// (element_ -> composite_element_ -> elements_ -> element_) can simply refer
// to each other as members.
template <typename Iterator>
struct document_parser : qi::grammar<Iterator, document()> {
    document_parser() : document_parser::base_type{start_}
    {
        using namespace qi;

        line_              = 'L' >> auto_;
        box_               = 'B' >> auto_;
        circle_            = 'S' >> auto_;
        // text            = 'T' >> ...;
        // One shape or composite per line.
        element_           = (line_ | box_ | circle_ | composite_element_) >> eol;
        // Optional bracketed list: `space` is the skipper around the brackets,
        // `blank` is restored for the elements inside them.
        elements_          = -skip(space) [ '[' >> skip(blank) [*element_] >> ']' ];
        composite_element_ = 'C' >> int_ >> int_ >> lexeme[+graph] >> elements_;

        document_          = +element_ >> eoi;

        // start_ has no skipper of its own; it installs `blank` for the rest.
        start_ = skip(blank) [ document_ ];
        // Lists every shape rule, including box_ and circle_.
        BOOST_SPIRIT_DEBUG_NODES((document_)(element_)(composite_element_)(elements_)(line_)(box_)(circle_));
    }
  private:
    qi::rule<Iterator, document()> start_;
    qi::rule<Iterator, document(),             qi::blank_type> document_;
    qi::rule<Iterator, element(),              qi::blank_type> element_;
    qi::rule<Iterator, line(),                 qi::blank_type> line_;
    qi::rule<Iterator, box(),                  qi::blank_type> box_;
    qi::rule<Iterator, circle(),               qi::blank_type> circle_;
    qi::rule<Iterator, composite_component(),  qi::blank_type> composite_element_;
    qi::rule<Iterator, std::vector<element>(), qi::blank_type> elements_;
};

Note that it parses boxes and circles too now. You may want to read about Auto Parser for that magic.

See it Live On Wandbox

#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;

// Ten ints, in the order listed in BOOST_FUSION_ADAPT_STRUCT(line, ...).
struct line {
    int x1, y1, x2, y2;
    int color, width, capstyle;
    int dashstyle, dashlength, dashspace;
};
// Sixteen ints, in the order listed in BOOST_FUSION_ADAPT_STRUCT(box, ...).
struct box {
    int x, y, width, height;
    int color, line_width, capstyle;
    int dashstyle, dashlength, dashspace;
    int filltype, fillwidth;
    int angle1, pitch1, angle2, pitch2;
};
// Eight ints, in the order listed in BOOST_FUSION_ADAPT_STRUCT(circle, ...).
struct circle {
    int x, y, radius;
    int color, line_width, capstyle;
    int dashstyle, dashlength;
};
// Nine ints followed by the text lines, in the order listed in
// BOOST_FUSION_ADAPT_STRUCT(text, ...).
struct text {
    int x, y;
    int color, size, visibility, show_name_value;
    int angle, alignment, num_lines;
    std::vector<std::string> lines;
};

// Forward declaration so the variant below can name the composite type before
// it is defined.
struct composite_component;
// One parsed shape; the composite alternative is held through
// boost::recursive_wrapper because its type is still incomplete here.
using element_t = boost::variant<line, box, circle, text, boost::recursive_wrapper<composite_component>>;

// Wrapper around a single parsed shape; it is the value type of the element
// vectors in composite_component and document.
struct element {
    // ...
    // The shape itself (one alternative of the element_t variant).
    element_t element;
};

// Attribute record for a 'C' entry. Member order matches
// BOOST_FUSION_ADAPT_STRUCT(composite_component, x, y, basename, elements).
struct composite_component {
    int x, y;
    std::string basename;
    std::vector<element> elements;  // embedded elements, if any
};

// Top-level parse result: every element of the input, in input order.
struct document {
    std::vector<element> elements;
};

// Register every attribute struct with Boost.Fusion, listing the members in
// declaration order.
BOOST_FUSION_ADAPT_STRUCT(line, x1, y1, x2, y2, color, width, capstyle, dashstyle, dashlength, dashspace)
BOOST_FUSION_ADAPT_STRUCT(box, x, y, width, height, color, line_width, capstyle, dashstyle, dashlength, dashspace, filltype, fillwidth, angle1, pitch1, angle2, pitch2)
BOOST_FUSION_ADAPT_STRUCT(circle, x, y, radius, color, line_width, capstyle, dashstyle, dashlength)
BOOST_FUSION_ADAPT_STRUCT(text, x, y, color, size, visibility, show_name_value, angle, alignment, num_lines, lines)
BOOST_FUSION_ADAPT_STRUCT(composite_component, x, y, basename, elements)
BOOST_FUSION_ADAPT_STRUCT(element, element)
BOOST_FUSION_ADAPT_STRUCT(document, elements)

// Single grammar holding every rule, so the mutually recursive rules
// (element_ -> composite_element_ -> elements_ -> element_) can refer to each
// other as plain members. The grammar itself takes no skipper; start_ installs
// `blank` for the rules below it.
template <typename Iterator>
struct document_parser : qi::grammar<Iterator, document()> {
    document_parser() : document_parser::base_type{start_}
    {
        using namespace qi;

        // Shape rules: a tag character followed by the struct's fields,
        // parsed through auto_.
        line_              = 'L' >> auto_;
        box_               = 'B' >> auto_;
        circle_            = 'S' >> auto_;
        // text            = 'T' >> ...;
        // One shape or composite per line.
        element_           = (line_ | box_ | circle_ | composite_element_) >> eol;
        // Optional bracketed list: `space` is the skipper around the brackets,
        // `blank` is restored for the elements inside them.
        elements_          = -skip(space) [ '[' >> skip(blank) [*element_] >> ']' ];
        // 'C' x y basename, then the optional embedded element list.
        composite_element_ = 'C' >> int_ >> int_ >> lexeme[+graph] >> elements_;

        // The whole input must consist of elements.
        document_          = +element_ >> eoi;

        start_ = skip(blank) [ document_ ];
        BOOST_SPIRIT_DEBUG_NODES((document_)(element_)(composite_element_)(elements_)(line_)(box_)(circle_));
    }
  private:
    qi::rule<Iterator, document()> start_;
    qi::rule<Iterator, document(),             qi::blank_type> document_;
    qi::rule<Iterator, element(),              qi::blank_type> element_;
    qi::rule<Iterator, line(),                 qi::blank_type> line_;
    qi::rule<Iterator, box(),                  qi::blank_type> box_;
    qi::rule<Iterator, circle(),               qi::blank_type> circle_;
    qi::rule<Iterator, composite_component(),  qi::blank_type> composite_element_;
    qi::rule<Iterator, std::vector<element>(), qi::blank_type> elements_;
};

// Demo driver: parses a hard-coded schematic snippet and prints "OK" or "FAIL".
int main(int , char **) {
    document_parser<std::string::const_iterator> parser;

    // Named `input` rather than `text` so the local does not hide struct text.
    const std::string input = // "v 20180904 2\n"
         "L 1 2 3 4 5 6 7 8 9 10\n"
         "C 10 10 FOO\n"
         "[ "
         "    L 10 20 30 40 50 60 70 80 90 100\n"
         "]\n";

    document doc;
    // Plain qi::parse: the grammar takes no skipper and installs its own.
    const bool r = qi::parse(input.cbegin(), input.cend(), parser, doc);
    // '\n' instead of std::endl: no explicit flush is needed right before exit.
    std::cout << (r ? "OK" : "FAIL") << '\n';
}

Prints

OK

And debug output:

<document_>
  <try>L 1 2 3 4 5 6 7 8 9 </try>
  <element_>
    <try>L 1 2 3 4 5 6 7 8 9 </try>
    <line_>
      <try>L 1 2 3 4 5 6 7 8 9 </try>
      <success>\nC 10 10 FOO\n[     L</success>
      <attributes>[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]</attributes>
    </line_>
    <success>C 10 10 FOO\n[     L </success>
    <attributes>[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]]</attributes>
  </element_>
  <element_>
    <try>C 10 10 FOO\n[     L </try>
    <line_>
      <try>C 10 10 FOO\n[     L </try>
      <fail/>
    </line_>
    <box_>
      <try>C 10 10 FOO\n[     L </try>
      <fail/>
    </box_>
    <circle_>
      <try>C 10 10 FOO\n[     L </try>
      <fail/>
    </circle_>
    <composite_element_>
      <try>C 10 10 FOO\n[     L </try>
      <elements_>
        <try>\n[     L 10 20 30 40</try>
        <element_>
          <try>     L 10 20 30 40 5</try>
          <line_>
            <try>     L 10 20 30 40 5</try>
            <success>\n]\n</success>
            <attributes>[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]</attributes>
          </line_>
          <success>]\n</success>
          <attributes>[[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]]</attributes>
        </element_>
        <element_>
          <try>]\n</try>
          <line_>
            <try>]\n</try>
            <fail/>
          </line_>
          <box_>
            <try>]\n</try>
            <fail/>
          </box_>
          <circle_>
            <try>]\n</try>
            <fail/>
          </circle_>
          <composite_element_>
            <try>]\n</try>
            <fail/>
          </composite_element_>
          <fail/>
        </element_>
        <success>\n</success>
        <attributes>[[[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]]]</attributes>
      </elements_>
      <success>\n</success>
      <attributes>[[10, 10, [F, O, O], [[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]]]]</attributes>
    </composite_element_>
    <success></success>
    <attributes>[[[10, 10, [F, O, O], [[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]]]]]</attributes>
  </element_>
  <element_>
    <try></try>
    <line_>
      <try></try>
      <fail/>
    </line_>
    <box_>
      <try></try>
      <fail/>
    </box_>
    <circle_>
      <try></try>
      <fail/>
    </circle_>
    <composite_element_>
      <try></try>
      <fail/>
    </composite_element_>
    <fail/>
  </element_>
  <success></success>
  <attributes>[[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], [[10, 10, [F, O, O], [[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]]]]]]]</attributes>
</document_>
like image 138
sehe Avatar answered Jan 07 '23 22:01

sehe


Usually you just do not split a grammar that way. But if you really want to, there are multiple ways:

  1. Create the grammars separately and assign them to rule placeholders externally:

    #include <boost/spirit/include/qi.hpp>
    
    namespace qi = boost::spirit::qi;
    
    // Matches '[' <outer> ']' and yields the int produced by `outer`.
    template <typename Iterator>
    struct grammar1 : qi::grammar<Iterator, int()>
    {
        // Public placeholder rule: left undefined by the constructor and
        // assigned from outside once the other grammar exists (see main).
        qi::rule<Iterator, int()> outer;

        grammar1() : grammar1::base_type{root_}
        {
            root_ = '[' >> outer >> ']';
        }

    private:
        qi::rule<Iterator, int()> root_;
    };
    
    // Matches either whatever `outer` matches or a plain integer.
    template <typename Iterator>
    struct grammar2 : qi::grammar<Iterator, int()>
    {
        // Public placeholder rule: left undefined by the constructor and
        // assigned from outside once the other grammar exists (see main).
        qi::rule<Iterator, int()> outer;

        grammar2() : grammar2::base_type{root_}
        {
            root_ = outer | qi::int_;
        }

    private:
        qi::rule<Iterator, int()> root_;
    };
    
    // Builds both grammars, points each one's `outer` placeholder at the other,
    // then parses "[[123]]" and prints the resulting integer or "failed".
    int main()
    {
        // The array's size is known at compile time, so the end pointer needs
        // no std::strlen (this listing never includes <cstring>).
        char const input[] = "[[123]]";
        char const* s = input;
        char const* e = input + sizeof(input) - 1;  // stop before the '\0'
        grammar2<char const*> g2;
        grammar1<char const*> g1;
        // Close the cycle only after both objects exist.
        g2.outer = g1;
        g1.outer = g2;
        int value = 0;
        if (qi::parse(s, e, g1, value))
            std::cout << value << '\n';
        else
            std::cout << "failed\n";
    }
    

    https://wandbox.org/permlink/QhA18pIZwVlQ2osi

  2. Create one grammar dynamically inside the other and pass it a reference to the former's rule:

    #include <boost/spirit/include/qi.hpp>
    
    namespace qi = boost::spirit::qi;
    
    // Forward declaration: grammar1 holds a std::unique_ptr to grammar2, which
    // is defined further down.
    template <typename Iterator>
    struct grammar2;
    
    // Matches '[' <grammar2> ']'. It creates its grammar2 itself and hands it a
    // reference to its own start rule, which closes the recursion.
    template <typename Iterator>
    struct grammar1 : qi::grammar<Iterator, int()>
    {
        grammar1()
            : grammar1::base_type{start_}
        {
            // Done in the constructor body, i.e. after all members (start_
            // included) have been constructed.
            outer_ = std::make_unique<grammar2<Iterator>>(start_);
            start_ = '[' >> *outer_ >> ']';  // NOTE: `*outer_` dereferences the unique_ptr; it is not a Kleene star!
        }
    
    private:
        std::unique_ptr<grammar2<Iterator>> outer_;
        qi::rule<Iterator, int()> start_;
    };
    
    // Matches either whatever the supplied `outer` rule matches or a plain
    // integer. The rule is passed in by reference at construction time.
    template <typename Iterator>
    struct grammar2 : qi::grammar<Iterator, int()>
    {
        explicit grammar2(qi::rule<Iterator, int()> const& outer)
            : grammar2::base_type{root_}
        {
            root_ = outer | qi::int_;
        }

    private:
        qi::rule<Iterator, int()> root_;
    };
    
    // Parses "[[123]]" with grammar1 (which builds its own grammar2) and prints
    // the resulting integer or "failed".
    int main()
    {
        // The array's size is known at compile time, so the end pointer needs
        // no std::strlen (this listing never includes <cstring>).
        char const input[] = "[[123]]";
        char const* s = input;
        char const* e = input + sizeof(input) - 1;  // stop before the '\0'
        grammar1<char const*> const g1;
        int value = 0;
        if (qi::parse(s, e, g1, value))
            std::cout << value << '\n';
        else
            std::cout << "failed\n";
    }
    

    https://wandbox.org/permlink/hJz3v1ApK8GCkquS

like image 40
Nikita Kniazev Avatar answered Jan 07 '23 23:01

Nikita Kniazev