I am analyzing some scientific text whose format is something like
Keyword
{ 1.0 22.2 59.6 'cm' 'yes' }
I am new to Spirit, and after studying the documentation I can use it to parse fixed-format keywords.
But I don't know how to build a grammar for the following format. My question is: in the scientific keywords I've met, certain items of data can be defaulted to a built-in default value. The keyword description indicates when defaults can be applied. There are two ways of setting quantities to their default values. Firstly, by ending a data record prematurely with a closing brace '}', the quantities remaining unspecified are set to their default values. Secondly, selected quantities positioned before the '}' can be defaulted by entering n*, where n is the number of consecutive quantities to be defaulted. For example, 3* causes the next three quantities in the keyword data to be given their default values.
For example,
Person
{ 'Tom' 188 80 'male' 32 }
Say 'male' and 32 are the default values; then the equivalent records can be:
Person
{ 'Tom' 188 80 2* }
or
Person
{ 'Tom' 188 80 'male' 1* }
or
Person
{ 'Tom' 188 80 }
I've searched past posts, and this gives me some idea, but how can I write the rule of n*?
The parser you're asking for is very complex as it has to solve several tasks:
The trick here is to utilize qi::attr in different ways:
to supply default values for missing elements:
qi::int_ | qi::attr(180)
i.e. either match an integer or use default value 180
to supply all remaining values for the "n*" syntax (as @vines suggested):
"3*" >> qi::attr(attr2)
i.e. if 3* was matched, use the default value attr2 (which is a fusion::vector holding the defaults for the three remaining fields).
Overall, I came up with this solution, which seems to parse and return the default values just fine (even if it looks very complex):
#include <string>
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/vector.hpp>
// Demo driver for the nested-alternative approach.
//
// Builds a Spirit.Qi rule for a record of the form
//     Person { 'name' int int 'string' int }
// in which every field after the name may be given explicitly, left out
// before the closing brace, or replaced by an "n*" marker that stands for
// all n remaining fields. Three sample records are parsed and each
// successfully parsed attribute is streamed to std::cout; a record that
// fails to parse prints nothing. Always returns 0.
int main()
{
    namespace qi = boost::spirit::qi;
    namespace fusion = boost::fusion;

    // the attribute passed to the parser has to match (in structure) the
    // parser, requiring to create nested fusion::vector's
    typedef fusion::vector<std::string, int> attribute1_type;
    typedef fusion::vector<int, attribute1_type> attribute2_type;
    typedef fusion::vector<int, attribute2_type> attribute3_type;

    // overall attribute type
    typedef fusion::vector<std::string, attribute3_type> attribute_type;

    // initialize attributes with default values; each attrN bundles the
    // defaults for the last N+1 fields, so it can be injected as a whole
    // when the matching "n*" marker is seen
    attribute1_type attr1("male", 32);
    attribute2_type attr2(80, attr1);
    attribute3_type attr3(180, attr2);

    // a single-quoted string; declared without a skipper, so whitespace
    // between the quotes is kept as part of the string
    qi::rule<std::string::iterator, std::string()> quoted_string =
        "'" >> *~qi::char_("'") >> "'";

    // Each nesting level handles one field: either the "n*" marker that
    // defaults this field and everything after it, or the field itself
    // (explicit value or its default) followed by the next level.
    qi::rule<std::string::iterator, attribute_type(), qi::space_type> data =
        qi::lit("Person") >> "{"
            >> quoted_string
            >> -(   ("4*" >> qi::attr(attr3))
                |   (qi::int_ | qi::attr(180))
                    >> -(   ("3*" >> qi::attr(attr2))
                        |   (qi::int_ | qi::attr(80))
                            >> -(   ("2*" >> qi::attr(attr1))
                                |   (quoted_string | qi::attr("male"))
                                    // "1*" must supply the default itself,
                                    // like the other markers do; a bare
                                    // literal would match without ever
                                    // assigning 32 to the last field
                                    >> -(   ("1*" >> qi::attr(32))
                                        |   qi::int_
                                        |   qi::attr(32)
                                        )
                                )
                        )
                )
            >> "}";

    // all five fields given explicitly
    std::string in1 = "Person\n{ 'Tom' 188 80 'male' 32 }";
    attribute_type fullattr1;
    if (qi::phrase_parse(in1.begin(), in1.end(), data, qi::space, fullattr1))
        std::cout << fullattr1 << std::endl;

    // last field omitted before the closing brace
    std::string in2 = "Person\n{ 'Tom' 188 80 'male' }";
    attribute_type fullattr2;
    if (qi::phrase_parse(in2.begin(), in2.end(), data, qi::space, fullattr2))
        std::cout << fullattr2 << std::endl;

    // last three fields defaulted through the "3*" marker
    std::string in3 = "Person\n{ 'Tom' 188 3* }";
    attribute_type fullattr3;
    if (qi::phrase_parse(in3.begin(), in3.end(), data, qi::space, fullattr3))
        std::cout << fullattr3 << std::endl;

    return 0;
}
Splitting the rule into separate rules (as @vines suggests) would require the input to be parsed more than once, which is why I used this nested structure of sequences and alternatives.
I've just come up with a generalized solution, though it's a bit more complex =)
It handles both the "premature brace" case and multiple arbitrary skip specifiers. Here it is:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace ph = boost::phoenix;
// Plain record of the four integer fields produced by the parser.
struct numbers
{
    int i1;
    int i2;
    int i3;
    int i4;
};
// Adapt `numbers` as a Boost.Fusion sequence of its four int members
// (i1, i2, i3, i4, in that order). The grammar below uses numbers() as the
// attribute of its start rule, and main() prints it via fusion::as_vector.
BOOST_FUSION_ADAPT_STRUCT
(numbers,
(int, i1)
(int, i2)
(int, i3)
(int, i4)
)
// Grammar for a brace-delimited record of four integers: "{ a b c d }".
//
// Each field may be written explicitly, covered by a preceding "n*" skip
// directive, or simply be missing before the closing brace; in the last two
// cases the field receives the default passed to int_dflt (here -1).
//
// Template parameters:
//   Iterator - iterator type of the input being parsed
//   Skipper  - type of the skip parser handed to phrase_parse
//
// NOTE(review): the skip counter is a data member that the rules reference
// through ph::ref, so one Grammar object presumably must not be used by
// several parses at the same time - confirm before sharing an instance.
template <typename Iterator, typename Skipper>
struct Grammar : public qi::grammar <Iterator, numbers(), Skipper>
{
// Builds all rules; `start` is registered as the entry rule under the
// name "numbers".
Grammar() : Grammar::base_type(start, "numbers")
{
using qi::int_;
// This rule resets the skip counter:
// (qi::eps consumes no input, so the rule exists only for its action)
init_skip = qi::eps[ph::ref(skp) = 0];
// This rule parses the skip directive ("n*") and sets the skip counter:
// (lexeme keeps the number and the '*' adjacent; omit discards the parsed
// integer so the rule exposes no attribute)
skip_spec = qi::omit[ (qi::lexeme[ int_ >> "*" ])[ph::ref(skp) = qi::_1] ];
// This rule checks if we should skip the field, and if so, decrements
// the skip counter and returns the value given to it (the default one).
// If not, it tries to parse the int.
// If int fails to parse, the rule resorts the default value again,
// thus handling the "premature brace" case.
// (qi::_r1 is the rule's inherited argument, i.e. the default value)
int_dflt %= qi::eps(ph::ref(skp) > 0)[--ph::ref(skp)] >> qi::attr(qi::_r1) | int_ | qi::attr(qi::_r1);
// And this is the grammar:
// (an optional skip directive may precede each of the four fields, and
// every field defaults to -1)
start %= init_skip >>
"{" >> -skip_spec >> int_dflt(-1)
>> -skip_spec >> int_dflt(-1)
>> -skip_spec >> int_dflt(-1)
>> -skip_spec >> int_dflt(-1)
>> "}";
}
// the skip counter itself:
// (number of upcoming fields still to be defaulted; zeroed by init_skip at
// the beginning of `start`)
int skp;
// entry rule; synthesizes the `numbers` record
qi::rule <Iterator, numbers(), Skipper> start;
// attribute-less helper rules used only for their semantic actions
qi::rule <Iterator, Skipper> skip_spec, init_skip;
// one field: synthesizes an int and takes the default value as an
// inherited int argument
qi::rule <Iterator, int(int), Skipper> int_dflt;
};
// Command-line driver: parses argv[1] with Grammar and prints the result.
//
// Parameters:
//   argc, argv - standard program arguments; argv[1] is the text to parse,
//                e.g. "{ 1 2* 4 }".
// Returns 1 when no input argument was supplied. Otherwise returns 0,
// whether or not the parse succeeded; the outcome is reported on stdout.
int main (int argc, char **argv)
{
    using std::cout;
    using std::endl;

    // argv[1] only exists when an argument was passed; constructing a
    // std::string from the terminating null pointer is undefined behaviour.
    if (argc < 2)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "parser")
                  << " \"{ n1 n2 n3 n4 }\"" << endl;
        return 1;
    }

    std::string s = argv[1];
    numbers result;
    std::string::iterator ib = s.begin();
    std::string::iterator ie = s.end();
    bool r = qi::phrase_parse(ib, ie, Grammar<std::string::iterator, qi::space_type>(), qi::space, result );

    // Success means the grammar matched AND the whole input was consumed.
    if (r && ib == ie)
    {
        // Stream manipulators that make Fusion sequences print as [a, b, c].
        cout << boost::fusion::tuple_open('[');
        cout << boost::fusion::tuple_close(']');
        cout << boost::fusion::tuple_delimiter(", ");
        cout << "Parsing succeeded\n";
        cout << "got: " << boost::fusion::as_vector(result) << endl;
    }
    else
    {
        cout << "Parsing failed\n";
        // ib was advanced by phrase_parse, so this is the unconsumed input.
        cout << "err: " << std::string(ib, ie) << endl;
    }
    return 0;
}
PS: Note that the Skipper template argument has nothing to do with field skipping — it's simply the type of the whitespace-skipping parser used by the grammar.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With