In Boost::Spirit, how can I parse entries that are followed by either a semicolon or by a newline with optional semicolon?
Example input, where each entry is an int and a double:
12 1.4;
63 13.2
2423 56.4 ; 5 8.1
Here is example code that just parses entries followed by whitespace:
#include <iostream>
#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/std_pair.hpp>
namespace qi = boost::spirit::qi;
typedef std::pair<int, double> Entry;
/// Qi grammar that parses one or more "<int> <double>" records into a
/// std::vector<Entry>. Nothing separates the records except whatever the
/// Skipper consumes.
///
/// @tparam Iterator  iterator type over the input characters
/// @tparam Skipper   skip-parser type used between tokens
template <typename Iterator, typename Skipper>
struct MyGrammar : qi::grammar<Iterator, std::vector<Entry>(), Skipper> {
    typedef std::vector<Entry> EntryList;

    // Rules are declared ahead of the constructor that wires them up; their
    // relative order is the same as before.
    qi::rule<Iterator, Entry(), Skipper> entry;        // a single int/double pair
    qi::rule<Iterator, EntryList(), Skipper> entries;  // start rule: the whole list

    MyGrammar() : MyGrammar::base_type(entries) {
        // An int followed by a double; the pair maps onto Entry.
        entry = qi::int_ >> qi::double_;

        // At least one entry must be present.
        entries = +entry;
    }
};
int main() {
typedef boost::spirit::istream_iterator It;
std::cin.unsetf(std::ios::skipws);
It it(std::cin), end;
MyGrammar<It, qi::space_type> entry_grammar;
std::vector<Entry> entries;
if (qi::phrase_parse(it, end, entry_grammar, qi::space, entries)
&& it == end) {
BOOST_FOREACH(Entry const& entry, entries) {
std::cout << entry.first << " and " << entry.second << std::endl;
}
}
else {
std::cerr << "FAIL" << std::endl;
exit(1);
}
return 0;
}
Now, to parse the way I want (each entry followed by semicolon or newline with optional semicolon), I replaced this:
entries = +entry;
by this:
// Attempt: after each entry require an unskipped end-of-line, a ';', or an
// end-of-line followed by ';' (`a || b` matches a with optional b, or b alone).
entries = +(entry >> (qi::no_skip[qi::eol] || ';'));
where the boost::spirit
operator ||
means: (a followed by optional b) or b. But this gives an error if there is a space after the 1.4
in this example input:
12 1.4
63 13.2
It makes sense that the space is not matched because of the no_skip
but I wasn't able to find a solution.
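Here's my take.
You might want to use qi::blank as the skipper (which is qi::space except qi::eol, i.e. it skips spaces and tabs but not newlines). This will remove the need for no_skip, because line ends stay visible to the grammar. The core grammar becomes: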
// One entry: an int followed by a double.
entry = qi::int_ >> qi::double_;
// Entries separated by one or more '\n' or ';' characters; whitespace left
// after the last entry (newlines included) is consumed and discarded.
entries = entry % +qi::char_("\n;") >> qi::omit[*qi::space];
Use BOOST_SPIRIT_DEBUG to learn where parsing fails and why (e.g. backtracking)
Output:
12 and 1.4
63 and 13.2
2423 and 56.4
5 and 8.1
Full code:
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/std_pair.hpp>
namespace qi = boost::spirit::qi;
typedef std::pair<int, double> Entry;
/// Qi grammar that parses "<int> <double>" records, separated by newlines
/// and/or semicolons, into a std::vector<Entry>.
///
/// The final `> qi::eoi` is an expectation point: once at least one entry has
/// matched, leftover input makes Qi throw qi::expectation_failure<Iterator>
/// instead of merely failing the parse.
///
/// @tparam Iterator  iterator type over the input characters
/// @tparam Skipper   skip-parser type used between tokens; it must not consume
///                   newlines, because the separator rule matches '\n' itself
template <typename Iterator, typename Skipper>
struct MyGrammar : qi::grammar<Iterator, std::vector<Entry>(), Skipper> {
    typedef std::vector<Entry> EntryList;

    // Rules are declared ahead of the constructor that wires them up; their
    // relative order is the same as before.
    qi::rule<Iterator, Entry(), Skipper> entry;        // a single int/double pair
    qi::rule<Iterator, EntryList(), Skipper> entries;  // start rule: the whole list

    MyGrammar() : MyGrammar::base_type(entries) {
        // An int followed by a double; the pair maps onto Entry.
        entry = qi::int_ >> qi::double_;

        entries = entry % +qi::char_("\n;")  // records, split by runs of '\n' / ';'
               >> qi::omit[*qi::space]       // discard trailing whitespace and newlines
                > qi::eoi;                   // nothing else may follow

        // These only produce output when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODE(entry);
        BOOST_SPIRIT_DEBUG_NODE(entries);
    }
};
int main() {
typedef boost::spirit::istream_iterator It;
std::cin.unsetf(std::ios::skipws);
It it(std::cin), end;
MyGrammar<It, qi::blank_type> entry_grammar;
std::vector<Entry> entries;
if (qi::phrase_parse(it, end, entry_grammar, qi::blank, entries)
&& it == end) {
BOOST_FOREACH(Entry const& entry, entries) {
std::cout << entry.first << " and " << entry.second << std::endl;
}
}
else {
std::cerr << "FAIL" << std::endl;
exit(1);
}
return 0;
}
Okay, I found that this works fine:
// Inside no_skip nothing is skipped automatically, so literal spaces before the
// end-of-line are consumed explicitly; tabs are not covered.
entries = +(entry >> (qi::no_skip[*qi::lit(' ') >> qi::eol] || ';'));
So the immediate question is solved.
But it will still fail if a tab comes after the 1.4
in
12 1.4
63 13.2
This would be better but it won't compile:
// Does not compile as written (see the static_cast error quoted after it).
// NOTE(review): unlike qi::lit(' '), qi::space exposes a char attribute, which
// presumably changes the synthesized attribute and triggers the error — confirm.
// qi::space also matches end-of-line itself, so qi::eol could never match after it.
entries = +(entry >> (qi::no_skip[*qi::space >> qi::eol] || ';'));
The error:
error: invalid static_cast from type ‘const std::pair<int, double>’ to type ‘int’
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With