I'm trying to get a grip with the new Spirit X3 (boost 1.61.0).
My machine is a MacBook Pro (i7-4750HQ) running Linux.
Having used version 2 of Spirit I was used to large compile times, but this doesn't feel right. For the following first steps of an expression parser the compilation needs 20s.
I thought X3 will be faster, so is this reasonable? Is my code suboptimal?
Compiler settings (clang 3.8.0)
clang++ -c -pipe -std=c++14 -ftemplate-depth=512 -g -w -Wall -Wno-unused-parameter -fPIC
Code:
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <vector>
//--------------------------------------------------------------------------------------------------
namespace client { namespace ast
{
namespace fusion = boost::fusion;
namespace x3 = boost::spirit::x3;
struct number : x3::variant<int, double> {
using base_type::base_type;
using base_type::operator=;
};
struct add_ast;
struct mult_ast;
struct block_ast;
struct function;
struct expr_ast : x3::variant<
number,
x3::forward_ast<function>,
x3::forward_ast<add_ast>,
x3::forward_ast<mult_ast>,
x3::forward_ast<block_ast>
> {
using base_type::base_type;
using base_type::operator=;
};
struct add_ast {
expr_ast lhs;
bool add;
expr_ast rhs;
};
struct mult_ast {
expr_ast lhs;
bool mult;
expr_ast rhs;
};
struct block_ast {
expr_ast body;
};
struct function {
std::string name;
std::vector<expr_ast> params;
};
}}
//--------------------------------------------------------------------------------------------------
BOOST_FUSION_ADAPT_STRUCT(client::ast::add_ast,
(client::ast::expr_ast, lhs),
(bool, add),
(client::ast::expr_ast, rhs)
)
BOOST_FUSION_ADAPT_STRUCT(client::ast::mult_ast,
(client::ast::expr_ast, lhs),
(bool, mult),
(client::ast::expr_ast, rhs)
)
BOOST_FUSION_ADAPT_STRUCT(client::ast::block_ast,
(client::ast::expr_ast, body)
)
BOOST_FUSION_ADAPT_STRUCT(client::ast::function,
(std::string, name),
(std::vector<client::ast::expr_ast>, params)
)
//--------------------------------------------------------------------------------------------------
namespace client { namespace parser
{
namespace x3 = boost::spirit::x3;
const x3::rule<class expr, ast::expr_ast> expr = "expr";
const x3::rule<class add_expr, ast::expr_ast> add_expr = "add_expr";
const x3::rule<class mult_expr, ast::expr_ast> mult_expr = "mult_expr";
const x3::rule<class block_expr, ast::expr_ast> block_expr = "block_expr";
auto const number = x3::rule<class number, ast::number> {"number"}
= (x3::int_ >> !x3::lit('.')) | x3::double_;
auto const fct_name = x3::rule<class fct_name, std::string> {"fct_name"}
= x3::lexeme[ *x3::alpha >> *(x3::alnum | x3::char_('_')) ];
auto const function = x3::rule<class function, ast::function> {"function"}
= fct_name >> x3::lit("(") >> -expr % ',' >> ")";
auto const simple_expr = x3::rule<class simple_expr, ast::expr_ast> {"simple_expr"}
= function | number;
auto const block_term = x3::rule<class block_term, ast::block_ast> {"block_term"}
= "(" >> expr >> ")";
auto const mult_term = x3::rule<class mult_term, ast::mult_ast> {"mult_term"}
= block_expr
>> ((x3::lit("*") >> x3::attr(true)) | (x3::lit("/") >> x3::attr(false)))
>> mult_expr;
auto const add_term = x3::rule<class add_term, ast::add_ast> {"add_term"}
= mult_expr
>> ((x3::lit("+") >> x3::attr(true)) | (x3::lit("-") >> x3::attr(false)))
>> add_expr;
auto const block_expr_def = block_term | simple_expr;
auto const mult_expr_def = mult_term | block_expr;
auto const add_expr_def = add_term | mult_expr;
auto const expr_def = add_expr;
BOOST_SPIRIT_DEFINE(expr, add_expr, mult_expr, block_expr);
}}
//--------------------------------------------------------------------------------------------------
namespace client { namespace ast
{
struct printer
{
typedef std::string result_type;
std::string operator()(const expr_ast &ast) const
{
return boost::apply_visitor(printer(), ast);
}
std::string operator()(const number &value) const
{
return boost::apply_visitor(printer(), value);
}
std::string operator()(const add_ast &expr) const {
return "(" + boost::apply_visitor(printer(), expr.lhs) + (expr.add?" + ":" - ")
+ boost::apply_visitor(printer(), expr.rhs) + ")";
}
std::string operator()(const mult_ast &expr) const {
return "(" + boost::apply_visitor(printer(), expr.lhs) + (expr.mult?" * ":" / ")
+ boost::apply_visitor(printer(), expr.rhs) + ")";
}
std::string operator()(const block_ast &expr) const {
return boost::apply_visitor(printer(), expr.body);
}
std::string operator()(const function &fct) const
{
std::string result = fct.name + "(";
for (std::size_t i = 0; i < fct.params.size(); ++i) {
result += printer()(fct.params[i]);
if (i != fct.params.size() - 1)
result += ",";
}
result += ")";
return result;
}
std::string operator()(int const& value) const
{
return std::to_string(value);
}
std::string operator()(double const& value) const
{
return std::to_string(value);
}
};
}}
//--------------------------------------------------------------------------------------------------
int main()
{
std::vector<std::string> storage = {
"foo()", "-foo()",
"f1_2()",
"foo_bar ()",
"foo( bar (42, baz()))",
"foo(5)", "foo(-5)",
"foo(1.1, foo(4.21e-2, 4., 6))",
"1.1", "-1.1",
"1 * 1",
"foo(1 * 1) * bar(42)",
"foo(2 + 5.5, bar()*3.4-7)",
"foo(2 + 5.5, bar(baz(-5/foo())) * 3.4 - 7)",
"4 + 5 * 6",
"1+2+3+4*5*6*-7+-8*+9-0",
"(foo())",
"foo() * ((1+2)+3*(2+3))",
"(1+2)*3", "1+2*3",
"foo"
};
using boost::spirit::x3::ascii::space;
for (const auto &item : storage) {
using client::parser::expr; // Our grammar
client::ast::expr_ast ast; // Our tree
std::string::const_iterator iter = item.begin();
std::string::const_iterator end = item.end();
bool r = phrase_parse(iter, end, expr, space, ast);
if (r && iter == end)
{
std::cout << "Ok: " << item << " result: " << client::ast::printer()(ast) << std::endl;
}
else
{
std::cout << "Fail: " << item << std::endl;
}
}
}
This looks like a severe regression to me.
It took very long on my machine:
gcc 5: slowly using more and more memory up to 3GiB after 4min30s, followed by the assembler stage of ~20s:
g++-5 -std=c++14 -Wall -pedantic -Wextra -fsanitize=undefined,address -Wno-unused -g -O3 -isystem /home/sehe/custom/nonius/include -isystem /home/sehe/custom/boost_1_60_0 -pthread -march=native test.cpp -c -o test.o
test.cpp:119:62: warning: extra ‘;’ [-Wpedantic]
BOOST_SPIRIT_DEFINE(expr, add_expr, mult_expr, block_expr);
^
g++-5 -std=c++14 -Wall -pedantic -Wextra -fsanitize=undefined,address -Wno-unused -g -O3 -isystem /home/sehe/custom/nonius/include -isystem /home/sehe/custom/boost_1_60_0 -pthread -march=native test.o -o test -L /home/sehe/custom/boost_1_60_0/stage/lib/ -Wl,-rpath,/home/sehe/custom/boost_1_60_0/stage/lib -lboost_system -lboost_regex -lboost_thread -lboost_iostreams -lboost_serialization -lboost_filesystem -lboost_chrono -lrt -lboost_unit_test_framework -lpugixml -lssl -lcrypto -lxml2
real 4m50.427s
user 4m48.248s
sys 0m1.856s
clang 3.6: fails with template instantiation depth exceeded
/home/sehe/custom/boost_1_60_0/boost/spirit/home/x3/support/context.hpp|30 col 25| fatal error: recursive template instantiation exceeded maximum depth of 256
This then gives a direct hint as to what causes it.
My first hunch was that x3::variant
might lead to the compiler to more aggressively inline things, but replacing with boost::variant didn't help much:
g++-5 -std=c++14 -Wall -pedantic -Wextra -fsanitize=undefined,address -Wno-unused -g -O3 -isystem /home/sehe/custom/nonius/include -isystem /home/sehe/custom/boost_1_60_0 -pthread -march=native test.cpp -c -o test.o
test.cpp:135:62: warning: extra ‘;’ [-Wpedantic]
BOOST_SPIRIT_DEFINE(expr, add_expr, mult_expr, block_expr);
^
g++-5 -std=c++14 -Wall -pedantic -Wextra -fsanitize=undefined,address -Wno-unused -g -O3 -isystem /home/sehe/custom/nonius/include -isystem /home/sehe/custom/boost_1_60_0 -pthread -march=native test.o -o test -L /home/sehe/custom/boost_1_60_0/stage/lib/ -Wl,-rpath,/home/sehe/custom/boost_1_60_0/stage/lib -lboost_system -lboost_regex -lboost_thread -lboost_iostreams -lboost_serialization -lboost_filesystem -lboost_chrono -lrt -lboost_unit_test_framework -lpugixml -lssl -lcrypto -lxml2
real 3m55.728s
With no difference in resuts:
Ok: foo() result: foo()
Fail: -foo()
Ok: f1_2() result: f1_2()
Ok: foo_bar () result: foo_bar()
Ok: foo( bar (42, baz())) result: foo(bar(42,baz()))
Ok: foo(5) result: foo(5)
Ok: foo(-5) result: foo(-5)
Ok: foo(1.1, foo(4.21e-2, 4., 6)) result: foo(1.100000,foo(0.042100,4.000000,6))
Ok: 1.1 result: 1.100000
Ok: -1.1 result: -1.100000
Ok: 1 * 1 result: (1 * 1)
Ok: foo(1 * 1) * bar(42) result: (foo((1 * 1)) * bar(42))
Ok: foo(2 + 5.5, bar()*3.4-7) result: foo((2 + 5.500000),((bar() * 3.400000) - 7))
Ok: foo(2 + 5.5, bar(baz(-5/foo())) * 3.4 - 7) result: foo((2 + 5.500000),((bar(baz((-5 / foo()))) * 3.400000) - 7))
Ok: 4 + 5 * 6 result: (4 + (5 * 6))
Ok: 1+2+3+4*5*6*-7+-8*+9-0 result: (1 + (2 + (3 + ((4 * (5 * (6 * -7))) + ((-8 * 9) - 0)))))
Ok: (foo()) result: foo()
Ok: foo() * ((1+2)+3*(2+3)) result: (foo() * ((1 + 2) + (3 * (2 + 3))))
Ok: (1+2)*3 result: ((1 + 2) * 3)
Ok: 1+2*3 result: (1 + (2 * 3))
Fail: foo
I'd report this at the Spirit mailing list: http://boost.2283326.n4.nabble.com/spirit-general-f2672582.html
While this might actually be a regression in Spirit X3 like @sehe suggests there is a workaround with the current version:
Change all rules taking part in the expression recursion like this:
const x3::rule<class block_term, ast::block_ast> block_term = "block_term";
auto const block_term_def = x3::rule<class block_term, ast::block_ast> {"block_term"}
= "(" >> expr >> ")";
BOOST_SPIRIT_DEFINE(block_term)
This reduces compile time drastically and the parser is working well. Parser performance seems to be the same (very unscientific tests!).
The compile times were mitigated in Boost 1.77 by PR650 X3: Skip rule definition injection into context. It was taking 14 seconds before and 2 seconds after (just parsing the includes takes 1 second for me).
The reason to slowdown was nested usage of the inplace created rules like auto const somerule = x3::rule<...>{} = ...;
within alternative parsers.
Rules created like auto const somerule = rule_placeholder = ... may call rule_placeholder ...;
may recursively call itself. To support that rule definition injects itself into the context, and nesting multiple of them via alternative parser creates exponential amount of unique parsers, leading to not only a lot of template instantiation (which are actually pretty fast), but to huge amount of code generation that compiler front-end (Clang) passes to compiler back-end (LLVM) which is a huge bottleneck in Clang (it also takes time to optimize and deduplicate the LLVM IR).
After the fix auto const somerule = x3::rule<...>{} = ...;
are considered to no be able to call themselves recursively since they create the placeholder inplace which cannot be referenced in the same expression without hacks.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With