I managed to parse a PGN file thanks to the Boost Spirit library, but parsing fails as soon as the input contains characters I did not anticipate.
Here is my Spirit grammar :
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapt loloof64::pgn_tag as a Boost.Fusion sequence with members (key, value),
// listed in the order the `tag` rule produces its attributes.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// Adapt loloof64::game_move as a Fusion sequence; the member order here matches
// the order in which the `full_move` rule yields its sub-attributes
// (number, turn marker, first move, optional second move, optional result).
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// Adapt loloof64::pgn_game as a Fusion sequence: optional header tags followed
// by the list of moves, mirroring the `single_game` rule.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    /**
     * Spirit Qi grammar that parses one or more PGN games into
     * std::vector<pgn_game>.
     *
     * The skipper type is qi::unused_type, so nothing is skipped implicitly
     * inside the rules: every piece of whitespace the grammar tolerates is
     * matched explicitly through omit[...].
     *
     * Every attribute is spelled in the function-type form `T()` that the
     * Qi rule/grammar documentation uses, so all rules declare their
     * synthesized attribute the same way.
     *
     * @tparam Iterator forward iterator over the characters to parse
     */
    template <typename Iterator>
    struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::unused_type>
    {
        pgn_parser() : pgn_parser::base_type(games)
        {
            using qi::lexeme;
            using ascii::char_;
            using qi::uint_;
            using qi::alnum;
            using qi::space;
            using qi::omit;
            using qi::eol;
            using qi::lit;

            // "..." : everything between two double quotes (quotes dropped).
            quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

            // [Key "Value"] followed by at least one line break.
            tag %=
                '['
                >> +alnum
                >> omit[+space]
                >> quoted_string
                >> ']'
                >> omit[+eol]
                ;

            header %= +tag;

            // "..." must be tried first, otherwise "." would match its prefix.
            move_turn %= qi::string("...") | qi::string(".");

            regular_move %=
                +char_("a-hNBRQK")
                >> +char_("a-h1-8x=NBRQK")
                >> -qi::string("e.p.")
                ;

            // Long castling first for the same prefix reason as move_turn.
            castle_move %= qi::string("O-O-O") | qi::string("O-O");

            // A move optionally followed by a check ('+') or mate ('#') mark.
            single_move %=
                (regular_move | castle_move) >> -(char_('+') | char_('#'))
                ;

            result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

            // <number><turn> <move> [<move>] [<result>]
            full_move %=
                uint_
                >> move_turn
                >> omit[*space]
                >> single_move
                >> -(omit[+space] >> single_move)
                >> -(omit[+space] >> result)
                ;

            game_description %= full_move
                >> *(omit[*space] >> full_move);

            single_game %=
                -header
                >> game_description
                ;

            games %=
                single_game
                >> *(omit[*(space|eol)] >> single_game)
                ;
        }

        qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>(), qi::unused_type> header;
        qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
        qi::rule<Iterator, std::string(), qi::unused_type> result;
        qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
        qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
        qi::rule<Iterator, std::string(), qi::unused_type> single_move;
        qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
        qi::rule<Iterator, game_move(), qi::unused_type> full_move;
        qi::rule<Iterator, std::vector<game_move>(), qi::unused_type> game_description;
        qi::rule<Iterator, pgn_game(), qi::unused_type> single_game;
        // Start rule: its template arguments must match the grammar's exactly.
        qi::rule<Iterator, std::vector<pgn_game>(), qi::unused_type> games;
    };
}
How can I simply consume any character I did not anticipate? In other words, how can I ignore any character that is not matched by any of my grammar rules?
For testing purposes, here is my parser header (pgn_games_extractor.hpp):
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

    /// A single header tag of a game, stored as a key/value pair of strings.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered move entry; every textual part is kept as a raw string.
    struct game_move {
        unsigned move_number;
        std::string move_turn;
        std::string white_move;
        std::string black_move;
        std::string result;
    };

    /// A whole game: its header tags plus its list of moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Parses its input at construction time and stores the resulting games.
    class PgnGamesExtractor
    {
    public:
        /*
         * Both constructors may throw PgnParsingException (if bad pgn format)
         * and InputFileException (if missing file).
         */
        PgnGamesExtractor(std::string inputFilePath);
        PgnGamesExtractor(std::ifstream &inputFile);

        virtual ~PgnGamesExtractor();

        /// Returns a copy of the stored games.
        std::vector<pgn_game> getGames() const { return games; }

    private:
        void parseInput(std::ifstream &inputFile);

        std::vector<pgn_game> games;
    };

    /// Error type for input that is not well-formed PGN.
    class PgnParsingException : public std::runtime_error
    {
    public:
        PgnParsingException(std::string message): std::runtime_error(message){}
    };

    /// Error type for an input file that is missing or unreadable.
    class InputFileException : public std::runtime_error
    {
    public:
        InputFileException(std::string message) : std::runtime_error(message){}
    };

}
#endif // PGNGAMESEXTRACTOR_HPP
Here is my parser source (pgn_games_extractor.cpp) :
#include "pgn_games_extractor.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapt loloof64::pgn_tag as a Boost.Fusion sequence with members (key, value),
// listed in the order the `tag` rule produces its attributes.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// Adapt loloof64::game_move as a Fusion sequence; the member order here matches
// the order in which the `full_move` rule yields its sub-attributes
// (number, turn marker, first move, optional second move, optional result).
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// Adapt loloof64::pgn_game as a Fusion sequence: optional header tags followed
// by the list of moves, mirroring the `single_game` rule.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    /**
     * Spirit Qi grammar that parses one or more PGN games into
     * std::vector<pgn_game>.
     *
     * The skipper type is qi::unused_type, so nothing is skipped implicitly
     * inside the rules: every piece of whitespace the grammar tolerates is
     * matched explicitly through omit[...].
     *
     * Every attribute is spelled in the function-type form `T()` that the
     * Qi rule/grammar documentation uses, so all rules declare their
     * synthesized attribute the same way.
     *
     * @tparam Iterator forward iterator over the characters to parse
     */
    template <typename Iterator>
    struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::unused_type>
    {
        pgn_parser() : pgn_parser::base_type(games)
        {
            using qi::lexeme;
            using ascii::char_;
            using qi::uint_;
            using qi::alnum;
            using qi::space;
            using qi::omit;
            using qi::eol;
            using qi::lit;

            // "..." : everything between two double quotes (quotes dropped).
            quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

            // [Key "Value"] followed by at least one line break.
            tag %=
                '['
                >> +alnum
                >> omit[+space]
                >> quoted_string
                >> ']'
                >> omit[+eol]
                ;

            header %= +tag;

            // "..." must be tried first, otherwise "." would match its prefix.
            move_turn %= qi::string("...") | qi::string(".");

            regular_move %=
                +char_("a-hNBRQK")
                >> +char_("a-h1-8x=NBRQK")
                >> -qi::string("e.p.")
                ;

            // Long castling first for the same prefix reason as move_turn.
            castle_move %= qi::string("O-O-O") | qi::string("O-O");

            // A move optionally followed by a check ('+') or mate ('#') mark.
            single_move %=
                (regular_move | castle_move) >> -(char_('+') | char_('#'))
                ;

            result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

            // <number><turn> <move> [<move>] [<result>]
            full_move %=
                uint_
                >> move_turn
                >> omit[*space]
                >> single_move
                >> -(omit[+space] >> single_move)
                >> -(omit[+space] >> result)
                ;

            game_description %= full_move
                >> *(omit[*space] >> full_move);

            single_game %=
                -header
                >> game_description
                ;

            games %=
                single_game
                >> *(omit[*(space|eol)] >> single_game)
                ;
        }

        qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>(), qi::unused_type> header;
        qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
        qi::rule<Iterator, std::string(), qi::unused_type> result;
        qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
        qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
        qi::rule<Iterator, std::string(), qi::unused_type> single_move;
        qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
        qi::rule<Iterator, game_move(), qi::unused_type> full_move;
        qi::rule<Iterator, std::vector<game_move>(), qi::unused_type> game_description;
        qi::rule<Iterator, pgn_game(), qi::unused_type> single_game;
        // Start rule: its template arguments must match the grammar's exactly.
        qi::rule<Iterator, std::vector<pgn_game>(), qi::unused_type> games;
    };
}
/// Opens the file named by inputFilePath and hands the stream to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream source(inputFilePath);
    parseInput(source);
}
/// Parses an already opened file stream by forwarding it to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
    parseInput(inputFile);
}
/// Nothing to release by hand; the members clean up after themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
/**
 * Reads the whole stream into memory and parses it with pgn_parser.
 * On success the parsed games replace the `games` member; on failure
 * `games` is left untouched.
 *
 * @param inputFile stream to read the PGN text from
 * @throws InputFileException  if the stream is not usable on entry, if it
 *                             yields no data, or if a read error occurs
 * @throws PgnParsingException if the text does not match the grammar in
 *                             full; the message carries the unparsed rest
 */
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Read in fixed-size chunks until the stream is exhausted. The final,
    // partial chunk makes read() fail, hence the additional gcount() test.
    std::string content;
    char chunk[4096];
    while (inputFile.read(chunk, sizeof chunk) || inputFile.gcount() > 0)
        content.append(chunk, static_cast<std::string::size_type>(inputFile.gcount()));

    // failbit is expected here (end of file was reached); only badbit or an
    // empty result means the data could not be read. Thrown by value so that
    // `catch (const InputFileException&)` handlers actually match.
    if (inputFile.bad() || content.empty())
        throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;
    std::string::const_iterator iter = content.begin();
    std::string::const_iterator end = content.end();
    const bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);

    // A match that stops before the end of the input is a failure as well.
    if (!success || iter != end)
    {
        const std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
    games.swap(temp_games);
}
I am asking this question because I could not parse the following PGN file: ScotchGambitPgn.zip. I think this is due to an encoding issue with the file.
I am using Spirit 2 and C++11 (GNU).
As requested, here is the simple X3 translation.
The outputs are identical (exactly).
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

    /// A single header tag of a game, stored as a key/value pair of strings.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered move entry with the moves of both sides and an outcome.
    struct game_move {
        enum result_t { white_won, black_won, draw, undecided };

        unsigned move_number = 0;
        std::string white_move;
        std::string black_move;
        // Defaults to `undecided`: without an initializer a value-initialized
        // game_move would carry enumerator 0 (white_won), so an entry for
        // which no result was ever stored would read as a win for White.
        result_t result = undecided;
    };

    /// A whole game: its header tags plus its list of moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Parses its input at construction time and stores the resulting games.
    class PgnGamesExtractor {
      public:
        /*
         * Both constructors may throw PgnParsingException (if bad pgn format)
         * and InputFileException (if missing file).
         */
        PgnGamesExtractor(std::string inputFilePath);
        PgnGamesExtractor(std::istream &inputFile);

        /// Returns a copy of the stored games.
        std::vector<pgn_game> getGames() const { return games; }
        virtual ~PgnGamesExtractor();

      protected:
      private:
        std::vector<pgn_game> games;
        void parseInput(std::istream &inputFile);
    };

    /// Error type for input that is not well-formed PGN.
    class PgnParsingException : public virtual std::runtime_error {
      public:
        PgnParsingException(std::string message) : std::runtime_error(message) {}
    };

    /// Error type for an input file that is missing or unreadable.
    class InputFileException : public virtual std::runtime_error {
      public:
        InputFileException(std::string message) : std::runtime_error(message) {}
    };
}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapt the three data structs as Boost.Fusion sequences. Only member names are
// listed; the order given here is the order in which the parser rules
// (tag, full_move, single_game) produce the corresponding attributes.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace pgn_parser {
using namespace boost::spirit::x3;
static std::string const no_move;
static auto const result = []{
symbols<game_move::result_t> table;
table.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
return table;
}();
static auto const quoted_string = lexeme['"' >> *~char_('"') >> '"'];
static auto const tag = '[' >> +alnum >> quoted_string >> ']';
static auto const header = +tag;
static auto const regular_move = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
static auto const single_move = rule<struct single_move_, std::string> { "single_move" }
= raw [ lexeme [ regular_move >> -char_("+#")] ];
static auto const full_move = rule<struct full_move_, game_move> { "full_move" }
= uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
static auto const game_description = +full_move;
static auto const single_game = rule<struct single_game_, pgn_game> { "single_game" }
= -header >> game_description;
static auto const games = *single_game;
}
}
/// Opens the file named by inputFilePath and hands the stream to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream source(inputFilePath);
    parseInput(source);
}
/// Parses an already opened input stream by forwarding it to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    parseInput(inputFile);
}
/// Nothing to release by hand; the members clean up after themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
#include <iostream>
int main() {
loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
std::cout << "Parsed " << pge.getGames().size() << " games\n";
for (auto& g : pge.getGames())
for (auto& m : g.moves)
std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With