Is there a way to make the Boost tokenizer split the string below without splitting the quoted part?
string s = "1st 2nd \"3rd with some comment\" 4th";
Expected output:
1st
2nd
3rd with some comment
4th
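You can use a boost::escaped_list_separator
from the Boost.Tokenizer library. Its default field separator is a comma, so construct it with a space as the separator and a double quote as the quote character. See this question for more details on how to apply it to your problem.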
C++11 solution
#include <iostream>
#include <string>
#include <vector>
// Splits `str` into tokens separated by single space characters, treating
// text enclosed in double quotes as part of one token (spaces included).
//
// Rules, in the order they are applied to each character:
//   * A double quote toggles "quoted" mode and is never copied to the output.
//     A closing quote always emits the pending token, even when it is empty,
//     so `""` produces one empty token. An opening quote does not emit, so
//     unquoted text directly before a quote is merged with the quoted text.
//   * A space outside quotes ends the pending token; runs of spaces do not
//     produce empty tokens.
//   * Every other character (and a space inside quotes) is appended to the
//     pending token.
// Whatever is still pending at the end of input is emitted if non-empty, so
// an unterminated quote yields its contents as the final token.
std::vector<std::string> tokenize(const std::string& str) {
    std::vector<std::string> result;
    std::string current;
    bool quoted = false;

    for (const char ch : str) {
        if (ch == '"') {
            if (quoted) {
                // Closing quote: emit unconditionally (may be empty).
                result.push_back(current);
                current.clear();
            }
            quoted = !quoted;
            continue;
        }

        const bool is_separator = (ch == ' ') && !quoted;
        if (!is_separator) {
            current.push_back(ch);
            continue;
        }

        // Unquoted space: finish the pending token, skipping empty ones.
        if (!current.empty()) {
            result.push_back(current);
            current.clear();
        }
    }

    // Flush the trailing token, if any.
    if (!current.empty()) {
        result.push_back(current);
    }
    return result;
}
int main() {
std::string s = "1st 2nd \"3rd with some comment\" 4th";
std::vector<std::string> tokens = tokenize(s);
for (auto iter = tokens.cbegin(); iter != tokens.cend(); ++iter) {
std::cout << *iter << "\n";
}
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With