I have yet another question. Suppose I have a std::wstring that looks like this:
ドイツ語で検索していてこちらのサイトにたどり着きました。
How could I possibly get it to be URL-Encoded (%nn, n = 0-9, a-f) to:
%E3%83%89%E3%82%A4%E3%83%84%E8%AA%9E%E3%81%A7%E6%A4%9C%E7%B4%A2%E3%81%97%E3%81%A6%E3%81%84%E3%81%A6%E3%81%93%E3%81%A1%E3%82%89%E3%81%AE%E3%82%B5%E3%82%A4%E3%83%88%E3%81%AB%E3%81%9F%E3%81%A9%E3%82%8A%E7%9D%80%E3%81%8D%E3%81%BE%E3%81%97%E3%81%9F%E3%80%82
... and also HTML-Encoded (&#nnn(nn);, n = 0-9(?)) to:
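&#12489;&#12452;&#12484;&#35486;&#12391;&#26908;&#32034;&#12375;&#12390;&#12356;&#12390;&#12371;&#12385;&#12425;&#12398;&#12469;&#12452;&#12488;&#12395;&#12383;&#12393;&#12426;&#30528;&#12365;&#12414;&#12375;&#12383;&#12290;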
Please help me, as I am totally lost right now and don't even know where to start. By the way, performance isn't very important to me right now.
Thanks in advance!
Here is an example which shows two methods, one based on the Qt library and one based on the ICU library. Both should be fairly platform-independent:
#include <iostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <boost/scoped_array.hpp>
#include <QtCore/QString>
#include <QtCore/QUrl>
#include <QtCore/QVector>
#include <unicode/utypes.h>
#include <unicode/ustring.h>
#include <unicode/unistr.h>
#include <unicode/schriter.h>
void encodeQt() {
const QString str = QString::fromWCharArray(L"ドイツ語で検索していてこちらのサイトにたどり着きました。");
const QUrl url = str;
std::cout << "URL encoded: " << url.toEncoded().constData() << std::endl;
typedef QVector<uint> CodePointVector;
const CodePointVector codePoints = str.toUcs4();
std::stringstream htmlEncoded;
for (CodePointVector::const_iterator it = codePoints.constBegin(); it != codePoints.constEnd(); ++it) {
htmlEncoded << "&#" << *it << ';';
}
std::cout << "HTML encoded: " << htmlEncoded.str() << std::endl;
}
void encodeICU() {
const std::wstring cppString = L"ドイツ語で検索していてこちらのサイトにたどり着きました。";
int bufSize = cppString.length() * 2;
boost::scoped_array<UChar> strBuffer(new UChar[bufSize]);
int size = 0;
UErrorCode error = U_ZERO_ERROR;
u_strFromWCS(strBuffer.get(), bufSize, &size, cppString.data(), cppString.length(), &error);
if (error) return;
const UnicodeString str(strBuffer.get(), size);
bufSize = str.length() * 4;
boost::scoped_array<char> buffer(new char[bufSize]);
u_strToUTF8(buffer.get(), bufSize, &size, str.getBuffer(), str.length(), &error);
if (error) return;
const std::string urlUtf8(buffer.get(), size);
std::stringstream urlEncoded;
urlEncoded << std::hex << std::setfill('0');
for (std::string::const_iterator it = urlUtf8.begin(); it != urlUtf8.end(); ++it) {
urlEncoded << '%' << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(*it));
}
std::cout << "URL encoded: " << urlEncoded.str() << std::endl;
std::stringstream htmlEncoded;
StringCharacterIterator it = str;
while (it.hasNext()) {
const UChar32 pt = it.next32PostInc();
htmlEncoded << "&#" << pt << ';';
}
std::cout << "HTML encoded: " << htmlEncoded.str() << std::endl;
}
// Entry point: runs the Qt-based demo first, then the ICU-based one.
int main() {
    encodeQt();
    encodeICU();
    return 0;
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With