I tried to call printf() with some accented characters such as á é í ó ú:
/* Example from the question: a narrow-string printf() whose literal contains the non-ASCII character 'á'. */
printf("my name is Seán\n");
The text editor in the Dev-C++ IDE displays them fine, i.e., the source code looks fine.
I guess I need some library other than stdio.h and maybe some variant of the normal printf().
I'm using the Bloodshed Dev-C++ IDE running on Windows XP.
Perhaps the best is to use Unicode.
Here's how...
First, manually set your console font to "Consolas", "Lucida Console", or any other TrueType Unicode font available to you. ("Raster Fonts" may not work: they are not Unicode fonts, although they may include the characters you're interested in.)
Next, set the console code page to 65001 (UTF-8) with SetConsoleOutputCP(CP_UTF8).
Then convert your text to UTF-8 (if it's not already in UTF-8) using WideCharToMultiByte(CP_UTF8, ...).
Finally, call WriteConsoleA() to output the UTF-8 text.
Here's a little function that does all of these things for you. It is an "improved" variant of wprintf():
/*
 * _wprintf - wprintf()-like function that writes UTF-8 to the Windows console.
 *
 * On the first call it tries to switch the console output code page to
 * CP_UTF8. If that fails, an error is reported on stderr and this and every
 * later call simply forwards to vwprintf().
 *
 * Otherwise the arguments are formatted into a wide-character buffer with
 * vswprintf(), converted to UTF-8 with WideCharToMultiByte(), and written
 * with WriteConsoleA() when GetConsoleMode() succeeds on the stdout handle,
 * or with fputs() to stdout when it does not.
 *
 * Parameters:
 *   format - wide-character printf-style format string.
 *   ...    - arguments consumed by `format`.
 *
 * Returns:
 *   The value returned by vswprintf() (or by vwprintf() in fallback mode):
 *   the number of wide characters produced, or a negative value if
 *   formatting failed. Returns -1 on allocation, conversion or write failure.
 *
 * Notes:
 *   - The work buffers and the mode flag are function-local statics with no
 *     synchronization; the buffers are reused across calls and never freed.
 *   - NOTE(review): the name begins with an underscore, which is reserved
 *     for the implementation at file scope; kept for caller compatibility.
 */
int _wprintf(const wchar_t* format, ...)
{
    /* Largest wide buffer (in wchar_t elements) we are willing to grow to.
     * vswprintf() reports "does not fit" and genuine formatting errors the
     * same way (negative return), so growth must be bounded. */
    enum { MAX_WCHAR_COUNT = 1 << 20 };

    int r;
    static int utf8ModeSet = 0;          /* 0 = not tried, 1 = UTF-8 active, -1 = failed */
    static wchar_t* bufWchar = NULL;
    static size_t bufWcharCount = 256;   /* capacity of bufWchar, in wchar_t elements */
    static char* bufMchar = NULL;
    static size_t bufMcharCount = 256;   /* capacity of bufMchar, in bytes */
    va_list vl;
    int mcharCount = 0;

    if (utf8ModeSet == 0)
    {
        if (!SetConsoleOutputCP(CP_UTF8))
        {
            DWORD err = GetLastError();
            /* DWORD is an unsigned long: print it with %lX, not %X. */
            fprintf(stderr, "SetConsoleOutputCP(CP_UTF8) failed with error 0x%lX\n",
                    (unsigned long)err);
            utf8ModeSet = -1;
        }
        else
        {
            utf8ModeSet = 1;
        }
    }

    /* UTF-8 mode unavailable: behave like plain wprintf(). */
    if (utf8ModeSet != 1)
    {
        va_start(vl, format);
        r = vwprintf(format, vl);
        va_end(vl);
        return r;
    }

    if (bufWchar == NULL)
    {
        if ((bufWchar = malloc(bufWcharCount * sizeof(wchar_t))) == NULL)
        {
            return -1;
        }
    }

    /* Format into the wide buffer, doubling it until the output fits. */
    for (;;)
    {
        size_t grownCount;
        wchar_t* grown;

        va_start(vl, format);
        r = vswprintf(bufWchar, bufWcharCount, format, vl);
        va_end(vl);

        /* Success with room to spare (same margin as the original check). */
        if (r >= 0 && (size_t)r + 2 <= bufWcharCount)
        {
            break;
        }

        /* A negative result means either "buffer too small" or a real
         * formatting error. Keep growing until the cap; past it, a
         * non-negative r is still a complete result and a negative r is
         * reported to the caller via the final return. */
        if (bufWcharCount >= MAX_WCHAR_COUNT)
        {
            break;
        }

        /* Allocate before freeing so a failed malloc() keeps the old buffer
         * (its contents need not be preserved, hence no realloc()). */
        grownCount = bufWcharCount * 2;
        if ((grown = malloc(grownCount * sizeof(wchar_t))) == NULL)
        {
            return -1;
        }
        free(bufWchar);
        bufWchar = grown;
        bufWcharCount = grownCount;
    }

    if (r > 0)
    {
        if (bufMchar == NULL)
        {
            if ((bufMchar = malloc(bufMcharCount)) == NULL)
            {
                return -1;
            }
        }

        /* Convert to UTF-8, doubling the byte buffer while it is too small. */
        for (;;)
        {
            char* grown;

            /* Source length -1: the input is NUL-terminated and the returned
             * count includes the terminating NUL byte. */
            mcharCount = WideCharToMultiByte(CP_UTF8,
                                             0,
                                             bufWchar,
                                             -1,
                                             bufMchar,
                                             (int)bufMcharCount,
                                             NULL,
                                             NULL);
            if (mcharCount > 0)
            {
                break;
            }
            if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
            {
                return -1;
            }

            if ((grown = malloc(bufMcharCount * 2)) == NULL)
            {
                return -1;
            }
            free(bufMchar);
            bufMchar = grown;
            bufMcharCount *= 2;
        }
    }

    /* mcharCount counts the terminating NUL, so > 1 means there is text. */
    if (mcharCount > 1)
    {
        DWORD numberOfCharsWritten, consoleMode;

        if (GetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), &consoleMode))
        {
            /* Flush pending stdio output first so it is not reordered
             * relative to the direct console write. */
            fflush(stdout);
            if (!WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE),
                               bufMchar,
                               mcharCount - 1,
                               &numberOfCharsWritten,
                               NULL))
            {
                return -1;
            }
        }
        else
        {
            /* GetConsoleMode() failed on the stdout handle: write the UTF-8
             * bytes through stdio instead. */
            if (fputs(bufMchar, stdout) == EOF)
            {
                return -1;
            }
        }
    }

    return r;
}
The following code tests this function:
/* Test 1: code points U+00A0..U+00FF, 32 per output line (three lines). */
_wprintf(L"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
L"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
L"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
L"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
L"\n"
L"\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7"
L"\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
L"\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7"
L"\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
L"\n"
L"\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7"
L"\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
L"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7"
L"\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
L"\n");
/* Test 2: Greek, code points U+0391..U+03B0 on the first line and
 * U+03B1..U+03CE on the second. */
_wprintf(L"\x391\x392\x393\x394\x395\x396\x397"
L"\x398\x399\x39A\x39B\x39C\x39D\x39E\x39F"
L"\x3A0\x3A1\x3A2\x3A3\x3A4\x3A5\x3A6\x3A7"
L"\x3A8\x3A9\x3AA\x3AB\x3AC\x3AD\x3AE\x3AF\x3B0"
L"\n"
L"\x3B1\x3B2\x3B3\x3B4\x3B5\x3B6\x3B7"
L"\x3B8\x3B9\x3BA\x3BB\x3BC\x3BD\x3BE\x3BF"
L"\x3C0\x3C1\x3C2\x3C3\x3C4\x3C5\x3C6\x3C7"
L"\x3C8\x3C9\x3CA\x3CB\x3CC\x3CD\x3CE"
L"\n");
/* Test 3: Cyrillic, U+0410..U+042F (with U+0401 inserted after U+0415) on
 * the first line and U+0430..U+044F (with U+0451 inserted after U+0435) on
 * the second. */
_wprintf(L"\x410\x411\x412\x413\x414\x415\x401\x416\x417"
L"\x418\x419\x41A\x41B\x41C\x41D\x41E\x41F"
L"\x420\x421\x422\x423\x424\x425\x426\x427"
L"\x428\x429\x42A\x42B\x42C\x42D\x42E\x42F"
L"\n"
L"\x430\x431\x432\x433\x434\x435\x451\x436\x437"
L"\x438\x439\x43A\x43B\x43C\x43D\x43E\x43F"
L"\x440\x441\x442\x443\x444\x445\x446\x447"
L"\x448\x449\x44A\x44B\x44C\x44D\x44E\x44F"
L"\n");
And should result in the following text in the console:
¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰ
αβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ
АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
абвгдеёжзийклмнопрстуфхцчшщъыьэюя
I do not know the encoding in which your IDE stores non-ASCII characters in .c/.cpp files, nor what your compiler does when it encounters non-ASCII characters. You will need to figure this part out yourself.
As long as you supply properly encoded UTF-16 text to _wprintf(), or call WriteConsoleA() with properly encoded UTF-8 text, things should work.
P.S. Some gory details about console fonts can be found here.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With