Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Accept non ASCII characters

Consider this program:

#include <stdio.h>

/* Print the first command-line argument followed by a newline. */
int main(int argc, char* argv[]) {
   /* Without an argument argv[1] is NULL, and passing NULL to %s is
      undefined behaviour, so bail out with a usage message instead. */
   if (argc < 2) {
      fprintf(stderr, "usage: alpha <text>\n");
      return 1;
   }
   printf("%s\n", argv[1]);
   return 0;
}

I compile it like this:

x86_64-w64-mingw32-gcc -o alpha alpha.c

The problem appears when I give it a non-ASCII argument:

$ ./alpha róisín
r�is�n

How can I write and/or compile this program so that it accepts non-ASCII characters? In response to alk: no, it is the program that is printing incorrectly. See this example:

$ echo Ω | od -t x1c
0000000  ce  a9  0a
        316 251  \n
0000003

$ ./alpha Ω | od -t x1c
0000000  4f  0d  0a
          O  \r  \n
0000003
like image 242
Zombo Avatar asked Jun 14 '15 18:06

Zombo


1 Answer

The easiest way to do this is with wmain (when building with MinGW-w64, add the -municode option so that wmain is used as the entry point):

#include <fcntl.h>
#include <stdio.h>

/*
 * Wide-character entry point: the arguments arrive as wchar_t strings.
 * Prints the first argument followed by a newline.
 * Returns 0 on success, 1 when no argument was supplied.
 */
int wmain (int argc, wchar_t** argv) {
  /* argv[1] is NULL when no argument is given; do not hand it to wprintf. */
  if (argc < 2) {
    fwprintf(stderr, L"usage: alpha <text>\n");
    return 1;
  }
  /* Put stdout into wide-text translation mode before any output is written. */
  _setmode(_fileno(stdout), _O_WTEXT);
  /* %ls denotes a wide string under both the Microsoft and the ISO C
     conventions, whereas plain %s in wprintf is wide only with Microsoft's. */
  wprintf(L"%ls\n", argv[1]);
  return 0;
}

It can also be done with GetCommandLineW; here is a simplified version of the code found in the HandBrake repository:

#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

/*
 * Build a UTF-8 argument vector from the process's wide command line.
 *
 * The result is a single malloc'd block laid out as
 *   [argc + 1 char* slots][string 0][string 1]...
 * with argv[argc] set to NULL. The caller owns the block and releases
 * everything with one free(*argv_ptr).
 *
 * argc_ptr: receives the number of arguments.
 * argv_ptr: receives the pointer to the UTF-8 argument vector.
 *
 * Returns 0 on success. Returns -1 if the command line cannot be parsed,
 * a conversion fails, or memory is exhausted; in that case *argc_ptr and
 * *argv_ptr are left untouched.
 */
int get_argv_utf8(int* argc_ptr, char*** argv_ptr) {
  int rc = -1;
  int argc = 0;
  int i;
  int len;
  size_t offset;
  size_t size;
  char** argv = NULL;
  wchar_t** argv_utf16 = CommandLineToArgvW(GetCommandLineW(), &argc);

  if (argv_utf16 == NULL)
    return -1;

  /* First pass: pointer table plus the UTF-8 length (including the
     terminating NUL) of every argument. */
  offset = ((size_t) argc + 1) * sizeof(char*);
  size = offset;
  for (i = 0; i < argc; i++) {
    len = WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL);
    if (len <= 0)
      goto done;
    size += (size_t) len;
  }

  argv = malloc(size);
  if (argv == NULL)
    goto done;

  /* Second pass: convert each argument directly into the string area
     that follows the pointer table. */
  for (i = 0; i < argc; i++) {
    argv[i] = (char*) argv + offset;
    len = WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1,
      argv[i], (int) (size - offset), NULL, NULL);
    if (len <= 0)
      goto done;
    offset += (size_t) len;
  }
  argv[argc] = NULL;  /* conventional terminator; the slot was already allocated */

  *argc_ptr = argc;
  *argv_ptr = argv;
  rc = 0;

done:
  if (rc != 0)
    free(argv);
  /* CommandLineToArgvW returns one block that must be released with LocalFree. */
  LocalFree(argv_utf16);
  return rc;
}

/*
 * Replace the narrow argv supplied by the runtime with a UTF-8 copy built
 * by get_argv_utf8, then print the first argument followed by a newline.
 * The UTF-8 vector is not freed explicitly; the process exits right away.
 */
int main(int argc, char** argv) {
  if (get_argv_utf8(&argc, &argv) != 0) {
    fprintf(stderr, "failed to convert the command line to UTF-8\n");
    return 1;
  }
  /* argv[1] only exists when an argument was actually supplied. */
  if (argc < 2) {
    fprintf(stderr, "usage: alpha <text>\n");
    return 1;
  }
  printf("%s\n", argv[1]);
  return 0;
}
like image 50
Zombo Avatar answered Sep 22 '22 16:09

Zombo