There are already questions here on Stack Overflow asking why basic_fstream<uint8_t> doesn't work. The answers say that char_traits is only specialized for char and wchar_t (plus char16_t and char32_t in C++11) and that you should stick with basic_fstream<char> to read binary data and cast it if required.
Well darn it, that isn't good enough! :)
None of the answers (that I can find) say how to specialize char_traits<uint8_t> and use it with a basic_fstream template, or whether it's even possible. So I thought I'd try to implement it myself.
The following compiles without error when using Visual Studio Express 2013 RC on Windows 7 64bit and with g++-4.7 on Kubuntu GNU/Linux 13.04 64bit. However it throws a std::bad_cast exception at runtime. I don't have access to clang++ with libc++ to test that combination.
#include <cinttypes>
#include <cstring>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <locale>
// Compatibility shims for Visual Studio releases older than 2015
// (_MSC_VER < 1900), which implement neither constexpr nor noexcept.
// Keying on the compiler version instead of _WIN32 keeps both keywords
// intact for newer MSVC and for other Windows toolchains.
#if defined(_MSC_VER) && _MSC_VER < 1900
#define constexpr
#define NOEXCEPT throw()
#else
#define NOEXCEPT noexcept
#endif

// Element type of the stream used in main(). The question reports that
// `char` works while std::uint8_t throws std::bad_cast at runtime.
using byte_type = std::uint8_t;
// Fallback so the specializations below still compile when the surrounding
// file has not provided the NOEXCEPT compatibility macro.
#ifndef NOEXCEPT
#define NOEXCEPT noexcept
#endif

namespace std
{

// Specialization of std::char_traits for raw bytes (std::uint8_t).
//
// NOTE(review): the standard only sanctions specializing std templates for
// program-defined types; std::uint8_t is an alias of a built-in type, so this
// relies on the library tolerating the specialization.
template <> struct char_traits< std::uint8_t >
{
    using char_type = std::uint8_t;
    using int_type = int;               // holds every byte value plus eof()
    using off_type = std::streamoff;
    using pos_type = std::streampos;
    using state_type = std::mbstate_t;

    // Assigns value2 to value1.
    static void assign(char_type& value1, const char_type& value2) NOEXCEPT
    {
        value1 = value2;
    }

    // Fills count elements starting at ptr with value; returns ptr.
    static char_type* assign(char_type* ptr, std::size_t count, char_type value)
    {
        return static_cast<char_type*>(std::memset(ptr, value, count));
    }

    static constexpr bool eq(const char_type& value1, const char_type& value2) NOEXCEPT
    {
        return value1 == value2;
    }

    static constexpr bool lt(const char_type value1, const char_type value2) NOEXCEPT
    {
        return value1 < value2;
    }

    // Number of elements before the first zero byte (strlen semantics).
    static std::size_t length(const char_type* ptr)
    {
        std::size_t i = 0;
        while (!eq(ptr[i], char_type()))
        {
            ++i;    // advance to the next element; without this the scan never progresses
        }
        return i;
    }

    // Lexicographic comparison of the first count elements (memcmp semantics).
    static int compare(const char_type* ptr1, const char_type* ptr2, std::size_t count)
    {
        return std::memcmp(ptr1, ptr2, count);
    }

    // Pointer to the first occurrence of value in [ptr, ptr + count), or null.
    static const char_type* find(const char_type* ptr, std::size_t count, const char_type& value)
    {
        return static_cast<const char_type*>(std::memchr(ptr, value, count));
    }

    // Copies count elements; the ranges may overlap.
    static char_type* move(char_type* dest, const char_type* src, std::size_t count)
    {
        return static_cast<char_type*>(std::memmove(dest, src, count));
    }

    // Copies count elements; the ranges must not overlap.
    static char_type* copy(char_type* dest, const char_type* src, std::size_t count)
    {
        return static_cast<char_type*>(std::memcpy(dest, src, count));
    }

    static constexpr char_type to_char_type(const int_type& value) NOEXCEPT
    {
        return static_cast<char_type>(value);
    }

    static constexpr int_type to_int_type(const char_type& value) NOEXCEPT
    {
        return static_cast<int_type>(value);
    }

    static constexpr bool eq_int_type(const int_type& value1, const int_type& value2) NOEXCEPT
    {
        return value1 == value2;
    }

    // Reuses the end-of-file marker of std::char_traits<char>.
    static constexpr int_type eof() NOEXCEPT
    {
        return static_cast<int_type>(std::char_traits<char>::eof());
    }

    // Returns value unchanged unless it is eof(), in which case 0 is returned.
    static constexpr int_type not_eof(const int_type& value) NOEXCEPT
    {
        return (value == eof()) ? 0 : value;
    }
}; // struct char_traits

// Specialization of std::codecvt: a non-converting facet between
// std::uint8_t (internal) and char (external). A locale carrying this facet
// is what a file stream of std::uint8_t looks up via std::use_facet.
template<> class codecvt< std::uint8_t, char, std::mbstate_t > : public locale::facet, public codecvt_base
{
public:
    using internal_type = std::uint8_t;
    using external_type = char;
    using state_type = std::mbstate_t;
    // Member names used by the standard std::codecvt interface.
    using intern_type = internal_type;
    using extern_type = external_type;

    // Identifies this facet inside a std::locale; must be publicly accessible.
    static std::locale::id id;

    explicit codecvt(std::size_t refs = 0)
        : locale::facet(refs)
    {
    }

    std::codecvt_base::result out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const
    {
        return do_out(state, from, from_end, from_next, to, to_end, to_next);
    }

    std::codecvt_base::result in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const
    {
        return do_in(state, from, from_end, from_next, to, to_end, to_next);
    }

    std::codecvt_base::result unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const
    {
        return do_unshift(state, to, to_end, to_next);
    }

    int length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const
    {
        return do_length(state, from, from_end, max);
    }

    int max_length() const NOEXCEPT
    {
        return do_max_length();
    }

    int encoding() const NOEXCEPT
    {
        return do_encoding();
    }

    bool always_noconv() const NOEXCEPT
    {
        return do_always_noconv();
    }

protected:
    // Facets are owned and destroyed by the locale that holds them.
    virtual ~codecvt() {}

    virtual std::codecvt_base::result do_out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const;
    virtual std::codecvt_base::result do_in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const;
    virtual std::codecvt_base::result do_unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const;
    virtual int do_length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const;
    virtual int do_max_length() const NOEXCEPT;
    virtual int do_encoding() const NOEXCEPT;
    virtual bool do_always_noconv() const NOEXCEPT;
}; // class codecvt

locale::id codecvt< std::uint8_t, char, std::mbstate_t >::id;

// No conversion is performed: nothing is consumed or produced.
codecvt_base::result codecvt< std::uint8_t, char, std::mbstate_t >::do_out(state_type& state, const internal_type* from, const internal_type* from_end, const internal_type*& from_next, external_type* to, external_type* to_end, external_type*& to_next) const
{
    (void) state; (void) from_end; (void) to_end; // Unused parameters
    from_next = from;
    to_next = to;
    return codecvt_base::noconv;
}

// No conversion is performed: nothing is consumed or produced.
codecvt_base::result codecvt< std::uint8_t, char, std::mbstate_t >::do_in(state_type& state, const external_type* from, const external_type* from_end, const external_type*& from_next, internal_type* to, internal_type* to_end, internal_type*& to_next) const
{
    (void) state; (void) from_end; (void) to_end; // Unused parameters
    from_next = from;
    to_next = to;
    return std::codecvt_base::noconv;
}

// There is no shift state, so nothing has to be written.
codecvt_base::result codecvt< std::uint8_t, char, std::mbstate_t >::do_unshift(state_type& state, external_type* to, external_type* to_end, external_type*& to_next) const
{
    (void) state; (void) to_end; // Unused parameters
    to_next = to;
    return std::codecvt_base::noconv;
}

// One external char maps to one internal element, capped at max.
int codecvt< std::uint8_t, char, std::mbstate_t >::do_length(state_type& state, const external_type* from, const external_type* from_end, std::size_t max) const
{
    (void) state; // Unused parameter
    return static_cast<int>(std::min< std::size_t >(max, static_cast<std::size_t>(from_end - from)));
}

int codecvt< std::uint8_t, char, std::mbstate_t >::do_max_length() const NOEXCEPT
{
    return 1;
}

// Fixed-width encoding: exactly one external char per internal element.
int codecvt< std::uint8_t, char, std::mbstate_t >::do_encoding() const NOEXCEPT
{
    return 1;
}

bool codecvt< std::uint8_t, char, std::mbstate_t >::do_always_noconv() const NOEXCEPT
{
    return true;
}

} // namespace std
int main(int argc, char *argv [])
if (argc < 2)
std::cerr << argv[0] << " {file to read}" << std::endl;
using stream_type = std::basic_ifstream< byte_type, std::char_traits<byte_type> >;
stream_type stream(argv[1], std::ifstream::in | std::ifstream::binary);
if (stream.is_open() == false)
std::cerr << "file not found" << std::endl;
static const auto read_size = 4;
stream_type::char_type buffer[read_size];, read_size);
std::cout << "Got:" << stream.gcount() << std::endl;
Compile and run with g++ and GNU/Linux:
$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random
terminate called after throwing an instance of 'std::bad_cast'
what(): std::bad_cast
Aborted (core dumped)
And with Visual Studio Express RC 2013:
First-chance exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978.
Unhandled exception at 0x76A6C41F in traits test.exe: Microsoft C++ exception: std::bad_cast at memory location 0x0038F978.
Changing byte_type to char gives the expected output:
$ g++ -std=c++11 -Wall -Wextra -pedantic stream.cpp -o stream && ./stream /dev/random
Why is this throwing std::bad_cast and how can I fix it?
I was able to reproduce a bad_cast on my gcc (4.7.2 on AIX).
The reason you got it is that the gcc library implementors optimized basic_filebuf::xsgetn (which is called from basic_istream::read) to call plain C fread to read from the file if your stream's locale is non-converting (that is, you're not trying to read a UTF-8 or maybe GB18030 file into a UTF-32 string or something), which is absolutely the right thing to do. To find out if it is non-converting, it checks codecvt::always_noconv on the codecvt facet of the locale imbued in your stream... which doesn't exist.
You can reproduce the exception by executing
std::cout << std::use_facet<
std::codecvt<std::uint8_t, char, std::mbstate_t>
>(stream.getloc()).always_noconv() << '\n';
I don't have access to Visual Studio to see why it works there (do they just call basic_filebuf::sgetc() for every char in basic_fstream::read()?), but to use basic_filebuf (and therefore basic_fstream) in any case, you need to provide a codecvt facet for your combination of internal and external types (uint8_t and char, in this case).
EDIT: You're almost there, the last missing piece is the line
stream.imbue(std::locale(stream.getloc(), new std::codecvt<uint8_t, char, std::mbstate_t>));
anywhere before stream.read,
or, alternatively, imbue the global: std::locale::global(std::locale(std::locale(), new std::codecvt<uint8_t, char, std::mbstate_t>));
anywhere before you construct the basic_ifstream.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With