Comparing reading files using the following three techniques:
1. C <stdio.h> FILE*
2. Win32 CreateFile()/ReadFile()
3. Win32 memory mapping
I noted that #1 is faster than #2, and #3 is the fastest.
e.g. Sorted from the fastest to the slowest, for processing a 900MB test file, I got these results:
Win32 memory mapping: 821.308 ms
C file (FILE*): 1779.83 ms
Win32 file (CreateFile): 3649.67 ms
Why is the C <stdio.h> technique faster than Win32 ReadFile() access? I'd expect the raw Win32 APIs to have less overhead than the CRT. What am I missing here?
Compilable test C++ source code follows.
EDIT
I repeated the tests with 4KB read buffers and using three different files (with the same content) to avoid caching effects that could distort performance measurements, and now the results are as expected.
For example, for a file of circa 400 MB the results are:
Win32 memory mapping: 305.908 ms
Win32 file (CreateFile): 451.402 ms
C file (FILE*): 460.579 ms
////////////////////////////////////////////////////////////////////////////////
// Test file reading using C FILE*, Win32 CreateFile and Win32 memory mapping.
////////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <vector>
#include <Windows.h>
//------------------------------------------------------------------------
// Performance (speed) measurement
//------------------------------------------------------------------------
long long counter()
{
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
return li.QuadPart;
}
long long frequency()
{
LARGE_INTEGER li;
QueryPerformanceFrequency(&li);
return li.QuadPart;
}
// Prints "<s>: <elapsed> ms" to std::cout, where the elapsed time is the
// difference between two counter() readings converted to milliseconds.
//   start, finish - counter() values taken before and after the measured work
//   s             - label printed in front of the timing
void print_time(const long long start, const long long finish,
                const char * const s)
{
    const long long elapsed_ticks = finish - start;
    const double elapsed_ms = elapsed_ticks * 1000.0 / frequency();
    std::cout << s << ": " << elapsed_ms << " ms\n";
}
//------------------------------------------------------------------------
// RAII handle wrappers
//------------------------------------------------------------------------
// Traits describing a C stdio stream for the generic handle<> wrapper:
// the raw type is FILE*, "no handle" is a null pointer, and release is fclose().
struct c_file_traits
{
    using type = FILE*;

    // Sentinel meaning "no stream is held".
    static type invalid_value()
    {
        return nullptr;
    }

    // Releases the stream; the fclose() result is not inspected.
    static void close(type stream)
    {
        fclose(stream);
    }
};
struct win32_file_traits
{
typedef HANDLE type;
static HANDLE invalid_value()
{
return INVALID_HANDLE_VALUE;
}
static void close(HANDLE h)
{
CloseHandle(h);
}
};
struct win32_handle_traits
{
typedef HANDLE type;
static HANDLE invalid_value()
{
return nullptr;
}
static void close(HANDLE h)
{
CloseHandle(h);
}
};
// Non-copyable owner of a raw handle.
//
// Traits must provide:
//   type            - the raw handle type
//   invalid_value() - the sentinel that means "nothing is held"
//   close(type)     - releases a held handle
//
// The held handle (if any) is released when the wrapper is destroyed.
template <typename Traits>
class handle
{
public:
    using type = typename Traits::type;

    // Starts out empty (holding the invalid sentinel).
    handle()
        : _h(Traits::invalid_value())
    {
    }

    // Takes ownership of h.
    explicit handle(type h)
        : _h(h)
    {
    }

    // Copying is banned: two wrappers must never own the same raw handle.
    handle(const handle&) = delete;
    handle& operator=(const handle&) = delete;

    ~handle()
    {
        close();
    }

    // True when a real handle (anything but the sentinel) is held.
    bool valid() const
    {
        return !(_h == Traits::invalid_value());
    }

    // Returns the raw handle without giving up ownership.
    type get() const
    {
        return _h;
    }

    // Releases the held handle, if any, and becomes empty.
    void close()
    {
        if (!valid())
            return; // already empty: _h is the sentinel, nothing to release

        Traits::close(_h);
        _h = Traits::invalid_value();
    }

    // Replaces the held handle with h, releasing the previous one first.
    // Resetting to the handle already held is a no-op.
    void reset(type h)
    {
        if (h == _h)
            return;

        close();
        _h = h;
    }

private:
    type _h; // wrapped raw handle
};
// Convenience aliases for the handle flavours used by the tests below.
using c_file_handle = handle<c_file_traits>;          // FILE* closed with fclose()
using win32_file_handle = handle<win32_file_traits>;  // HANDLE, invalid == INVALID_HANDLE_VALUE
using win32_handle = handle<win32_handle_traits>;     // HANDLE, invalid == nullptr
//------------------------------------------------------------------------
// File reading tests using various techniques
//------------------------------------------------------------------------
// Counts the occurrences of `ch` in the file `filename`, reading it through
// the C runtime (fopen/fread) in 4 KB chunks.
//
// Returns the number of bytes equal to `ch`.
// Throws std::runtime_error if the file cannot be opened or if a read fails.
unsigned long long count_char_using_c_file(const std::string& filename, const char ch)
{
#pragma warning(push)
#pragma warning(disable: 4996) // fopen use is OK
    c_file_handle file(fopen(filename.c_str(), "rb"));
#pragma warning(pop)
    if (!file.valid())
        throw std::runtime_error("Can't open file.");

    std::vector<char> read_buffer(4*1024); // 4 KB
    unsigned long long char_count = 0;

    for (;;)
    {
        const size_t read_count =
            fread(read_buffer.data(), 1, read_buffer.size(), file.get());

        // Count whatever was delivered, even on a short (final) read.
        for (size_t i = 0; i < read_count; ++i)
        {
            if (read_buffer[i] == ch)
                ++char_count;
        }

        if (read_count < read_buffer.size())
        {
            // A short read means either end-of-file or an I/O error; fread()
            // does not tell them apart, so ask the stream. Reporting the error
            // keeps this function consistent with the ReadFile() variant
            // instead of silently returning a partial count.
            if (ferror(file.get()))
                throw std::runtime_error("File read error using fread().");
            break;
        }
    }

    return char_count;
}
unsigned long long count_char_using_win32_file(const std::string& filename, const char ch)
{
unsigned long long char_count = 0;
win32_file_handle file(::CreateFileA(
filename.c_str(),
GENERIC_READ,
FILE_SHARE_READ,
nullptr,
OPEN_EXISTING,
FILE_FLAG_SEQUENTIAL_SCAN,
nullptr
)
);
if (!file.valid())
throw std::runtime_error("Can't open file.");
std::vector<char> read_buffer(4*1024); // 4 KB
bool has_more_data = true;
while (has_more_data)
{
DWORD read_count = 0;
if (!ReadFile(file.get(), read_buffer.data(), read_buffer.size(), &read_count, nullptr))
throw std::runtime_error("File read error using ReadFile().");
for (size_t i = 0; i < read_count; i++)
{
if (read_buffer[i] == ch)
char_count++;
}
if (read_count < sizeof(read_buffer))
has_more_data = false;
}
return char_count;
}
// Memory-map a file.
class file_map
{
public:
explicit file_map(const std::string& filename)
: _view(nullptr), _length(0)
{
_file.reset(::CreateFileA(
filename.c_str(),
GENERIC_READ,
FILE_SHARE_READ,
nullptr,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
nullptr));
if (!_file.valid())
return;
LARGE_INTEGER file_size;
if (!GetFileSizeEx(_file.get(), &file_size))
return;
if (file_size.QuadPart == 0)
return;
_mapping.reset(::CreateFileMapping(
_file.get(), nullptr,
PAGE_READONLY,
0,
0,
nullptr)
);
if (!_mapping.valid())
return;
_view = reinterpret_cast<char*>
(::MapViewOfFile(_mapping.get(), FILE_MAP_READ, 0, 0, 0));
if (!_view)
return;
_length = file_size.QuadPart;
}
~file_map()
{
if (_view)
UnmapViewOfFile(_view);
}
bool valid() const
{
return (_view != nullptr);
}
const char * begin() const
{
return _view;
}
const char * end() const
{
return begin() + length();
}
unsigned long long length() const
{
return _length;
}
private: // ban copy
file_map(const file_map&);
file_map& operator=(const file_map&);
private:
win32_file_handle _file;
win32_handle _mapping;
char* _view;
unsigned long long _length; // in bytes
};
// Counts the occurrences of `ch` in the file `filename` by memory-mapping the
// whole file and scanning the mapped bytes.
//
// Returns the number of bytes equal to `ch`.
// Throws std::runtime_error when the file cannot be mapped (file_map reports
// !valid(), which includes the zero-length-file case).
unsigned long long count_char_using_memory_mapping(const std::string& filename, const char ch)
{
    file_map view(filename);
    if (!view.valid())
        throw std::runtime_error("Can't create memory-mapping of file.");

    unsigned long long char_count = 0;
    const char* const last = view.end();
    for (const char* cursor = view.begin(); cursor != last; ++cursor)
    {
        if (*cursor == ch)
            ++char_count;
    }
    return char_count;
}
// Runs one counting test, timing it with counter(), and prints the elapsed
// time followed by the resulting character count.
//   message  - label printed with the timing
//   test     - callable invoked as test(filename, ch) returning the count
//   filename - file handed to the test
//   ch       - character whose occurrences are counted
// Exceptions thrown by `test` propagate to the caller.
template <typename TestFunc>
void run_test(const char * message, TestFunc test, const std::string& filename, const char ch)
{
    const long long ticks_before = counter();
    const unsigned long long occurrences = test(filename, ch);
    const long long ticks_after = counter();

    print_time(ticks_before, ticks_after, message);
    std::cout << "Count of \'" << ch << "\' : " << occurrences << "\n\n";
}
// Entry point. Usage: <program> <char> <filename>
//
// Runs the three counting tests on "<filename>1", "<filename>2" and
// "<filename>3" respectively. Returns 0 on success and 1 on a usage error or
// when any test throws.
int main(int argc, char* argv[])
{
    static const int kExitOk = 0;
    static const int kExitError = 1;

    const bool usage_ok = (argc == 3);
    if (!usage_ok)
    {
        std::cerr << argv[0] << " <char> <filename>.\n"
                  << "Counts occurrences of ASCII character <char>\n"
                  << "in the <filename> file.\n\n";
        return kExitError;
    }

    // Only the first character of the first argument is used.
    const char ch = argv[1][0];
    const std::string filename(argv[2]);

    int exit_code = kExitOk;
    try
    {
        // Execute tests on THREE different files with the same content,
        // to avoid caching effects.
        // (file names have incremental number suffix).
        run_test("C <stdio.h> file (FILE*)", count_char_using_c_file, filename + "1", ch);
        run_test("Win32 file (CreateFile)", count_char_using_win32_file, filename + "2", ch);
        run_test("Win32 memory mapping", count_char_using_memory_mapping, filename + "3", ch);
    }
    catch (const std::exception& e)
    {
        std::cerr << "\n*** ERROR: " << e.what() << '\n';
        exit_code = kExitError;
    }
    return exit_code;
}
////////////////////////////////////////////////////////////////////////////////
fread mmaps the file. This takes some time, and will map the whole file. This means subsequent "read-ins" will be faster.
Not only was fread() almost 2.5 times faster than readr's functionality in reading and binding the data, but perhaps even more importantly, the maximum used memory was only 15.25 GB, compared to readr's 27 GB.
The fread() function returns the number of full items successfully read, which can be less than count if an error occurs, or if the end-of-file is met before reaching count. If size or count is 0, the fread() function returns zero, and the contents of the array and the state of the stream remain unchanged.
read() makes a direct system call on UNIX. fread() is part of the C library, and provides buffered reads. It is usually implemented by calling read() in order to fill its buffer.
Just ran some tests on my machine that shows that increasing the buffer size actually increases performance:
C <stdio.h> file (FILE*): 1431.93 ms
Bufsize: 0
Count of 'x' : 3161882
Win32 file (CreateFile): 2289.45 ms
Bufsize: 1024
Count of 'x' : 3161882
Win32 file (CreateFile): 1714.5 ms
Bufsize: 2048
Count of 'x' : 3161882
Win32 file (CreateFile): 1479.16 ms
Bufsize: 4096
Count of 'x' : 3161882
Win32 file (CreateFile): 1328.25 ms
Bufsize: 8192
Count of 'x' : 3161882
Win32 file (CreateFile): 1256.1 ms
Bufsize: 16384
Count of 'x' : 3161882
Win32 file (CreateFile): 1223.54 ms
Bufsize: 32768
Count of 'x' : 3161882
Win32 file (CreateFile): 1224.84 ms
Bufsize: 65536
Count of 'x' : 3161882
Win32 file (CreateFile): 1212.4 ms
Bufsize: 131072
Count of 'x' : 3161882
Win32 file (CreateFile): 1238.09 ms
Bufsize: 262144
Count of 'x' : 3161882
Win32 file (CreateFile): 1209.2 ms
Bufsize: 524288
Count of 'x' : 3161882
Win32 file (CreateFile): 1223.67 ms
Bufsize: 1048576
Count of 'x' : 3161882
Win32 file (CreateFile): 1349.98 ms
Bufsize: 2097152
Count of 'x' : 3161882
Win32 memory mapping: 796.281 ms
Bufsize: 0
Count of 'x' : 3161882
Some steps in the Visual Studio 2012 debugger reveal that the buffer size of the FILE* method is 4096 bytes, at least on my machine. (And as others have already said, it calls ReadFile too unless you are reading from a console.)
It is also interesting that big buffers marginally slow down the performance. Moving the new operator outside the test doesn't solve the issue either.
First the memory-mapped test was pretty slow for me because I ran it in Debug mode. I've updated all the results with the Release mode compilation. Memory mapping became the first.
The fastest disk access I've ever attained was using ReadFile. But I specifically opened the file with flags to meet my disk access and caching requirements. If you just use it verbatim, the comparison is a little bit lame.
You should read up more on the function, as well as CreateFile. You'll find you can read data in (multiples of) sector-sized blocks to sector-aligned memory. Then you'll out-perform fread.
As others have said, fread is doing its own buffering. Your buffering implementation with ReadFile still needs work.
Check out MSDN. All the information is there. Specifically, here:
File buffering
Caching behaviour
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With