I want to read single bytes as fast as possible from a file into a D2 application. The application needs the data byte by byte, so reading larger blocks of data is not an option for the reader's interface.
For this I created some trivial implementations in C++, Java, D2 at: https://github.com/gizmomogwai/performance.
As you can see, I tried plain reads, buffers in the application code, and memory-mapped files. For my use case the memory-mapped solution worked best, but the strange thing is that D2 is slower than Java. I would have hoped for D2 to land between C++ and Java (the C++ code is compiled with -O3 -g, the D2 code with -O -release).
So please tell me what I am doing wrong here and how to speed up the D2 implementation.
To give you an idea of the use case here is a C++ implementation:
/// Buffered single-byte reader built on C stdio.
///
/// The file is read in chunks of BUFFER_SIZE bytes via fread(); read() hands
/// out one byte at a time from that chunk and refills it when it runs dry.
class StdioFileReader {
private:
  FILE* fFile;
  static const size_t BUFFER_SIZE = 1024;
  unsigned char fBuffer[BUFFER_SIZE];
  unsigned char* fBufferPtr;  // next unread byte inside fBuffer
  unsigned char* fBufferEnd;  // one past the last valid byte inside fBuffer

  // Not copyable: a copy would share fFile (closed twice) and its cursor
  // pointers would still refer to the source object's fBuffer.
  // Declared but intentionally never defined.
  StdioFileReader(const StdioFileReader&);
  StdioFileReader& operator=(const StdioFileReader&);

public:
  /// Opens the file named `s` for binary reading.
  /// A failed open trips the assert in debug builds; when asserts are
  /// compiled out the reader simply reports end-of-input on the first read().
  StdioFileReader(const std::string& s)
      : fFile(fopen(s.c_str(), "rb")), fBufferPtr(fBuffer), fBufferEnd(fBuffer) {
    assert(fFile);
  }

  /// Closes the file if it was opened successfully.
  ~StdioFileReader() {
    if (fFile) {
      fclose(fFile);
    }
  }

  /// Returns the next byte as a value in [0, 255], or -1 when no more data
  /// can be read.
  int read() {
    if (fBufferPtr == fBufferEnd && fillBuffer()) {
      return -1;
    }
    return *fBufferPtr++;
  }

private:
  /// Refills fBuffer from the file and resets the cursor.
  /// Returns true when nothing could be read (end of file, read error, or
  /// the file was never opened).
  bool fillBuffer() {
    const size_t l = fFile ? fread(fBuffer, 1, BUFFER_SIZE, fFile) : 0;
    fBufferPtr = fBuffer;
    fBufferEnd = fBuffer + l;
    return l == 0;
  }
};
size_t readBytes() {
size_t res = 0;
for (int i=0; i<10; i++) {
StdioFileReader r("/tmp/shop_with_ids.pb");
int read = r.read();
while (read != -1) {
++res;
read = r.read();
}
}
return res;
}
which is much faster compared to the "same" solution in D:
/// Buffered single-byte reader on top of the C stdio functions.
///
/// The file is read in chunks of BUFFER_SIZE bytes; read() hands out one byte
/// per call and refills the chunk when it is used up.
struct FileReader {
  private FILE* fFile;
  private enum BUFFER_SIZE = 8192;
  private ubyte[BUFFER_SIZE] fBuffer;
  // Offsets into fBuffer rather than raw pointers: the buffer lives inside
  // the struct, so pointers into it would go stale if the struct is moved.
  private size_t fPos;  // index of the next unread byte
  private size_t fEnd;  // number of valid bytes currently in fBuffer

  // Copying would make two instances close the same FILE*.
  @disable this(this);

  /// Opens the file named `fn` for binary reading. If the open fails, every
  /// read() reports end-of-input.
  public this(string fn) {
    import std.string : toStringz;
    fFile = std.c.stdio.fopen(toStringz(fn), "rb");
  }

  /// Closes the underlying C file handle, if one was opened.
  public ~this() {
    if (fFile !is null) {
      std.c.stdio.fclose(fFile);
      fFile = null;
    }
  }

  /// Stores the next byte in `*targetBuffer` and returns 1, or returns 0
  /// (leaving `*targetBuffer` untouched) when no more data can be read.
  public int read(ubyte* targetBuffer) {
    if (fPos == fEnd && fillBuffer()) {
      return 0;
    }
    *targetBuffer = fBuffer[fPos++];
    return 1;
  }

  /// Refills fBuffer from the file and rewinds the cursor.
  /// Returns true when nothing could be read.
  private bool fillBuffer() {
    fPos = 0;
    fEnd = (fFile is null) ? 0 : std.c.stdio.fread(fBuffer.ptr, 1, BUFFER_SIZE, fFile);
    return fEnd == 0;
  }
}
/// Benchmark driver: reads /tmp/shop_with_ids.pb ten times, one byte at a
/// time, and returns the total number of bytes delivered over all passes.
size_t readBytes() {
  size_t total = 0;
  foreach (pass; 0 .. 10) {
    auto reader = FileReader("/tmp/shop_with_ids.pb");
    ubyte current;
    // read() returns 1 per delivered byte and something else at end of input.
    while (reader.read(&current) == 1) {
      ++total;
    }
  }
  return total;
}
It's very likely because of fread. No one guarantees that it does the same thing in D as in C -- you're very likely using a different CRT altogether (unless you're using the Digital Mars C++ compiler?).
That means the library could be doing things like synchronization, etc. which slow things down. The only way you can know is to force D to use the same library as C, by telling the linker to link to the same libraries.
Until you can do that, you're comparing apples to oranges. If that's not possible, then call the OS directly from both, and then compare the results -- that way you're guaranteed that the underlying call is the same for both.
What happens if you use the std.stdio module:
import std.stdio;
/// Buffered single-byte reader on top of std.stdio.File.
///
/// The file is read in chunks of BUFFER_SIZE bytes with rawRead(); read()
/// hands out one byte per call and refills the chunk when it is used up.
struct FileReader {
  private File fFile;
  private enum BUFFER_SIZE = 8192;
  private ubyte[BUFFER_SIZE] fBuffer = void;  // skip the (costly) zero-initialization
  private ubyte[] buff;                       // unread remainder of the current chunk

  /// Opens the file named `fn` for binary reading.
  public this(string fn) {
    fFile = File(fn, "rb");
  }

  // No destructor needed: std.stdio's File is reference counted and closes
  // itself. (With std.c.stdio.fopen an explicit close would be required.)

  /// Stores the next byte in `targetBuffer` and returns 1, or returns 0 when
  /// no more data can be read.
  public int read(out ubyte targetBuffer) {
    if (buff.length == 0 && fillBuffer()) {
      return 0;
    }
    targetBuffer = buff[0];
    buff = buff[1 .. $];
    return 1;
  }

  /// Reads the next chunk into fBuffer and points `buff` at the bytes read.
  /// Returns true when the reader is finished, i.e. the file is not open or
  /// no bytes could be read. read() relies on this polarity.
  private bool fillBuffer() {
    if (!fFile.isOpen()) {
      return true;
    }
    buff = fFile.rawRead(fBuffer[]);
    return buff.length == 0;
  }
}
/// Benchmark driver: reads /tmp/shop_with_ids.pb ten times, one byte at a
/// time, and returns the total number of bytes delivered over all passes.
size_t readBytes() {
  size_t total = 0;
  foreach (pass; 0 .. 10) {
    auto reader = FileReader("/tmp/shop_with_ids.pb");
    ubyte current;
    // read() returns 1 per delivered byte and something else at end of input.
    while (reader.read(current) == 1) {
      ++total;
    }
  }
  return total;
}
If you want a true speed comparison, you should compile with -release -O -inline: this turns off debugging checks (mostly array out-of-bounds checks), optimizes, and inlines what it can. Of course, use equivalent settings for the C++ solution as well.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With