I'm writing a large file with node.js using a writable stream:
var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', { flags: 'w' });

var lines;
while (lines = getLines()) {
    for (var i = 0; i < lines.length; i++) {
        stream.write(lines[i]);
    }
}
I'm wondering if this scheme is safe without using the drain event? If it is not (which I think is the case), what is the pattern for writing arbitrarily large amounts of data to a file?
That's how I finally did it. The idea is to create a readable stream that implements the ReadStream interface and then use the pipe() method to pipe the data into the writable stream.
var fs = require('fs');
var writeStream = fs.createWriteStream('someFile.txt', { flags: 'w' });
var readStream = new MyReadStream();

readStream.pipe(writeStream);
writeStream.on('close', function () {
    console.log('All done!');
});
An example of such a MyReadStream class can be taken from the mongoose QueryStream.
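For illustration only, here is a minimal sketch of what such a class could look like using the built-in stream.Readable base class; the names MyReadStream and getLines are assumptions carried over from the question, not an existing API:

var Readable = require('stream').Readable;
var util = require('util');

// Hypothetical readable stream that feeds lines from getLines(), which is
// assumed to return a falsy value once there is no more data.
function MyReadStream(options) {
    Readable.call(this, options);
}
util.inherits(MyReadStream, Readable);

// _read() is called by the stream machinery whenever it wants more data;
// push(null) signals end-of-stream.
MyReadStream.prototype._read = function () {
    var lines = getLines();
    if (!lines) {
        this.push(null);
        return;
    }
    for (var i = 0; i < lines.length; i++) {
        this.push(lines[i]);
    }
};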
The idea behind drain is that you would use it to test here:
var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', { flags: 'w' });

var lines;
while (lines = getLines()) {
    for (var i = 0; i < lines.length; i++) {
        stream.write(lines[i]); // <-- the place to test
    }
}
which you are not doing. So you would need to rearchitect it to make it "reentrant".
var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', { flags: 'w' });

var lines;
while (lines = getLines()) {
    for (var i = 0; i < lines.length; i++) {
        var written = stream.write(lines[i]); // <-- the place to test
        if (!written) {
            // do something here to wait till you can safely write again
            // this means prepare a buffer and wait till you can come back to finish
            //   lines[i] -> remainder
        }
    }
}
However, does this mean that you need to keep buffering getLines as well while you wait?
var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', { flags: 'w' });

var lines;
var buffer = { remainingLines: [] };

while (lines = getLines()) {
    for (var i = 0; i < lines.length; i++) {
        var written = stream.write(lines[i]); // <-- the place to test
        if (!written) {
            // the chunk was still buffered, but the internal buffer is full,
            // so keep the rest and wait for 'drain' before writing again
            buffer.remainingLines = lines.slice(i + 1);
            break; // notice there's no way to re-run this once we leave here
        }
    }
}

stream.on('drain', function () {
    if (buffer.remainingLines.length) {
        for (var i = 0; i < buffer.remainingLines.length; i++) {
            var written = stream.write(buffer.remainingLines[i]);
            if (!written) {
                // still over the high-water mark: keep what is left and
                // wait for the next 'drain'
                buffer.remainingLines = buffer.remainingLines.slice(i + 1);
                break;
            }
        }
    }
});
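For comparison, the same back-pressure idea can be written more compactly on current Node.js versions. This is only a sketch, assuming the same hypothetical getLines() helper from the question: it checks the return value of write() and waits for a single 'drain' event whenever the internal buffer is full.

const fs = require('fs');
const { once } = require('events');

async function writeAllLines() {
    const stream = fs.createWriteStream('someFile.txt', { flags: 'w' });
    let lines;
    while ((lines = getLines())) {
        for (const line of lines) {
            if (!stream.write(line)) {
                // back-pressure: the chunk was accepted but buffered, so wait
                // until the internal buffer has drained before writing more
                await once(stream, 'drain');
            }
        }
    }
    stream.end();
    await once(stream, 'finish');
}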