Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

javascript FileReader - parsing long file in chunks

I have long file I need to parse. Because it's very long I need to do it chunk by chunk. I tried this:

function parseFile(file){
    var chunkSize = 2000;
    var fileSize = (file.size - 1);

    var foo = function(e){
        console.log(e.target.result);
    };

    for(var i =0; i < fileSize; i += chunkSize)
    {
        (function( fil, start ) {
            var reader = new FileReader();
            var blob = fil.slice(start, chunkSize + 1);
            reader.onload = foo;
            reader.readAsText(blob);
        })( file, i );
    }
}

After running it I see only the first chunk in the console. If I change 'console.log' to jquery append to some div I see only first chunk in that div. What about other chunks? How to make it work?

like image 524
mnowotka Avatar asked Jan 21 '13 12:01

mnowotka


3 Answers

FileReader API is asynchronous so you should handle it with block calls. A for loop wouldn't do the trick since it wouldn't wait for each read to complete before reading the next chunk. Here's a working approach.

function parseFile(file, callback) {
    var fileSize   = file.size;
    var chunkSize  = 64 * 1024; // bytes
    var offset     = 0;
    var self       = this; // we need a reference to the current object
    var chunkReaderBlock = null;

    var readEventHandler = function(evt) {
        if (evt.target.error == null) {
            offset += evt.target.result.length;
            callback(evt.target.result); // callback for handling read chunk
        } else {
            console.log("Read error: " + evt.target.error);
            return;
        }
        if (offset >= fileSize) {
            console.log("Done reading file");
            return;
        }

        // of to the next chunk
        chunkReaderBlock(offset, chunkSize, file);
    }

    chunkReaderBlock = function(_offset, length, _file) {
        var r = new FileReader();
        var blob = _file.slice(_offset, length + _offset);
        r.onload = readEventHandler;
        r.readAsText(blob);
    }

    // now let's start the read with the first block
    chunkReaderBlock(offset, chunkSize, file);
}
like image 146
alediaferia Avatar answered Oct 05 '22 22:10

alediaferia


You can take advantage of Response (part of fetch) to convert most things to anything else blob, text, json and also get a ReadableStream that can help you read the blob in chunks 👍

var dest = new WritableStream({
  write (str) {
    console.log(str)
  }
})

var blob = new Blob(['bloby']);

(blob.stream ? blob.stream() : new Response(blob).body)
  // Decode the binary-encoded response to string
  .pipeThrough(new TextDecoderStream())
  .pipeTo(dest)
  .then(() => {
    console.log('done')
  })

Old answer (WritableStreams pipeTo and pipeThrough was not implemented before)

I came up with a interesting idéa that is probably very fast since it will convert the blob to a ReadableByteStreamReader probably much easier too since you don't need to handle stuff like chunk size and offset and then doing it all recursive in a loop

function streamBlob(blob) {
  const reader = new Response(blob).body.getReader()
  const pump = reader => reader.read()
  .then(({ value, done }) => {
    if (done) return
    // uint8array chunk (use TextDecoder to read as text)
    console.log(value)
    return pump(reader)
  })
  return pump(reader)
}

streamBlob(new Blob(['bloby'])).then(() => {
  console.log('done')
})
like image 26
Endless Avatar answered Oct 05 '22 23:10

Endless


The second argument of slice is actually the end byte. Your code should look something like:

 function parseFile(file){
    var chunkSize = 2000;
    var fileSize = (file.size - 1);

    var foo = function(e){
        console.log(e.target.result);
    };

    for(var i =0; i < fileSize; i += chunkSize) {
        (function( fil, start ) {
            var reader = new FileReader();
            var blob = fil.slice(start, chunkSize + start);
            reader.onload = foo;
            reader.readAsText(blob);
        })(file, i);
    }
}

Or you can use this BlobReader for easier interface:

BlobReader(blob)
.readText(function (text) {
  console.log('The text in the blob is', text);
});

More information:

  • README.md
  • Docs
like image 33
Minko Gechev Avatar answered Oct 05 '22 22:10

Minko Gechev