I am trying to access the first few lines of text files using the File API in JavaScript.
To do so, I slice an arbitrary number of bytes from the beginning of the file and hand the resulting blob over to a FileReader.
For large files this takes very long, even though my understanding is that only the first few bytes of the file need to be accessed.
I have tested this in both Chrome and Edge (Chromium).
Profiling in Chrome with the performance dev tools shows a lot of idle time before reader.onloadend fires, and no increase in RAM usage. This might, however, be because the File API is implemented in the browser itself and does not show up in the JavaScript performance statistics.
My implementation of the FileReader looks something like this:
const reader = new FileReader();
reader.onloadend = (evt) => {
  if (evt.target.readyState == FileReader.DONE) {
    console.log(evt.target.result.toString());
  }
};

// Slice the first 10240 bytes of the file
var blob = files.item(0).slice(0, 1024 * 10);
// Start reading the sliced blob
reader.readAsBinaryString(blob);
This works fine, but as described it performs quite poorly for large files. I tried it with files of 10 KB, 100 MB and 6 GB. The time until the first 10 KB are logged seems to correlate directly with the total file size.
Any suggestions on how to improve performance for reading the beginning of a file?
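For context, the timings were taken with a minimal harness along these lines (a sketch, not the exact code; fileInput is an assumed input element of type "file"):

// Hypothetical timing harness: measures time from starting the read
// until the 10 KB slice has been delivered to onloadend
document.getElementById("fileInput").addEventListener("change", (event) => {
  const file = event.target.files[0];
  const reader = new FileReader();
  const t0 = performance.now();
  reader.onloadend = () => {
    console.log(`First 10 KB available after ${performance.now() - t0} ms`);
  };
  reader.readAsBinaryString(file.slice(0, 1024 * 10));
});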
Edit: Using Response and DOM streams as suggested by @BenjaminGruenbaum sadly does not improve the read performance.
var dest = new WritableStream({
  write(str) {
    console.log(str);
  },
});

var blob = files.item(0).slice(0, 1024 * 10);
(blob.stream ? blob.stream() : new Response(blob).body)
  // Decode the binary-encoded response to string
  .pipeThrough(new TextDecoderStream())
  .pipeTo(dest)
  .then(() => {
    console.log('done');
  });
Stream the uploaded File rather than reading the entire content.
The following code snippet prints the first 3 lines of the uploaded text file, extracting them in a streaming manner.
The performance gain comes from the fact that only the needed portion of the file is read and processed; once the first lines have been received, the stream is closed.
async function lineReader(blob, lineCallback) {
  // TextDecoderStream decodes the raw byte stream to UTF-8 text,
  // so no separate TextDecoder is needed
  const stream = blob.stream();
  // Stream the file content instead of reading the entire file
  const reader = stream.pipeThrough(new TextDecoderStream()).getReader();
  try {
    let buffer = ""; // Buffer to hold incomplete lines
    do {
      const {done, value} = await reader.read();
      if (done) break; // Exit when the file has been fully read
      // Append the already-decoded chunk to the buffer
      buffer += value;
      // Process complete lines in the buffer
      let lines = buffer.split(/\r\n|\n/);
      buffer = lines.pop(); // Save the last (possibly incomplete) line for the next chunk
      for (const line of lines) {
        // Stop reading as soon as the callback signals it has enough lines
        if (!lineCallback(line)) {
          await reader.cancel();
          return;
        }
      }
    } while (true);
    // Process any remaining text in the buffer
    if (buffer) {
      lineCallback(buffer);
    }
  } finally {
    reader.releaseLock();
  }
}
function printFirstLines(file, nrOfLines) {
  const output = document.getElementById("output");
  let lineCount = 0;
  return lineReader(file, line => {
    output.textContent += `Line #${lineCount}: ${line}\n`;
    ++lineCount;
    // Returning false stops the reader once enough lines were printed
    return lineCount < nrOfLines;
  });
}
// Event listener for the file input
document.getElementById("fileInput").addEventListener("change", (event) => {
  console.log('Start processing...');
  const t0 = Date.now();
  const file = event.target.files[0];
  if (file) {
    if (file.stream) {
      // Print the first 3 lines
      printFirstLines(file, 3).then(() => {
        const t1 = Date.now();
        console.log(`Completed in ${t1 - t0} ms.`);
      }, err => {
        console.error(err.message);
      });
    } else {
      // You can fall back on the slower new Response(file).body here
      alert("Your browser lacks Blob.stream() support");
    }
  } else {
    alert("No file selected");
  }
});
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Test streaming text reade</title>
</head>
<body>
<h1>Test streaming text reader</h1>
<input type="file" id="fileInput" />
<pre id="output"></pre>
</body>
</html>
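If you only ever need the beginning of the file, you can additionally cap the number of bytes the stream can touch by slicing first, as in the original question. A sketch combining both ideas (the 10 KB cap is an arbitrary assumption, and file is the File from the input element):

// Blob limited to the first 10 KB; Blob.slice() returns a Blob,
// which also exposes .stream(), so lineReader() works on it unchanged
const head = file.slice(0, 1024 * 10);
printFirstLines(head, 3); // Streams at most 10 KB, then stops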