I have a web app that needs to upload large files to Azure BLOB storage. My solution uses HTML5 File API to slice into chunks which are then put as blob blocks, the IDs of the blocks are stored in an array and then the blocks are committed as a blob.
The solution works fine in IE. On 64 bit Chrome I have successfully uploaded 4Gb files but see very heavy memory usage (2Gb+). On 32 bit Chrome the specific chrome process will get to around 500-550Mb and then crash.
I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creeep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.
It's written as an Angular service called from a controller, I think just the service code is pertinent:
(function() {
'use strict';
angular
.module('app.core')
.factory('blobUploadService',
[
'$http', 'stringUtilities',
blobUploadService
]);
function blobUploadService($http, stringUtilities) {
var defaultBlockSize = 1024 * 1024; // Default to 1024KB
var stopWatch = {};
var state = {};
var initializeState = function(config) {
var blockSize = defaultBlockSize;
if (config.blockSize) blockSize = config.blockSize;
var maxBlockSize = blockSize;
var numberOfBlocks = 1;
var file = config.file;
var fileSize = file.size;
if (fileSize < blockSize) {
maxBlockSize = fileSize;
}
if (fileSize % maxBlockSize === 0) {
numberOfBlocks = fileSize / maxBlockSize;
} else {
numberOfBlocks = parseInt(fileSize / maxBlockSize, 10) + 1;
}
return {
maxBlockSize: maxBlockSize,
numberOfBlocks: numberOfBlocks,
totalBytesRemaining: fileSize,
currentFilePointer: 0,
blockIds: new Array(),
blockIdPrefix: 'block-',
bytesUploaded: 0,
submitUri: null,
file: file,
baseUrl: config.baseUrl,
sasToken: config.sasToken,
fileUrl: config.baseUrl + config.sasToken,
progress: config.progress,
complete: config.complete,
error: config.error,
cancelled: false
};
};
/* config: {
baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
sasToken: // Shared access signature querystring key/value prefixed with ?,
file: // File object using the HTML5 File API,
progress: // progress callback function,
complete: // complete callback function,
error: // error callback function,
blockSize: // Use this to override the defaultBlockSize
} */
var upload = function(config) {
state = initializeState(config);
var reader = new FileReader();
reader.onloadend = function(evt) {
if (evt.target.readyState === FileReader.DONE && !state.cancelled) { // DONE === 2
var uri = state.fileUrl + '&comp=block&blockid=' + state.blockIds[state.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
$http.put(uri,
requestData,
{
headers: {
'x-ms-blob-type': 'BlockBlob',
'Content-Type': state.file.type
},
transformRequest: []
})
.success(function(data, status, headers, config) {
state.bytesUploaded += requestData.length;
var percentComplete = ((parseFloat(state.bytesUploaded) / parseFloat(state.file.size)) * 100
).toFixed(2);
if (state.progress) state.progress(percentComplete, data, status, headers, config);
uploadFileInBlocks(reader, state);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
});
}
};
uploadFileInBlocks(reader, state);
return {
cancel: function() {
state.cancelled = true;
}
};
};
function cancel() {
stopWatch = {};
state.cancelled = true;
return true;
}
function startStopWatch(handle) {
if (stopWatch[handle] === undefined) {
stopWatch[handle] = {};
stopWatch[handle].start = Date.now();
}
}
function stopStopWatch(handle) {
stopWatch[handle].stop = Date.now();
var duration = stopWatch[handle].stop - stopWatch[handle].start;
delete stopWatch[handle];
return duration;
}
var commitBlockList = function(state) {
var uri = state.fileUrl + '&comp=blocklist';
var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < state.blockIds.length; i++) {
requestBody += '<Latest>' + state.blockIds[i] + '</Latest>';
}
requestBody += '</BlockList>';
$http.put(uri,
requestBody,
{
headers: {
'x-ms-blob-content-type': state.file.type
}
})
.success(function(data, status, headers, config) {
if (state.complete) state.complete(data, status, headers, config);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
// called asynchronously if an error occurs
// or server returns response with an error status.
});
};
var uploadFileInBlocks = function(reader, state) {
if (!state.cancelled) {
if (state.totalBytesRemaining > 0) {
var fileContent = state.file.slice(state.currentFilePointer,
state.currentFilePointer + state.maxBlockSize);
var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);
state.blockIds.push(btoa(blockId));
reader.readAsArrayBuffer(fileContent);
state.currentFilePointer += state.maxBlockSize;
state.totalBytesRemaining -= state.maxBlockSize;
if (state.totalBytesRemaining < state.maxBlockSize) {
state.maxBlockSize = state.totalBytesRemaining;
}
} else {
commitBlockList(state);
}
}
};
return {
upload: upload,
cancel: cancel,
startStopWatch: startStopWatch,
stopStopWatch: stopStopWatch
};
};
})();
Are there any ways I can move the scope of objects to help with Chrome GC? I have seen other people mentioning similar issues but understood Chromium had resolved some.
I should say my solution is heavily based on Gaurav Mantri's blog post here:
http://gauravmantri.com/2013/02/16/uploading-large-files-in-windows-azure-blob-storage-using-shared-access-signature-html-and-javascript/#comment-47480
I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creeep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.
You are correct. The new Blob
s created by .slice()
are being held in memory.
The solution is to call Blob.prototype.close()
on the Blob
reference when processing Blob
or File
object is complete.
Note also, at javascript
at Question also creates a new instance of FileReader
if upload
function is called more than once.
4.3.1. The slice method
The
slice()
method returns a newBlob
object with bytes ranging from the optionalstart
parameter up to but not including the optionalend
parameter, and with atype
attribute that is the value of the optionalcontentType
parameter.
Blob
instances exist for the life of document
. Though Blob
should be garbage collected once removed from Blob URL Store
9.6. Lifetime of Blob URLs
Note: User agents are free to garbage collect resources removed from the
Blob URL Store
.
Each
Blob
must have an internal snapshot state, which must be initially set to the state of the underlying storage, if any such underlying storage exists, and must be preserved throughStructuredClone
. Further normative definition of snapshot state can be found forFile
s.
4.3.2. The close method
The
close()
method is said toclose
aBlob
, and must act as follows:
- If the
readability state
of the context object isCLOSED
, terminate this algorithm.- Otherwise, set the
readability state
of thecontext object
toCLOSED
.- If the context object has an entry in the
Blob URL Store
, remove the entry that corresponds to thecontext object
.
If Blob
object is passed to URL.createObjectURL()
, call URL.revokeObjectURL()
on Blob
or File
object, then call .close()
.
The
revokeObjectURL(url)
static methodRevokes the
Blob URL
provided in the stringurl
by removing the corresponding entry from the Blob URL Store. This method must act as follows: 1. If theurl
refers to aBlob
that has areadability state
ofCLOSED
OR if the value provided for theurl
argument is not aBlob URL
, OR if the value provided for theurl
argument does not have an entry in theBlob URL Store
, this method call does nothing. User agents may display a message on the error console. 2. Otherwise, user agents mustremove the entry
from theBlob URL Store
forurl
.
You can view the result of these calls by opening
chrome://blob-internals
reviewing details of before and after calls which create Blob
and close Blob
.
For example, from
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
Type: data
Length: 3
to
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
following call to .close()
. Similarly from
blob:http://example.com/c2823f75-de26-46f9-a4e5-95f57b8230bd
Uuid: 29e430a6-f093-40c2-bc70-2b6838a713bc
An alternative approach could be to send file as an ArrayBuffer
or chunks of array buffers. Then re-assemble the file at server.
Or you can call FileReader
constructor, FileReader.prototype.readAsArrayBuffer()
, and load
event of FileReader
each once.
At load
event of FileReader
pass ArrayBuffer
to Uint8Array
, use ReadableStream
, TypedArray.prototype.subarray()
, .getReader()
, .read()
to get N
chunks of ArrayBuffer
as a TypedArray
at pull
from Uint8Array
. When N
chunks equaling .byteLength
of ArrayBuffer
have been processed, pass array of Uint8Array
s to Blob
constructor to recombine file parts into single file at browser; then send Blob
to server.
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<input id="file" type="file">
<br>
<progress value="0"></progress>
<br>
<output for="file"><img alt="preview"></output>
<script type="text/javascript">
const [input, output, img, progress, fr, handleError, CHUNK] = [
document.querySelector("input[type='file']")
, document.querySelector("output[for='file']")
, document.querySelector("output img")
, document.querySelector("progress")
, new FileReader
, (err) => console.log(err)
, 1024 * 1024
];
progress.addEventListener("progress", e => {
progress.value = e.detail.value;
e.detail.promise();
});
let [chunks, NEXT, CURR, url, blob] = [Array(), 0, 0];
input.onchange = () => {
NEXT = CURR = progress.value = progress.max = chunks.length = 0;
if (url) {
URL.revokeObjectURL(url);
if (blob.hasOwnProperty("close")) {
blob.close();
}
}
if (input.files.length) {
console.log(input.files[0]);
progress.max = input.files[0].size;
progress.step = progress.max / CHUNK;
fr.readAsArrayBuffer(input.files[0]);
}
}
fr.onload = () => {
const VIEW = new Uint8Array(fr.result);
const LEN = VIEW.byteLength;
const {type, name:filename} = input.files[0];
const stream = new ReadableStream({
pull(controller) {
if (NEXT < LEN) {
controller
.enqueue(VIEW.subarray(NEXT, !NEXT ? CHUNK : CHUNK + NEXT));
NEXT += CHUNK;
} else {
controller.close();
}
},
cancel(reason) {
console.log(reason);
throw new Error(reason);
}
});
const [reader, processData] = [
stream.getReader()
, ({value, done}) => {
if (done) {
return reader.closed.then(() => chunks);
}
chunks.push(value);
return new Promise(resolve => {
progress.dispatchEvent(
new CustomEvent("progress", {
detail:{
value:CURR += value.byteLength,
promise:resolve
}
})
);
})
.then(() => reader.read().then(data => processData(data)))
.catch(e => reader.cancel(e))
}
];
reader.read()
.then(data => processData(data))
.then(data => {
blob = new Blob(data, {type});
console.log("complete", data, blob);
if (/image/.test(type)) {
url = URL.createObjectURL(blob);
img.onload = () => {
img.title = filename;
input.value = "";
}
img.src = url;
} else {
input.value = "";
}
})
.catch(e => handleError(e))
}
</script>
</body>
</html>
plnkr http://plnkr.co/edit/AEZ7iQce4QaJOKut71jk?p=preview
You can also use utilize fetch()
fetch(new Request("/path/to/server/", {method:"PUT", body:blob}))
To transmit body for a request request, run these steps:
- Let body be request’s body.
If body is null, then queue a fetch task on request to process request end-of-body for request and abort these steps.
Let read be the result of reading a chunk from body’s stream.
When read is fulfilled with an object whose
done
property is false and whosevalue
property is aUint8Array
object, run these substeps:
- Let bytes be the byte sequence represented by the
Uint8Array
object.Transmit bytes.
Increase body’s transmitted bytes by bytes’s length.
Run the above step again.
When read is fulfilled with an object whose
done
property is true, queue a fetch task on request to process request end-of-body for request.When read is fulfilled with a value that matches with neither of the above patterns, or read is rejected, terminate the ongoing fetch with reason fatal.
See also
Progress indicators for fetch?
Fetch with ReadableStream
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With