In the browser, I read in a file using the JS FileReader().readAsBinaryString(). Using the CryptoJS library I can MD5 hash the data.
This works fine, but I do not know how to handle large files. For example, just reading a 2 GiB file crashes the browser window. I can slice blobs from the file and hash them as I go, but wouldn't this prevent anyone else from verifying the same hash without following the same steps as me?
Is there a way to get the md5 hash of a large file in this circumstance? How would you calc the md5 hash of a 1TB file, for example? Do I need to read the file in as a stream?
First time cutting my teeth on this one and I'm not sure how to do it.
This resides in an angular directive, hence the scope.
var reader = new FileReader();
// Runs once the requested slice of the file has been read.
reader.onload = function (loadEvent) {
    scope.$apply(function () {
        scope.files = changeEvent.target.files;
        scope.fileread = loadEvent.target.result;
        // readAsBinaryString yields one character per byte. Handing that string
        // straight to CryptoJS.MD5 has it re-encoded as UTF-8 first, which turns
        // every byte above 0x7F into two bytes and so changes the digest.
        // Parsing it as Latin1 keeps the bytes exactly as they were read.
        scope.md5Data = CryptoJS.MD5(CryptoJS.enc.Latin1.parse(scope.fileread)).toString();
    });
};
// First ten megs of the file
reader.readAsBinaryString((changeEvent.target.files[0]).slice(0, 10 * 1024 * 1024));
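Generally, two files can have the same md5 hash only if their contents are exactly the same. Even a single bit of variation will generate a completely different hash value. There is one caveat, though: an md5 sum is only 128 bits (16 bytes). Because there are far more possible files than there are 128-bit values, two different files can in principle share the same hash (a collision), although this is extremely unlikely to happen by accident.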
It generally takes 3-4 hours to transfer via NC and then 40 minutes to get the md5sum. The security of the hash is not an issue in this case.
The time MD5 takes depends on the amount of data being hashed. Hashing a small document is practically instant; hashing a movie means reading the whole file, which takes a lot longer. It would be helpful if you provided some context.
md5sum is used to verify the integrity of files, as virtually any change to a file will cause its MD5 hash to change. Most commonly, md5sum is used to verify that a file has not changed as a result of a faulty file transfer, a disk error or non-malicious meddling.
spark-md5
and Q
Since none of the other answers provided a full snippet, here's how you would calculate the MD5 hash of a large file:
/**
 * Computes the MD5 hash of a File/Blob by reading it in fixed-size slices, so
 * the whole file never has to be held in memory at once.
 *
 * Every slice is fed, in order, into one incremental SparkMD5 instance, so the
 * resulting digest does not depend on the slice size that was used.
 *
 * @param {File|Blob} file - The file to hash.
 * @param {number} [bufferSize] - Slice size in bytes. Defaults to 2 MiB when
 *   omitted (undefined or null). Fractional values are rounded down.
 * @returns {Promise} Q promise that
 *   - notifies `{ currentPart, totalParts }` after each slice has been read,
 *   - resolves with `{ hashResult, duration }` (duration in milliseconds),
 *   - rejects with the FileReader error event if a read fails, or with a
 *     RangeError when `bufferSize` is not a finite number >= 1.
 */
function calculateMD5Hash(file, bufferSize) {
    var DEFAULT_BUFFER_SIZE = 2 * 1024 * 1024;
    var def = Q.defer();

    // Normalise to an integer up front. A missing, zero, negative or non-numeric
    // size would otherwise yield a hash of nothing or an endless loop of empty
    // reads, and a numeric string would break `start + chunkSize` below.
    var chunkSize = bufferSize == null ? DEFAULT_BUFFER_SIZE : Math.floor(Number(bufferSize));
    if (!isFinite(chunkSize) || chunkSize < 1) {
        def.reject(new RangeError('bufferSize must be a finite number >= 1, got: ' + bufferSize));
        return def.promise;
    }

    var fileReader = new FileReader();
    // Resolve the slicer on the instance so plain Blobs work as well as Files;
    // the prefixed names cover old Firefox/WebKit builds.
    var fileSlicer = file.slice || file.mozSlice || file.webkitSlice;
    var hashAlgorithm = new SparkMD5();
    // An empty file is still read once (as an empty slice), so report at least
    // one part to keep progress notifications coherent ("1 of 1", not "1 of 0").
    var totalParts = Math.max(1, Math.ceil(file.size / chunkSize));
    var currentPart = 0;
    var startTime = Date.now();

    fileReader.onload = function (e) {
        currentPart += 1;
        def.notify({
            currentPart: currentPart,
            totalParts: totalParts
        });
        hashAlgorithm.appendBinary(e.target.result);
        if (currentPart < totalParts) {
            // Only start the next read after the previous slice has been hashed,
            // so at most one slice is in memory at a time.
            processNextPart();
            return;
        }
        def.resolve({
            hashResult: hashAlgorithm.end(),
            duration: Date.now() - startTime
        });
    };

    fileReader.onerror = function (e) {
        def.reject(e);
    };

    // Reads the slice belonging to `currentPart`; the final slice is clamped to
    // the end of the file.
    function processNextPart() {
        var start = currentPart * chunkSize;
        var end = Math.min(start + chunkSize, file.size);
        fileReader.readAsBinaryString(fileSlicer.call(file, start, end));
    }

    processNextPart();
    return def.promise;
}
/**
 * Hashes the file currently selected in the `#file` input and reports
 * progress, the final result, or a failure on the console.
 * Does nothing when no file has been selected.
 */
function calculate() {
    var input = document.getElementById('file');
    if (!input.files.length) {
        return;
    }
    var file = input.files[0];
    var bufferSize = Math.pow(1024, 2) * 10; // 10MB
    calculateMD5Hash(file, bufferSize).then(
        function (result) {
            // Success
            console.log(result);
        },
        function (err) {
            // Surface the failure instead of silently dropping it.
            console.error('MD5 calculation failed:', err);
        },
        function (progress) {
            // We get notified of the progress as it is executed.
            // The last part is usually shorter than bufferSize, so clamp the
            // running total to the real file size rather than parts * bufferSize.
            var bytesDone = Math.min(progress.currentPart * bufferSize, file.size);
            console.log(progress.currentPart, 'of', progress.totalParts, 'Total bytes:', bytesDone, 'of', file.size);
        });
}
<!-- Third-party libraries loaded from cdnjs: Q 1.4.1 and spark-md5 2.0.2. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/q.js/1.4.1/q.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/spark-md5/2.0.2/spark-md5.min.js"></script>
<div>
<!-- File picker with id "file". -->
<input type="file" id="file"/>
<!-- Clicking the button invokes the global calculate() function. -->
<input type="button" onclick="calculate();" value="Calculate" class="btn primary" />
</div>
Use SparkMD5 (https://github.com/satazor/SparkMD5), which supports incremental hashing:
// Incremental hashing: feed the input piece by piece, then finalize.
var spark = new SparkMD5();
['Hi', 'there'].forEach(function (piece) {
    spark.append(piece);
});
// end() finishes the computation over everything appended so far.
var hexHash = spark.end();
and it has a file-slice example
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With