I`m looking for solution for hashing large file content (files may be over 2gb in 32bit os). It there any easy solution for that? Or just reading by part and loading to buffer?
If you choose to use TransformBlock
, then you can safely ignore the last parameter and set the outputBuffer to null
. TransformBlock will copy from the input to the output array - but why would you want to simply copy bits for no good reason?
Furthermore, all mscorlib HashAlgorithms work as you might expect, i.e. the block size doesn't seem to affect the hash output; and whether you pass the data in one array and then hash in chunks by changing the inputOffset
or you hash by passing smaller, separate arrays doesn't matter. I verified this using the following code:
(this is slightly long, just here so people can verify for themselves that HashAlgorithm
implementations are sane).
public static void Main() {
RandomNumberGenerator rnd = RandomNumberGenerator.Create();
byte[] input = new byte[20];
rnd.GetBytes(input);
Console.WriteLine("Input Data: " + BytesToStr(input));
var hashAlgoTypes = Assembly.GetAssembly(typeof(HashAlgorithm)).GetTypes()
.Where(t => typeof(HashAlgorithm).IsAssignableFrom(t) && !t.IsAbstract);
foreach (var hashType in hashAlgoTypes)
new AlgoTester(hashType).AssertOkFor(input.ToArray());
}
public static string BytesToStr(byte[] bytes) {
StringBuilder str = new StringBuilder();
for (int i = 0; i < bytes.Length; i++)
str.AppendFormat("{0:X2}", bytes[i]);
return str.ToString();
}
public class AlgoTester {
readonly byte[] key;
readonly Type type;
public AlgoTester(Type type) {
this.type=type;
if (typeof(KeyedHashAlgorithm).IsAssignableFrom(type))
using(var algo = (KeyedHashAlgorithm)Activator.CreateInstance(type))
key = algo.Key.ToArray();
}
public HashAlgorithm MakeAlgo() {
HashAlgorithm algo = (HashAlgorithm)Activator.CreateInstance(type);
if (key != null)
((KeyedHashAlgorithm)algo).Key = key;
return algo;
}
public byte[] GetHash(byte[] input) {
using(HashAlgorithm sha = MakeAlgo())
return sha.ComputeHash(input);
}
public byte[] GetHashOneBlock(byte[] input) {
using(HashAlgorithm sha = MakeAlgo()) {
sha.TransformFinalBlock(input, 0, input.Length);
return sha.Hash;
}
}
public byte[] GetHashMultiBlock(byte[] input, int size) {
using(HashAlgorithm sha = MakeAlgo()) {
int offset = 0;
while (input.Length - offset >= size)
offset += sha.TransformBlock(input, offset, size, input, offset);
sha.TransformFinalBlock(input, offset, input.Length - offset);
return sha.Hash;
}
}
public byte[] GetHashMultiBlockInChunks(byte[] input, int size) {
using(HashAlgorithm sha = MakeAlgo()) {
int offset = 0;
while (input.Length - offset >= size)
offset += sha.TransformBlock(input.Skip(offset).Take(size).ToArray()
, 0, size, null, -24124512);
sha.TransformFinalBlock(input.Skip(offset).ToArray(), 0
, input.Length - offset);
return sha.Hash;
}
}
public void AssertOkFor(byte[] data) {
var direct = GetHash(data);
var indirect = GetHashOneBlock(data);
var outcomes =
new[] { 1, 2, 3, 5, 10, 11, 19, 20, 21 }.SelectMany(i =>
new[]{
new{ Hash=GetHashMultiBlock(data,i), Name="ByMSDN"+i},
new{ Hash=GetHashMultiBlockInChunks(data,i), Name="InChunks"+i}
}).Concat(new[] { new { Hash = indirect, Name = "OneBlock" } })
.Where(result => !result.Hash.SequenceEqual(direct)).ToArray();
Console.Write("Testing: " + type);
if (outcomes.Any()) {
Console.WriteLine("not OK.");
Console.WriteLine(type.Name + " direct was: " + BytesToStr(direct));
} else Console.WriteLine(" OK.");
foreach (var outcome in outcomes)
Console.WriteLine(type.Name + " differs with: " + outcome.Name + " "
+ BytesToStr(outcome.Hash));
}
}
Use TransformBlock
and TransformFinalBlock
to calculate the hash block by block, so you won't need to read the entire file into memory. (There is a nice example in the first link - and another one in this previous question).
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With