Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Can I decompress and deserialize a file using streams?

My application serializes an object using Json.Net, compresses the resulting JSON, then saves this to file. Additionally the application can load an object from one of these files. These objects can be tens of MB in size and I'm concerned about memory usage, due to the way the existing code creates large strings and byte arrays:

public void Save(MyClass myObject, string filename)
{
    // Serialize to a JSON string in memory, gzip the text, and persist the
    // compressed bytes to disk in one shot.
    string serialized = JsonConvert.SerializeObject(myObject);
    byte[] compressed = Compress(serialized);
    File.WriteAllBytes(filename, compressed);
}

public MyClass Load(string filename)
{
    // Read the compressed bytes, inflate them back to JSON, then deserialize.
    var bytes = File.ReadAllBytes(filename);
    var json = Decompress(bytes);
    var myObject = JsonConvert.DeserializeObject<MyClass>(json);
    // BUG FIX: the original snippet never returned the deserialized object,
    // which does not compile for a method declared to return MyClass.
    return myObject;
}

// Gzip-compresses a string. The text is encoded as UTF-16 (Encoding.Unicode)
// before compression, so Decompress must decode with the same encoding.
private static byte[] Compress(string s)
{
    byte[] raw = Encoding.Unicode.GetBytes(s);

    using (var output = new MemoryStream())
    {
        // The GZipStream must be disposed before the backing buffer is read;
        // disposal flushes the final compressed block into `output`.
        using (var zipper = new GZipStream(output, CompressionMode.Compress))
        {
            zipper.Write(raw, 0, raw.Length);
        }
        return output.ToArray();
    }
}

// Inflates a gzip payload produced by Compress and decodes it back to a
// string using UTF-16 (Encoding.Unicode), mirroring the encoder side.
private static string Decompress(byte[] bytes)
{
    using (var input = new MemoryStream(bytes))
    using (var inflater = new GZipStream(input, CompressionMode.Decompress))
    using (var output = new MemoryStream())
    {
        inflater.CopyTo(output);
        return Encoding.Unicode.GetString(output.ToArray());
    }
}

I was wondering if the Save/Load methods could be replaced with streams? I've found examples of using streams with Json.Net but am struggling to get my head around how to fit in the additional compression stuff.

like image 842
Andrew Stephens Avatar asked Oct 05 '15 08:10

Andrew Stephens


2 Answers

JsonSerializer has methods to deserialize from a JsonTextReader and serialize to a StreamWriter, both of which can be created on top of any sort of stream, including a GZipStream. Using them, you can create the following extension methods:

public static partial class JsonExtensions
{
    // 8 KiB writer buffer, as recommended by Bradley Grainger,
    // https://faithlife.codes/blog/2012/06/always-wrap-gzipstream-with-bufferedstream/
    // Anything under 85,000 bytes is also fine — larger buffers would be
    // allocated on the large object heap.  See:
    // https://docs.microsoft.com/en-us/dotnet/standard/garbage-collection/large-object-heap
    const int BufferSize = 8192;

    // UTF-8 without a byte-order mark, since JSON must not carry a BOM
    // per https://datatracker.ietf.org/doc/html/rfc8259#section-8.1
    static readonly Encoding DefaultEncoding = new UTF8Encoding(false);

    /// <summary>Serializes <paramref name="value"/> as gzip-compressed JSON into a new file at <paramref name="path"/>.</summary>
    public static void SerializeToFileCompressed(object value, string path, JsonSerializerSettings settings = null)
    {
        using (var fileStream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Read))
        {
            SerializeCompressed(value, fileStream, settings);
        }
    }

    /// <summary>Serializes <paramref name="value"/> as gzip-compressed JSON onto <paramref name="stream"/>.</summary>
    public static void SerializeCompressed(object value, Stream stream, JsonSerializerSettings settings = null)
    {
        // Writer chain: object -> JSON text -> StreamWriter -> GZipStream -> caller's stream.
        using (var gzip = new GZipStream(stream, CompressionMode.Compress))
        using (var textWriter = new StreamWriter(gzip, DefaultEncoding, BufferSize))
        {
            JsonSerializer.CreateDefault(settings).Serialize(textWriter, value);
        }
    }

    /// <summary>Reads gzip-compressed JSON from the file at <paramref name="path"/> and deserializes it as a <typeparamref name="T"/>.</summary>
    public static T DeserializeFromFileCompressed<T>(string path, JsonSerializerSettings settings = null)
    {
        using (var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            return DeserializeCompressed<T>(fileStream, settings);
        }
    }

    /// <summary>Reads gzip-compressed JSON from <paramref name="stream"/> and deserializes it as a <typeparamref name="T"/>.</summary>
    public static T DeserializeCompressed<T>(Stream stream, JsonSerializerSettings settings = null)
    {
        // Reader chain: caller's stream -> GZipStream -> StreamReader -> JsonTextReader -> object.
        using (var gzip = new GZipStream(stream, CompressionMode.Decompress))
        using (var textReader = new StreamReader(gzip))
        using (var jsonReader = new JsonTextReader(textReader))
        {
            return JsonSerializer.CreateDefault(settings).Deserialize<T>(jsonReader);
        }
    }
}

See Performance Tips: Optimize Memory Usage in the Json.NET documentation.

like image 192
dbc Avatar answered Sep 27 '22 21:09

dbc


For those looking for an idea of how to use the extensions from @dbc in UWP apps, I modified the code to this — where the StorageFile is a file you have access to write to.

/// <summary>Serializes <paramref name="value"/> as gzip-compressed JSON into the given UWP <see cref="StorageFile"/>.</summary>
/// FIX: returns Task instead of the original async void, so callers can await
/// completion and observe exceptions (async void makes them unobservable);
/// existing fire-and-forget call sites still compile unchanged.
public static async Task SerializeToFileCompressedAsync(object value, StorageFile file, JsonSerializerSettings settings = null)
{
    using (var stream = await file.OpenStreamForWriteAsync())
        SerializeCompressed(value, stream, settings);
}

/// <summary>Serializes <paramref name="value"/> as gzip-compressed JSON onto <paramref name="stream"/>.</summary>
public static void SerializeCompressed(object value, Stream stream, JsonSerializerSettings settings = null)
{
    // Writer chain: object -> JSON text -> StreamWriter -> GZipStream -> caller's stream.
    using (var gzip = new GZipStream(stream, CompressionMode.Compress))
    {
        using (var textWriter = new StreamWriter(gzip))
        {
            JsonSerializer.CreateDefault(settings).Serialize(textWriter, value);
        }
    }
}

/// <summary>Opens the UWP <see cref="StorageFile"/> for reading and deserializes its gzip-compressed JSON content as a <typeparamref name="T"/>.</summary>
public static async Task<T> DeserializeFromFileCompressedAsync<T>(StorageFile file, JsonSerializerSettings settings = null)
{
    using (var readStream = await file.OpenStreamForReadAsync())
    {
        return DeserializeCompressed<T>(readStream, settings);
    }
}

/// <summary>Reads gzip-compressed JSON from <paramref name="stream"/> and deserializes it as a <typeparamref name="T"/>.</summary>
public static T DeserializeCompressed<T>(Stream stream, JsonSerializerSettings settings = null)
{
    // Reader chain: caller's stream -> GZipStream -> StreamReader -> JsonTextReader -> object.
    using (var gzip = new GZipStream(stream, CompressionMode.Decompress))
    using (var textReader = new StreamReader(gzip))
    using (var jsonReader = new JsonTextReader(textReader))
    {
        var serializer = JsonSerializer.CreateDefault(settings);
        return serializer.Deserialize<T>(jsonReader);
    }
}
like image 43
Mutley Avatar answered Sep 27 '22 21:09

Mutley