Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Compress an InputStream with gzip

Tags:

I would like to compress an input stream in java using Gzip compression.

Let's say we have an uncompressed input stream (1 GB of data). As a result, I want a compressed InputStream built from that source:

/**
 * Asker's attempt at producing a compressed view of {@code unCompressedStream}.
 * Shown as the non-working starting point of the question.
 */
public InputStream getCompressedStream(InputStream unCompressedStream) {
    // Not working because it's uncompressing the stream, I want the opposite.
    return new GZIPInputStream(unCompressedStream);
}
like image 890
Fabien Avatar asked Jun 14 '12 15:06

Fabien


2 Answers

DeflaterInputStream is not what you want because it lacks gzip header/trailer and uses a slightly different compression.

If you change from OutputStream (push) to InputStream (pull), you need to do things differently.

What GZIPOutputStream does is:

  • write a static gzip header
  • write a deflated stream using DeflaterOutputStream. While the stream is written, a CRC32 checksum is built from the uncompressed data and the number of bytes is counted
  • write a trailer containing the CRC32 Checksum and the number of bytes.

If you want to do the same with InputStreams, you need a stream that contains:

  • the header
  • the deflated content
  • the trailer

The best way to do this is to provide 3 different streams and combine them to one. Fortunately there is SequenceInputStream that does the combining of the streams for you.

Here's my implementation plus a simple unit test:

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.Enumeration;
import java.util.NoSuchElementException;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.DeflaterInputStream;
import java.util.zip.DeflaterOutputStream;

/**
 * Wraps an input stream and compresses its contents while they are being read.
 * Similar to {@link DeflaterInputStream}, but adds the GZIP header and trailer.
 * See {@code GZIPOutputStream} for details.
 *
 * <p>The stream is the concatenation of three lazily created sub-streams:
 * a fixed 10-byte header, the raw-deflated content, and an 8-byte trailer
 * holding the CRC-32 and the length of the uncompressed data.
 *
 * <p>LICENSE: Free to use. Contains some lines from GzipOutputStream, so
 * oracle's license might apply as well!
 *
 * @author mwyraz
 */
public class GzipCompressingInputStream extends SequenceInputStream
{
    /**
     * Creates a compressing stream with a 512-byte internal read buffer.
     *
     * @param in the uncompressed source; must not be {@code null}
     * @throws IOException declared for API compatibility
     */
    public GzipCompressingInputStream(InputStream in) throws IOException
    {
        this(in, 512);
    }

    /**
     * Creates a compressing stream.
     *
     * @param in         the uncompressed source; must not be {@code null}
     * @param bufferSize size of the buffer used to read from {@code in}; must be at least 1
     * @throws NullPointerException     if {@code in} is {@code null}
     * @throws IllegalArgumentException if {@code bufferSize} is less than 1
     * @throws IOException              declared for API compatibility
     */
    public GzipCompressingInputStream(InputStream in, int bufferSize) throws IOException
    {
        super(new StatefullGzipStreamEnumerator(in, bufferSize));
    }

    /** The part of the GZIP member that the enumerator hands out next. */
    static enum StreamState
    {
        HEADER,
        CONTENT,
        TRAILER
    }

    /**
     * Supplies the header, content and trailer streams, in that order.
     * Each stream is only created when it is requested, so the trailer is
     * built after the content stream has been handed out and consumed by
     * the enclosing {@link SequenceInputStream}.
     */
    protected static class StatefullGzipStreamEnumerator implements Enumeration<InputStream>
    {
        protected final InputStream in;
        protected final int bufferSize;
        /** Next part to hand out, or {@code null} once all three were returned. */
        protected StreamState state;

        /**
         * @param in         the uncompressed source; must not be {@code null}
         * @param bufferSize read buffer size for the deflating stream; at least 1
         */
        public StatefullGzipStreamEnumerator(InputStream in, int bufferSize)
        {
            // Fail fast: otherwise these errors would only surface in the middle
            // of a read, when the content stream is lazily created.
            if (in == null)
            {
                throw new NullPointerException("Null input");
            }
            if (bufferSize < 1)
            {
                throw new IllegalArgumentException("Buffer size < 1");
            }
            this.in = in;
            this.bufferSize = bufferSize;
            this.state = StreamState.HEADER;
        }

        @Override
        public boolean hasMoreElements()
        {
            return state != null;
        }

        /**
         * Returns the next part of the GZIP member and advances the state.
         *
         * @throws NoSuchElementException if all three parts were already returned
         */
        @Override
        public InputStream nextElement()
        {
            if (state == null)
            {
                throw new NoSuchElementException("GZIP header, content and trailer were already returned");
            }
            switch (state)
            {
                case HEADER:
                    state = StreamState.CONTENT;
                    return createHeaderStream();
                case CONTENT:
                    state = StreamState.TRAILER;
                    return createContentStream();
                case TRAILER:
                    state = null;
                    return createTrailerStream();
                default:
                    throw new IllegalStateException("Unknown state: " + state);
            }
        }

        static final int GZIP_MAGIC = 0x8b1f;
        static final byte[] GZIP_HEADER = new byte[] {
                (byte) GZIP_MAGIC,        // Magic number (short)
                (byte)(GZIP_MAGIC >> 8),  // Magic number (short)
                Deflater.DEFLATED,        // Compression method (CM)
                0,                        // Flags (FLG)
                0,                        // Modification time MTIME (int)
                0,                        // Modification time MTIME (int)
                0,                        // Modification time MTIME (int)
                0,                        // Modification time MTIME (int)
                0,                        // Extra flags (XFLG)
                0                         // Operating system (OS)
        };

        /** Creates the stream delivering the static GZIP header. */
        protected InputStream createHeaderStream()
        {
            return new ByteArrayInputStream(GZIP_HEADER);
        }

        /** Content stream; kept so the trailer can be derived from it later. */
        protected InternalGzipCompressingInputStream contentStream;

        /** Creates the deflating stream over the checksummed source. */
        protected InputStream createContentStream()
        {
            contentStream = new InternalGzipCompressingInputStream(new CRC32InputStream(in), bufferSize);
            return contentStream;
        }

        /** Creates the stream delivering the trailer for the data read so far. */
        protected InputStream createTrailerStream()
        {
            return new ByteArrayInputStream(contentStream.createTrailer());
        }
    }

    /**
     * Pass-through stream that maintains a CRC-32 and a byte count of
     * everything read through {@link #read()} and {@link #read(byte[], int, int)}.
     */
    protected static class CRC32InputStream extends FilterInputStream
    {
        protected CRC32 crc = new CRC32();
        protected long byteCount;

        public CRC32InputStream(InputStream in)
        {
            super(in);
        }

        @Override
        public int read() throws IOException
        {
            int val = super.read();
            if (val >= 0)
            {
                crc.update(val);
                byteCount++;
            }
            return val;
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException
        {
            len = super.read(b, off, len);
            if (len >= 0)
            {
                crc.update(b, off, len);
                byteCount += len;
            }
            return len;
        }

        /** @return CRC-32 of all bytes read so far */
        public long getCrcValue()
        {
            return crc.getValue();
        }

        /** @return number of bytes read so far */
        public long getByteCount()
        {
            return byteCount;
        }
    }

    /**
     * Internal stream without header/trailer: deflates the wrapped stream in
     * raw ("nowrap") mode and can produce the matching GZIP trailer.
     */
    protected static class InternalGzipCompressingInputStream extends DeflaterInputStream
    {
        protected final CRC32InputStream crcIn;

        public InternalGzipCompressingInputStream(CRC32InputStream in, int bufferSize)
        {
            super(in, new Deflater(Deflater.DEFAULT_COMPRESSION, true), bufferSize);
            crcIn = in;
        }

        /**
         * Closes the wrapped stream and releases the deflater.
         * Going through {@code super.close()} marks the stream as closed, so a
         * later read fails with an {@link IOException} instead of an exception
         * from the ended deflater. The deflater is ended in {@code finally}
         * because the superclass does not end a deflater it was handed.
         */
        @Override
        public void close() throws IOException
        {
            try
            {
                super.close();
            }
            finally
            {
                def.end();
            }
        }

        protected final static int TRAILER_SIZE = 8;

        /**
         * Builds the GZIP member trailer from the checksum and byte count
         * accumulated so far; meaningful once the content was read completely.
         *
         * @return a new array of {@link #TRAILER_SIZE} bytes
         */
        public byte[] createTrailer()
        {
            byte[] trailer = new byte[TRAILER_SIZE];
            writeTrailer(trailer, 0);
            return trailer;
        }

        /*
         * Writes GZIP member trailer to a byte array, starting at a given
         * offset.
         */
        private void writeTrailer(byte[] buf, int offset)
        {
            writeInt((int) crcIn.getCrcValue(), buf, offset);      // CRC-32 of uncompr. data
            // The int cast keeps the low 32 bits, i.e. the size modulo 2^32.
            writeInt((int) crcIn.getByteCount(), buf, offset + 4); // Number of uncompr. bytes
        }

        /*
         * Writes integer in Intel byte order to a byte array, starting at a
         * given offset.
         */
        private void writeInt(int i, byte[] buf, int offset)
        {
            writeShort(i & 0xffff, buf, offset);
            writeShort((i >> 16) & 0xffff, buf, offset + 2);
        }

        /*
         * Writes short integer in Intel byte order to a byte array, starting
         * at a given offset
         */
        private void writeShort(int s, byte[] buf, int offset)
        {
            buf[offset] = (byte) (s & 0xff);
            buf[offset + 1] = (byte) ((s >> 8) & 0xff);
        }
    }
}

import static org.junit.Assert.*;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.GZIPInputStream;

import org.junit.Test;

/**
 * Round-trip test: data compressed by {@code GzipCompressingInputStream}
 * must be restored unchanged by {@link GZIPInputStream}.
 */
public class TestGzipCompressingInputStream
{
    /** Runs the round trip for a short string, 1 MB of data and every size from 0 to 4095 bytes. */
    @Test
    public void test() throws Exception
    {
        testCompressor("test1 test2 test3");
        testCompressor("1MB binary data", createTestPattern(1024 * 1024));

        int size = 0;
        while (size < 4096)
        {
            testCompressor(size + " bytes of binary data", createTestPattern(size));
            size++;
        }
    }

    /**
     * Builds {@code size} bytes that cycle through all byte values
     * (0, 1, ..., 127, -128, ..., -1, 0, ...).
     */
    protected byte[] createTestPattern(int size)
    {
        byte[] pattern = new byte[size];
        for (int index = 0; index < pattern.length; index++)
        {
            // Narrowing the index wraps around exactly like an incremented byte counter.
            pattern[index] = (byte) index;
        }
        return pattern;
    }

    /** Round-trips the bytes of the given string. */
    protected void testCompressor(String data) throws IOException
    {
        byte[] raw = data.getBytes();
        testCompressor("String: " + data, raw);
    }

    /**
     * Compresses {@code data}, decompresses it again and asserts both are equal.
     *
     * @param dataInfo description of the data, used in the failure message
     * @param data     the bytes to round-trip
     */
    protected void testCompressor(String dataInfo, byte[] data) throws IOException
    {
        InputStream roundTrip = new GZIPInputStream(
                new GzipCompressingInputStream(
                        new ByteArrayInputStream(data)));

        byte[] restored = StreamHelper.readBinaryStream(roundTrip);

        boolean identical = Arrays.equals(data, restored);
        assertTrue("Test failed for: " + dataInfo, identical);
    }
}
like image 58
2 revs Avatar answered Oct 24 '22 14:10

2 revs


A working example of a compressing input stream can be found in the popular open source ESB Mule: GZIPCompressorInputStream.

It uses the DeflaterInputStream provided by the JRE for compression, prepends the gzip header and appends the gzip trailer (aka footer).

Unfortunately, it is under the CPAL (Common Public Attribution License), which does not seem to be very common. In addition, there seems to be no unit test.

like image 32
schnatterer Avatar answered Oct 24 '22 16:10

schnatterer