Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

What's the most standard Java way to store raw binary data along with XML?

I need to store a huge amount of binary data into a file, but I want also to read/write the header of that file in XML format.

Yes, I could just store the binary data into some XML value and let it be serialized using base64 encoding. But this wouldn't be space-efficient.

Can I "mix" XML data and raw binary data in a more-or-less standardized way?

I was thinking about two options:

  • Is there a way to do this using JAXB?

  • Or is there a way to take some existing XML data and append binary data to it, in such a way that the the boundary is recognized?

  • Isn't the concept I'm looking for somehow used by / for SOAP?

  • Or is it used in the email standard? (Separation of binary attachments)

Scheme of what I'm trying to achieve:

[meta-info-about-boundary][XML-data][boundary][raw-binary-data]

Thank you!

like image 235
java.is.for.desktop Avatar asked Mar 15 '11 18:03

java.is.for.desktop


2 Answers

You can leverage AttachementMarshaller & AttachmentUnmarshaller for this. This is the bridge used by JAXB/JAX-WS to pass binary content as attachments. You can leverage this same mechanism to do what you want.

  • http://download.oracle.com/javase/6/docs/api/javax/xml/bind/attachment/package-summary.html

PROOF OF CONCEPT

Below is how it could be implemented. This should work with any JAXB impl (it works for me with EclipseLink JAXB (MOXy), and the reference implementation).

Message Format

[xml_length][xml][attach1_length][attach1]...[attachN_length][attachN]

Root

This is an object with multiple byte[] properties.

import javax.xml.bind.annotation.XmlRootElement;

@XmlRootElement
public class Root {

    private byte[] foo;
    private byte[] bar;

    public byte[] getFoo() {
        return foo;
    }

    public void setFoo(byte[] foo) {
        this.foo = foo;
    }

    public byte[] getBar() {
        return bar;
    }

    public void setBar(byte[] bar) {
        this.bar = bar;
    }

}

Demo

This class has is used to demonstrate how MessageWriter and MessageReader are used:

import java.io.FileInputStream;
import java.io.FileOutputStream;
import javax.xml.bind.JAXBContext;

public class Demo {

    public static void main(String[] args) throws Exception {
        JAXBContext jc = JAXBContext.newInstance(Root.class);

        Root root = new Root();
        root.setFoo("HELLO WORLD".getBytes());
        root.setBar("BAR".getBytes());

        MessageWriter writer = new MessageWriter(jc);
        FileOutputStream outStream = new FileOutputStream("file.xml");
        writer.write(root, outStream);
        outStream.close();

        MessageReader reader = new MessageReader(jc);
        FileInputStream inStream = new FileInputStream("file.xml");
        Root root2 = (Root) reader.read(inStream);
        inStream.close();

        System.out.println(new String(root2.getFoo()));
        System.out.println(new String(root2.getBar()));
    }

}

MessageWriter

Is responsible for writing the message to the desired format:

import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

import javax.activation.DataHandler;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Marshaller;
import javax.xml.bind.attachment.AttachmentMarshaller;

public class MessageWriter {

    private JAXBContext jaxbContext;

    public MessageWriter(JAXBContext jaxbContext) {
        this.jaxbContext = jaxbContext;
    }

    /**
     * Write the message in the following format:
     * [xml_length][xml][attach1_length][attach1]...[attachN_length][attachN] 
     */
    public void write(Object object, OutputStream stream) {
        try {
            Marshaller marshaller = jaxbContext.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
            BinaryAttachmentMarshaller attachmentMarshaller = new BinaryAttachmentMarshaller();
            marshaller.setAttachmentMarshaller(attachmentMarshaller);
            ByteArrayOutputStream xmlStream = new ByteArrayOutputStream();
            marshaller.marshal(object, xmlStream);
            byte[] xml = xmlStream.toByteArray();
            xmlStream.close();

            ObjectOutputStream messageStream = new ObjectOutputStream(stream);

            messageStream.write(xml.length); //[xml_length]
            messageStream.write(xml); // [xml]

            for(Attachment attachment : attachmentMarshaller.getAttachments()) {
                messageStream.write(attachment.getLength()); // [attachX_length]
                messageStream.write(attachment.getData(), attachment.getOffset(), attachment.getLength());  // [attachX]
            }

            messageStream.flush();
        } catch(Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static class BinaryAttachmentMarshaller extends AttachmentMarshaller {

        private static final int THRESHOLD = 10;

        private List<Attachment> attachments = new ArrayList<Attachment>();

        public List<Attachment> getAttachments() {
            return attachments;
        }

        @Override
        public String addMtomAttachment(DataHandler data, String elementNamespace, String elementLocalName) {
            return null;
        }

        @Override
        public String addMtomAttachment(byte[] data, int offset, int length, String mimeType, String elementNamespace, String elementLocalName) {
            if(data.length < THRESHOLD) {
                return null;
            }
            int id = attachments.size() + 1;
            attachments.add(new Attachment(data, offset, length));
            return "cid:" + String.valueOf(id);
        }

        @Override
        public String addSwaRefAttachment(DataHandler data) {
            return null;
        }

        @Override
        public boolean isXOPPackage() {
            return true;
        }

    }

    public static class Attachment {

        private byte[] data;
        private int offset;
        private int length;

        public Attachment(byte[] data, int offset, int length) {
            this.data = data;
            this.offset = offset;
            this.length = length;
        }

        public byte[] getData() {
            return data;
        }

        public int getOffset() {
            return offset;
        }

        public int getLength() {
            return length;
        }

    }

}

MessageReader

Is responsible for reading the message:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;

import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.attachment.AttachmentUnmarshaller;

public class MessageReader {

    private JAXBContext jaxbContext;

    public MessageReader(JAXBContext jaxbContext) {
        this.jaxbContext = jaxbContext;
    }

    /**
     * Read the message from the following format:
     * [xml_length][xml][attach1_length][attach1]...[attachN_length][attachN] 
     */
    public Object read(InputStream stream) {
        try {
            ObjectInputStream inputStream = new ObjectInputStream(stream);
            int xmlLength = inputStream.read();  // [xml_length]

            byte[] xmlIn = new byte[xmlLength]; 
            inputStream.read(xmlIn);  // [xml]

            BinaryAttachmentUnmarshaller attachmentUnmarshaller = new BinaryAttachmentUnmarshaller();
            int id = 1;
            while(inputStream.available() > 0) {
                int length = inputStream.read();  // [attachX_length]
                byte[] data = new byte[length];  // [attachX]
                inputStream.read(data);
                attachmentUnmarshaller.getAttachments().put("cid:" + String.valueOf(id++), data);
            }

            Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
            unmarshaller.setAttachmentUnmarshaller(attachmentUnmarshaller);
            ByteArrayInputStream byteInputStream = new ByteArrayInputStream(xmlIn);
            Object object = unmarshaller.unmarshal(byteInputStream);
            byteInputStream.close();
            inputStream.close();
            return object;
        } catch(Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static class BinaryAttachmentUnmarshaller extends AttachmentUnmarshaller {

        private Map<String, byte[]> attachments = new HashMap<String, byte[]>();

        public Map<String, byte[]> getAttachments() {
            return attachments;
        }

        @Override
        public DataHandler getAttachmentAsDataHandler(String cid) {
            byte[] bytes = attachments.get(cid);
            return new DataHandler(new ByteArrayDataSource(bytes));
        }

        @Override
        public byte[] getAttachmentAsByteArray(String cid) {
            return attachments.get(cid);
        }

        @Override
        public boolean isXOPPackage() {
            return true;
        }

    }

    private static class ByteArrayDataSource implements DataSource {

        private byte[] bytes;

        public ByteArrayDataSource(byte[] bytes) {
            this.bytes = bytes;
        }

        public String getContentType() {
            return  "application/octet-stream";
        }

        public InputStream getInputStream() throws IOException {
            return new ByteArrayInputStream(bytes);
        }

        public String getName() {
            return null;
        }

        public OutputStream getOutputStream() throws IOException {
            return null;
        }

    }

}

For More Information

  • http://bdoughan.blogspot.com/2011/03/jaxb-web-services-and-binary-data.html
like image 99
bdoughan Avatar answered Oct 07 '22 04:10

bdoughan


I followed the concept suggested by Blaise Doughan, but without attachment marshallers:

I let an XmlAdapter convert a byte[] to a URI-reference and back, while references point to separate files, where raw data is stored. The XML file and all binary files are then put into a zip.

It is similar to the approach of OpenOffice and the ODF format, which in fact is a zip with few XMLs and binary files.

(In the example code, no actual binary files are written, and no zip is created.)

Bindings.java

import java.net.*;
import java.util.*;
import javax.xml.bind.annotation.*;
import javax.xml.bind.annotation.adapters.*;

final class Bindings {

  static final String SCHEME = "storage";
  static final Class<?>[] ALL_CLASSES = new Class<?>[]{
    Root.class, RawRef.class
  };

  static final class RawRepository
      extends XmlAdapter<URI, byte[]> {

    final SortedMap<String, byte[]> map = new TreeMap<>();
    final String host;
    private int lastID = 0;

    RawRepository(String host) {
      this.host = host;
    }

    @Override
    public byte[] unmarshal(URI o) {
      if (!SCHEME.equals(o.getScheme())) {
        throw new Error("scheme is: " + o.getScheme()
            + ", while expected was: " + SCHEME);
      } else if (!host.equals(o.getHost())) {
        throw new Error("host is: " + o.getHost()
            + ", while expected was: " + host);
      }

      String key = o.getPath();
      if (!map.containsKey(key)) {
        throw new Error("key not found: " + key);
      }

      byte[] ret = map.get(key);
      return Arrays.copyOf(ret, ret.length);
    }

    @Override
    public URI marshal(byte[] o) {
      ++lastID;
      String key = String.valueOf(lastID);
      map.put(key, Arrays.copyOf(o, o.length));

      try {
        return new URI(SCHEME, host, "/" + key, null);
      } catch (URISyntaxException ex) {
        throw new Error(ex);
      }
    }

  }

  @XmlRootElement
  @XmlType
  static final class Root {

    @XmlElement
    final List<RawRef> element = new LinkedList<>();
  }

  @XmlType
  static final class RawRef {

    @XmlJavaTypeAdapter(RawRepository.class)
    @XmlElement
    byte[] raw = null;
  }

}

Main.java

import java.io.*;
import javax.xml.bind.*;

public class _Run {

  public static void main(String[] args)
      throws Exception {
    JAXBContext context = JAXBContext.newInstance(Bindings.ALL_CLASSES);
    Marshaller marshaller = context.createMarshaller();
    marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
    Unmarshaller unmarshaller = context.createUnmarshaller();

    Bindings.RawRepository adapter = new Bindings.RawRepository("myZipVFS");
    marshaller.setAdapter(adapter);

    Bindings.RawRef ta1 = new Bindings.RawRef();
    ta1.raw = "THIS IS A STRING".getBytes();
    Bindings.RawRef ta2 = new Bindings.RawRef();
    ta2.raw = "THIS IS AN OTHER STRING".getBytes();

    Bindings.Root root = new Bindings.Root();
    root.element.add(ta1);
    root.element.add(ta2);

    StringWriter out = new StringWriter();
    marshaller.marshal(root, out);

    System.out.println(out.toString());
  }

}

Output

<root>
    <element>
        <raw>storage://myZipVFS/1</raw>
    </element>
    <element>
        <raw>storage://myZipVFS/2</raw>
    </element>
</root>
like image 44
java.is.for.desktop Avatar answered Oct 07 '22 03:10

java.is.for.desktop