I need to store a huge amount of binary data into a file, but I want also to read/write the header of that file in XML format.
Yes, I could just store the binary data into some XML value and let it be serialized using base64 encoding. But this wouldn't be space-efficient.
Can I "mix" XML data and raw binary data in a more-or-less standardized way?
I was thinking about two options:
Is there a way to do this using JAXB?
Or is there a way to take some existing XML data and append binary data to it, in such a way that the the boundary is recognized?
Isn't the concept I'm looking for somehow used by / for SOAP?
Or is it used in the email standard? (Separation of binary attachments)
Scheme of what I'm trying to achieve:
[meta-info-about-boundary][XML-data][boundary][raw-binary-data]
Thank you!
You can leverage AttachementMarshaller & AttachmentUnmarshaller for this. This is the bridge used by JAXB/JAX-WS to pass binary content as attachments. You can leverage this same mechanism to do what you want.
PROOF OF CONCEPT
Below is how it could be implemented. This should work with any JAXB impl (it works for me with EclipseLink JAXB (MOXy), and the reference implementation).
Message Format
[xml_length][xml][attach1_length][attach1]...[attachN_length][attachN]
Root
This is an object with multiple byte[] properties.
import javax.xml.bind.annotation.XmlRootElement;
@XmlRootElement
public class Root {
private byte[] foo;
private byte[] bar;
public byte[] getFoo() {
return foo;
}
public void setFoo(byte[] foo) {
this.foo = foo;
}
public byte[] getBar() {
return bar;
}
public void setBar(byte[] bar) {
this.bar = bar;
}
}
Demo
This class has is used to demonstrate how MessageWriter and MessageReader are used:
import java.io.FileInputStream;
import java.io.FileOutputStream;
import javax.xml.bind.JAXBContext;
public class Demo {
public static void main(String[] args) throws Exception {
JAXBContext jc = JAXBContext.newInstance(Root.class);
Root root = new Root();
root.setFoo("HELLO WORLD".getBytes());
root.setBar("BAR".getBytes());
MessageWriter writer = new MessageWriter(jc);
FileOutputStream outStream = new FileOutputStream("file.xml");
writer.write(root, outStream);
outStream.close();
MessageReader reader = new MessageReader(jc);
FileInputStream inStream = new FileInputStream("file.xml");
Root root2 = (Root) reader.read(inStream);
inStream.close();
System.out.println(new String(root2.getFoo()));
System.out.println(new String(root2.getBar()));
}
}
MessageWriter
Is responsible for writing the message to the desired format:
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import javax.activation.DataHandler;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Marshaller;
import javax.xml.bind.attachment.AttachmentMarshaller;
public class MessageWriter {
private JAXBContext jaxbContext;
public MessageWriter(JAXBContext jaxbContext) {
this.jaxbContext = jaxbContext;
}
/**
* Write the message in the following format:
* [xml_length][xml][attach1_length][attach1]...[attachN_length][attachN]
*/
public void write(Object object, OutputStream stream) {
try {
Marshaller marshaller = jaxbContext.createMarshaller();
marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
BinaryAttachmentMarshaller attachmentMarshaller = new BinaryAttachmentMarshaller();
marshaller.setAttachmentMarshaller(attachmentMarshaller);
ByteArrayOutputStream xmlStream = new ByteArrayOutputStream();
marshaller.marshal(object, xmlStream);
byte[] xml = xmlStream.toByteArray();
xmlStream.close();
ObjectOutputStream messageStream = new ObjectOutputStream(stream);
messageStream.write(xml.length); //[xml_length]
messageStream.write(xml); // [xml]
for(Attachment attachment : attachmentMarshaller.getAttachments()) {
messageStream.write(attachment.getLength()); // [attachX_length]
messageStream.write(attachment.getData(), attachment.getOffset(), attachment.getLength()); // [attachX]
}
messageStream.flush();
} catch(Exception e) {
throw new RuntimeException(e);
}
}
private static class BinaryAttachmentMarshaller extends AttachmentMarshaller {
private static final int THRESHOLD = 10;
private List<Attachment> attachments = new ArrayList<Attachment>();
public List<Attachment> getAttachments() {
return attachments;
}
@Override
public String addMtomAttachment(DataHandler data, String elementNamespace, String elementLocalName) {
return null;
}
@Override
public String addMtomAttachment(byte[] data, int offset, int length, String mimeType, String elementNamespace, String elementLocalName) {
if(data.length < THRESHOLD) {
return null;
}
int id = attachments.size() + 1;
attachments.add(new Attachment(data, offset, length));
return "cid:" + String.valueOf(id);
}
@Override
public String addSwaRefAttachment(DataHandler data) {
return null;
}
@Override
public boolean isXOPPackage() {
return true;
}
}
public static class Attachment {
private byte[] data;
private int offset;
private int length;
public Attachment(byte[] data, int offset, int length) {
this.data = data;
this.offset = offset;
this.length = length;
}
public byte[] getData() {
return data;
}
public int getOffset() {
return offset;
}
public int getLength() {
return length;
}
}
}
MessageReader
Is responsible for reading the message:
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.bind.attachment.AttachmentUnmarshaller;
public class MessageReader {
private JAXBContext jaxbContext;
public MessageReader(JAXBContext jaxbContext) {
this.jaxbContext = jaxbContext;
}
/**
* Read the message from the following format:
* [xml_length][xml][attach1_length][attach1]...[attachN_length][attachN]
*/
public Object read(InputStream stream) {
try {
ObjectInputStream inputStream = new ObjectInputStream(stream);
int xmlLength = inputStream.read(); // [xml_length]
byte[] xmlIn = new byte[xmlLength];
inputStream.read(xmlIn); // [xml]
BinaryAttachmentUnmarshaller attachmentUnmarshaller = new BinaryAttachmentUnmarshaller();
int id = 1;
while(inputStream.available() > 0) {
int length = inputStream.read(); // [attachX_length]
byte[] data = new byte[length]; // [attachX]
inputStream.read(data);
attachmentUnmarshaller.getAttachments().put("cid:" + String.valueOf(id++), data);
}
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
unmarshaller.setAttachmentUnmarshaller(attachmentUnmarshaller);
ByteArrayInputStream byteInputStream = new ByteArrayInputStream(xmlIn);
Object object = unmarshaller.unmarshal(byteInputStream);
byteInputStream.close();
inputStream.close();
return object;
} catch(Exception e) {
throw new RuntimeException(e);
}
}
private static class BinaryAttachmentUnmarshaller extends AttachmentUnmarshaller {
private Map<String, byte[]> attachments = new HashMap<String, byte[]>();
public Map<String, byte[]> getAttachments() {
return attachments;
}
@Override
public DataHandler getAttachmentAsDataHandler(String cid) {
byte[] bytes = attachments.get(cid);
return new DataHandler(new ByteArrayDataSource(bytes));
}
@Override
public byte[] getAttachmentAsByteArray(String cid) {
return attachments.get(cid);
}
@Override
public boolean isXOPPackage() {
return true;
}
}
private static class ByteArrayDataSource implements DataSource {
private byte[] bytes;
public ByteArrayDataSource(byte[] bytes) {
this.bytes = bytes;
}
public String getContentType() {
return "application/octet-stream";
}
public InputStream getInputStream() throws IOException {
return new ByteArrayInputStream(bytes);
}
public String getName() {
return null;
}
public OutputStream getOutputStream() throws IOException {
return null;
}
}
}
For More Information
I followed the concept suggested by Blaise Doughan, but without attachment marshallers:
I let an XmlAdapter
convert a byte[]
to a URI
-reference and back, while references point to separate files, where raw data is stored. The XML file and all binary files are then put into a zip.
It is similar to the approach of OpenOffice and the ODF format, which in fact is a zip with few XMLs and binary files.
(In the example code, no actual binary files are written, and no zip is created.)
import java.net.*;
import java.util.*;
import javax.xml.bind.annotation.*;
import javax.xml.bind.annotation.adapters.*;
final class Bindings {
static final String SCHEME = "storage";
static final Class<?>[] ALL_CLASSES = new Class<?>[]{
Root.class, RawRef.class
};
static final class RawRepository
extends XmlAdapter<URI, byte[]> {
final SortedMap<String, byte[]> map = new TreeMap<>();
final String host;
private int lastID = 0;
RawRepository(String host) {
this.host = host;
}
@Override
public byte[] unmarshal(URI o) {
if (!SCHEME.equals(o.getScheme())) {
throw new Error("scheme is: " + o.getScheme()
+ ", while expected was: " + SCHEME);
} else if (!host.equals(o.getHost())) {
throw new Error("host is: " + o.getHost()
+ ", while expected was: " + host);
}
String key = o.getPath();
if (!map.containsKey(key)) {
throw new Error("key not found: " + key);
}
byte[] ret = map.get(key);
return Arrays.copyOf(ret, ret.length);
}
@Override
public URI marshal(byte[] o) {
++lastID;
String key = String.valueOf(lastID);
map.put(key, Arrays.copyOf(o, o.length));
try {
return new URI(SCHEME, host, "/" + key, null);
} catch (URISyntaxException ex) {
throw new Error(ex);
}
}
}
@XmlRootElement
@XmlType
static final class Root {
@XmlElement
final List<RawRef> element = new LinkedList<>();
}
@XmlType
static final class RawRef {
@XmlJavaTypeAdapter(RawRepository.class)
@XmlElement
byte[] raw = null;
}
}
import java.io.*;
import javax.xml.bind.*;
public class _Run {
public static void main(String[] args)
throws Exception {
JAXBContext context = JAXBContext.newInstance(Bindings.ALL_CLASSES);
Marshaller marshaller = context.createMarshaller();
marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
Unmarshaller unmarshaller = context.createUnmarshaller();
Bindings.RawRepository adapter = new Bindings.RawRepository("myZipVFS");
marshaller.setAdapter(adapter);
Bindings.RawRef ta1 = new Bindings.RawRef();
ta1.raw = "THIS IS A STRING".getBytes();
Bindings.RawRef ta2 = new Bindings.RawRef();
ta2.raw = "THIS IS AN OTHER STRING".getBytes();
Bindings.Root root = new Bindings.Root();
root.element.add(ta1);
root.element.add(ta2);
StringWriter out = new StringWriter();
marshaller.marshal(root, out);
System.out.println(out.toString());
}
}
<root>
<element>
<raw>storage://myZipVFS/1</raw>
</element>
<element>
<raw>storage://myZipVFS/2</raw>
</element>
</root>
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With