Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Extract IPTC information from JPEG using Javascript

I'm trying to extract IPTC photo caption information from a JPEG file using Javascript. (I know I can do this server-side, but I'm looking specifically for a Javascript solution.)

I found this script, which extracts EXIF information ... but I'm not sure how to adapt it to grab IPTC data.

Are there any existing scripts that offer such functionality? If not, how would you modify the EXIF script to also parse IPTC data?

UPDATE

I've modified the EXIF script I linked above. It sorta does what I want, but it's not grabbing the right data 100 percent of the time.

After line 401, I added:

else if (iMarker == 237) {
        // 0xED = Application-specific 13 (Photoshop IPTC)                
        if (bDebug) log("Found 0xFFED marker");   
        return readIPTCData(oFile, iOffset + 4, getShortAt(oFile, iOffset+2, true)-2);                       
}

And then elsewhere in the script, I added this function:

function readIPTCData(oFile, iStart, iLength) {
    exif = new Array();

if (getStringAt(oFile, iStart, 9) != "Photoshop") {
    if (bDebug) log("Not valid Photoshop data! " + getStringAt(oFile, iStart, 9));
    return false;
}

var output = '';
var count = 0;
two = new Array();
for (i=0; i<iLength; i++) {
   if (getByteAt(oFile, iStart + i) == 2 && getByteAt(oFile, iStart + i + 1) == 120) {
      var caption = getString2At(oFile, iStart + i + 2, 800);
   }
   if (getByteAt(oFile, iStart + i) == 2 && getByteAt(oFile, iStart + i + 1) == 80) {
      var credit = getString2At(oFile, iStart + i + 2, 300);
   }       
}

exif['ImageDescription'] = caption;
exif['Artist'] = credit;

return exif;

}

So let me now modify my question slightly. How can the function above be improved?

like image 318
jawns317 Avatar asked Apr 29 '11 13:04

jawns317


2 Answers

For what it's worth, I extrapolated on this a bit... I haven't done a whole lot of testing, but the few test images I have seem to work.

    var bDebug = false;

    var fieldMap = {
        120 : 'caption',
        110 : 'credit',
        25 : 'keywords',
        85 : 'byline',
        122 : 'captionWriter',
        105 : 'headline',
        116 : 'copyright',
        15 : 'category'
    };

    function readIPTCData(oFile, iStart, iLength) {
        var data = {};

        if (oFile.getStringAt(iStart, 9) != "Photoshop") {
            if (bDebug) log("Not valid Photoshop data! " + oFile.getStringAt(iStart, 9));
            return false;
        }

        var fileLength = oFile.getLength();

        var length, offset, fieldStart, title, value;
        var FILE_SEPARATOR_CHAR = 28,
            START_OF_TEXT_CHAR = 2;

        for (var i = 0; i < iLength; i++) {

            fieldStart = iStart + i;
            if(oFile.getByteAt(fieldStart) == START_OF_TEXT_CHAR && oFile.getByteAt(fieldStart + 1) in fieldMap) {
                length = 0;
                offset = 2;

                while(
                    fieldStart + offset < fileLength &&
                    oFile.getByteAt(fieldStart + offset) != FILE_SEPARATOR_CHAR &&
                    oFile.getByteAt(fieldStart + offset + 1) != START_OF_TEXT_CHAR) { offset++; length++; }

                if(!length) { continue; }

                title = fieldMap[oFile.getByteAt(fieldStart + 1)];
                value = oFile.getStringAt(iStart + i + 2, length) || '';
                value = value.replace('\000','').trim();

                data[title] = value;
                i+=length-1;
            }
        }

        return data;

    }

    function findIPTCinJPEG(oFile) {
        var aMarkers = [];

        if (oFile.getByteAt(0) != 0xFF || oFile.getByteAt(1) != 0xD8) {
            return false; // not a valid jpeg
        }

        var iOffset = 2;
        var iLength = oFile.getLength();
        while (iOffset < iLength) {
            if (oFile.getByteAt(iOffset) != 0xFF) {
                if (bDebug) console.log("Not a valid marker at offset " + iOffset + ", found: " + oFile.getByteAt(iOffset));
                return false; // not a valid marker, something is wrong
            }

            var iMarker = oFile.getByteAt(iOffset+1);

            if (iMarker == 237) {
                if (bDebug) console.log("Found 0xFFED marker");
                return readIPTCData(oFile, iOffset + 4, oFile.getShortAt(iOffset+2, true)-2);

            } else {
                iOffset += 2 + oFile.getShortAt(iOffset+2, true);
            }

        }

    }

    IPTC.readFromBinaryFile = function(oFile) {
        return findIPTCinJPEG(oFile);
    }
like image 73
user67821 Avatar answered Oct 17 '22 11:10

user67821


I'd like to suggest library exifr that works in both Node.js and browser. And it also supports the new HEIC image format.

exifr.parse(input, {iptc: true}).then(output => {
  console.log('IPTC', output)
})

It parses multiple data formats (TIFF/EXIF, ICC, IPTC, XMP, JFIF) but IPTC isn't enabled by default so you need to enabled it in options as seen in the example.

like image 1
Mike Kovařík Avatar answered Oct 17 '22 10:10

Mike Kovařík