Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

compress pdf with large images via java

Looking for a way to compress images in a pdf and to output a pdf for archiving. I cannot compress the images before creation as it would compromise the quality of the print.

The size of each PDF is around 8 MB, with the bulk of this being made up of 2 images. The images are in PNG format and are brought into the PDF during generation (a 3rd party generator is used).

Is there a way to compress these in Java without using a 3rd party tool? I have tried PDFBox, iText and a 3rd party exe (Neevia); the 3rd party tool is the only one that has given me any results so far (down to around half a MB), but I do not want to relinquish control to an exe. Sample code is below.

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDStream;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Proof of concept that runs three different compression attempts against the
 * same hard-coded input PDF: PDFBox, iText 5 and the external Neevia
 * command-line tool. Each attempt writes its own output file.
 */
public class compressPDF {

/**
 * Runs the PDFBox, iText and Neevia attempts one after another.
 *
 * @param args not used
 * @throws IOException if a PDF cannot be read or written
 * @throws DocumentException if iText fails while stamping the document
 * @throws COSVisitorException if PDFBox fails while saving the document
 */
public static void main (String[] args) throws IOException, DocumentException, COSVisitorException {

    /*
     * Using PDF Box
     */
    PDDocument doc = PDDocument.load("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF");
    try {
        // NOTE(review): this stream is freshly created and never attached to a page or
        // resource, so compressing it presumably leaves the existing content and images
        // untouched - confirm against the PDFBox version in use.
        PDStream stream = new PDStream(doc);
        stream.addCompression();

        doc.save("C:/_dev_env_/TEMP/compressPDF/compressed_pdfBox.pdf");
    } finally {
        // Close the document even when compression or saving fails.
        doc.close();
    }

    /*
     * Using itext
     */
    PdfReader reader = new PdfReader("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF");
    try {
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream("C:/_dev_env_/TEMP/compressPDF/compressed_Itext.pdf"), PdfWriter.VERSION_1_5);
        try {
            stamper.setFullCompression();
            // Deflate levels run from 0 to 9. The previous value of 50 is out of range,
            // so the writer would not have applied it as "maximum compression".
            stamper.getWriter().setCompressionLevel(9);
            int total = reader.getNumberOfPages() + 1;
            for (int i = 1; i < total; i++) {
                // Re-setting each page's content makes the stamper rewrite (and recompress) it.
                reader.setPageContent(i, reader.getPageContent(i));
            }
        } finally {
            stamper.close();
        }
    } finally {
        reader.close();
    }

    /*
     * Using 3rd party - Neevia
     */
    try {
        ProcessBuilder builder = new ProcessBuilder("C:/Program Files (x86)/neeviaPDF.com/PDFcompress/cmdLine/CLcompr.exe","C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF", "C:/_dev_env_/TEMP/compressPDF/compressed_Neevia.pdf");
        // Merge stderr into stdout so the child cannot block on an undrained error pipe.
        builder.redirectErrorStream(true);
        Process process = builder.start();

        BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            // Report the command that was actually launched, not this JVM's own arguments.
            System.out.printf("Output of running %s is:", builder.command());

            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            br.close();
        }

        int exitCode = process.waitFor();
        if (exitCode != 0) {
            System.out.println("CLcompr.exe exited with code " + exitCode);
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt status for whoever is running this thread.
        Thread.currentThread().interrupt();
        System.out.println(e);
    } catch (Exception e) {
        // Best effort: the external tool is optional, so only report the failure.
        System.out.println(e);
    } finally {
        System.out.println("Created!!");
    }

}

}
like image 536
Daniel Mulcahy Avatar asked Dec 16 '13 15:12

Daniel Mulcahy


People also ask

How do I compress a PDF file that is too large?

In the latest version of Adobe Acrobat, open the PDF you wish to re-save as a smaller file, choose File, Save as Other, and then Reduced Size PDF. You'll be prompted to select the version compatibility you need and then you can click OK to save.

How do I compress a PDF in Javascript?

As suggested by several sources, one method is to resize the image file, generate the PDF file with jsPDF and then start uploading the PDF file. // Compress image and convert to pdf function resizeBase64Img(base64, width, height) { var canvas = document.createElement("canvas"); canvas.width = width; canvas. …


2 Answers

I used code below for a proof of concept... Works a treat :) Thanks to Bruno for setting me on the right path :)

package compressPDF;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import javax.imageio.ImageIO;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PRStream;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.parser.PdfImageObject;

/**
 * Shrinks the images embedded in a PDF (iText 5): every image stream that can
 * be decoded is scaled by {@link #FACTOR} and re-encoded as an RGB JPEG.
 */
public class ResizeImage {

/** The multiplication factor for the image. */
public static float FACTOR = 0.5f;

/**
 * Manipulates a PDF file src with the file dest as result.
 *
 * <p>Images that cannot be decoded, or whose scaled size would be smaller than
 * one pixel in either direction, are left unchanged.
 *
 * @param src the original PDF
 * @param dest the resulting PDF
 * @throws IOException if the source cannot be read or the result cannot be written
 * @throws DocumentException if iText fails while writing the altered document
 */
public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
    // Custom marker entry written onto every image stream that gets replaced.
    PdfName key = new PdfName("ITXT_SpecialId");
    PdfName value = new PdfName("123456789");
    // Read the factor once so a concurrent change cannot mix two scales in one run.
    final float factor = FACTOR;

    // Read the file
    PdfReader reader = new PdfReader(src);
    try {
        int n = reader.getXrefSize();
        // Look for image and manipulate image stream
        for (int i = 0; i < n; i++) {
            PdfObject object = reader.getPdfObject(i);
            if (object == null || !object.isStream()) {
                continue;
            }
            PRStream stream = (PRStream) object;
            if (!PdfName.IMAGE.equals(stream.get(PdfName.SUBTYPE))) {
                continue;
            }

            PdfImageObject image = new PdfImageObject(stream);
            BufferedImage bi = image.getBufferedImage();
            if (bi == null) {
                continue;
            }

            int width = (int) (bi.getWidth() * factor);
            int height = (int) (bi.getHeight() * factor);
            if (width < 1 || height < 1) {
                // BufferedImage rejects non-positive dimensions; tiny images are kept as they are.
                continue;
            }

            // TYPE_INT_RGB has no alpha channel, which matches the JPEG output below.
            BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            Graphics2D g = img.createGraphics();
            try {
                g.drawRenderedImage(bi, AffineTransform.getScaleInstance(factor, factor));
            } finally {
                g.dispose();
            }

            ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
            if (!ImageIO.write(img, "JPG", imgBytes)) {
                // No JPEG writer available: keep the original stream rather than emptying it.
                continue;
            }

            // NOTE(review): clear() drops the original dictionary entries, so a soft-mask
            // reference on the old image would presumably be lost - confirm this is acceptable.
            stream.clear();
            // compress=false: the JPEG bytes are stored as they are and declared via DCTDecode.
            stream.setData(imgBytes.toByteArray(), false, PRStream.BEST_COMPRESSION);
            stream.put(PdfName.TYPE, PdfName.XOBJECT);
            stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
            stream.put(key, value);
            stream.put(PdfName.FILTER, PdfName.DCTDECODE);
            stream.put(PdfName.WIDTH, new PdfNumber(width));
            stream.put(PdfName.HEIGHT, new PdfNumber(height));
            stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
            stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
        }

        // Save altered PDF
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
        stamper.close();
    } finally {
        // Release the reader even when decoding or writing fails part-way through.
        reader.close();
    }
}

/**
 * Main method. Resizes the images of a hard-coded sample PDF.
 *
 * @param    args    no arguments needed
 * @throws DocumentException if iText fails while writing the altered document
 * @throws IOException if the source cannot be read or the result cannot be written
 */
public static void main(String[] args) throws IOException, DocumentException {
    new ResizeImage().manipulatePdf("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF", "C:/_dev_env_/TEMP/compressPDF/compressTest.pdf");
}

}
like image 76
Daniel Mulcahy Avatar answered Sep 28 '22 01:09

Daniel Mulcahy


Just to update the excellent answer from @Daniel, I updated his code to be compatible with iText 7.

package opencde.builder.compresspdf;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;

import javax.imageio.ImageIO;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
import com.itextpdf.layout.element.Image;

public class ResizeImageV7 {
    
    // Logging
    private static Logger logger = LoggerFactory.getLogger(ResizeImageV7.class);

    /**
     * Manipulates a PDF file src with the file dest as result
     * 
     * @param src  the original PDF
     * @param dest the resulting PDF
     * @param resizeFactor factor to multiplicate to resize image
     * @throws IOException
     */
    public void manipulatePdf(String src, String dest,Float resizeFactor) throws IOException {
        
        //Get source pdf
        PdfDocument pdfDoc = new PdfDocument(new PdfReader(src), new PdfWriter(dest));

        // Iterate over all pages to get all images.
        for (int i = 1; i <= pdfDoc.getNumberOfPages(); i++)
        {
            PdfPage page = pdfDoc.getPage(i);
            PdfDictionary pageDict = page.getPdfObject();
            PdfDictionary resources = pageDict.getAsDictionary(PdfName.Resources);
            // Get images
            PdfDictionary xObjects = resources.getAsDictionary(PdfName.XObject);
            for (Iterator<PdfName> iter = xObjects.keySet().iterator() ; iter.hasNext(); ) {
                // Get image
                PdfName imgRef = iter.next();
                PdfStream stream = xObjects.getAsStream(imgRef);
                PdfImageXObject image = new PdfImageXObject(stream);
                BufferedImage bi = image.getBufferedImage();
                if (bi == null)
                    continue;
                
                // Create new image
                int width = (int) (bi.getWidth() * resizeFactor);
                int height = (int) (bi.getHeight() * resizeFactor);
                BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
                AffineTransform at = AffineTransform.getScaleInstance(resizeFactor, resizeFactor);
                Graphics2D g = img.createGraphics();
                g.drawRenderedImage(bi, at);
                ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
                
                // Write new image
                ImageIO.write(img, "JPG", imgBytes);
                Image imgNew =new Image(ImageDataFactory.create(imgBytes.toByteArray()));
                
                // Replace the original image with the resized image
                xObjects.put(imgRef, imgNew.getXObject().getPdfObject());
            }          
        }
        
        pdfDoc.close();
    }

    /**
     * Main method.
     *
     * @param src  the original PDF
     * @param dest the resulting PDF
     * @param resizeFactor factor to multiplicate to resize image
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        //Get input parametres
        if (args.length<3 ) {
            System.out.println("Source PDF, Destination PDF and Resize Factor must be provided as parametres");
        } else {
            String sourcePDF=args[0];
            String destPDF=args[1];
            Float resizeFactor=Float.valueOf(new String(args[2]));
            logger.info("Inovking Resize with args, source:" + sourcePDF
                    + " destination:" + destPDF 
                    + " factor:" + resizeFactor);
            //Call method to resize images
            new ResizeImageV7().manipulatePdf(sourcePDF,destPDF,resizeFactor);
            logger.info("PDF resized");
        }
    }

}
like image 29
Marti Pàmies Solà Avatar answered Sep 28 '22 01:09

Marti Pàmies Solà