Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to generate PDF documents from rpt in a multi-threaded approach?

I have an rpt file from which I generate multiple reports in PDF format, using the Engine class from i-net Clear Reports. The process takes a very long time, as I have nearly 10,000 reports to generate. Can I use multi-threading or some other approach to speed up the process?

Any help on how this can be done would be appreciated.

My partial code.

 //Loops
 // Body of the per-report loop: configure an Engine for one report,
 // execute it, and hand it to PDFExportThread to write the PDF.
 Engine eng = new Engine(Engine.EXPORT_PDF);
 eng.setReportFile(rpt); //rpt is the report name
 // The null test must come first: evaluating cn.isClosed() on a null
 // reference throws NullPointerException before "cn == null" is reached.
 if (cn == null || cn.isClosed()) {
    cn = ds.getConnection();
 }
 eng.setConnection(cn);
 System.out.println(" After set connection");
 eng.setPrompt(data[i], 0);
 ReportProperties repprop = eng.getReportProperties();
 repprop.setPaperOrient(ReportProperties.DEFAULT_PAPER_ORIENTATION, ReportProperties.PAPER_FANFOLD_US);
 eng.execute();
 System.out.println(" After excecute");
 try {
      PDFExportThread pdfExporter = new PDFExportThread(eng, sFileName, sFilePath);
      pdfExporter.execute();
 } catch (Exception e) {
      // Best effort: a failed export is reported and the loop moves on.
      e.printStackTrace();
 }

PDFExportThread execute

 /**
  * Writes every page of the report held by {@code eng} into a single file
  * named {@code sFilePath + sFileName + "_" + (pageCount - 1) + ".pdf"},
  * creating the parent directory when it is missing.
  *
  * <p>Any failure is printed to standard error and not rethrown, so one
  * failed export does not abort the caller's loop.
  *
  * @throws IOException kept in the signature for existing callers
  */
 public void execute() throws IOException {
      try {
           // Read once and reuse for both the file name and the loop bound.
           // NOTE(review): assumes the page count is stable once the engine
           // has executed - confirm against the Engine documentation.
           int pageCount = eng.getPageCount();
           File file = new File(sFilePath + sFileName + "_" + (pageCount - 1) + ".pdf");
           // getParentFile() is null when the path has no directory part.
           File parent = file.getParentFile();
           if (parent != null && !parent.exists()) {
                parent.mkdirs();
           }
           // FileOutputStream creates the file itself; try-with-resources
           // closes it exactly once on every path.
           try (FileOutputStream fos = new FileOutputStream(file)) {
                for (int k = 1; k <= pageCount; k++) {
                     fos.write(eng.getPageData(k));
                }
           }
      } catch (Exception e) {
           e.printStackTrace();
      }
 }
like image 235
Ace Avatar asked Dec 23 '15 15:12

Ace


2 Answers

This is very basic code. A ThreadPoolExecutor with a fixed number of threads in its pool is the backbone.

Some considerations:

  1. The thread pool size should be equal or less than the DB connection pool size. And, it should be of an optimal number which is reasonable for parallel Engines.
  2. The main thread should wait for sufficient time before killing all threads. I have put 1 hour as the wait time, but that's just an example.
  3. You'll need to have proper Exception handling.
  4. From the API doc, I saw stopAll and shutdown methods from the Engine class. So, I'm invoking that as soon as our work is done. That's again, just an example.

Hope this helps.


import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.Connection;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Minimal demonstration of exporting several reports to PDF in parallel using
 * a fixed-size thread pool. Each task builds its own {@code Engine}, executes
 * the report and writes the pages to {@code <sFilePath><sFileName>_<index>.pdf}.
 *
 * <p>The nested {@code Engine} and {@code ReportProperties} classes are
 * placeholders so that this sample compiles on its own; replace them with the
 * real reporting classes.
 */
public class RunEngine {
    /** Number of worker threads; keep it at or below the DB connection pool size. */
    private static final int POOL_SIZE = 10;

    /** Longest time {@code main} waits for the queued exports to finish. */
    private static final long MAX_WAIT_HOURS = 1L;

    /**
     * Queues one export task per data element, waits for them to finish and
     * then shuts the engine down.
     *
     * @param args unused
     * @throws Exception if the waiting thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        final String rpt = "/tmp/rpt/input/rpt-1.rpt";
        final String sFilePath = "/tmp/rpt/output/";
        final String sFileName = "pdfreport";
        final Object[] data = new Object[10];

        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(POOL_SIZE);
        try {
            for (int i = 0; i < data.length; i++) {
                executor.execute(new PDFExporterRunnable(rpt, data[i], sFilePath, sFileName, i));
            }
            executor.shutdown();
            if (!executor.awaitTermination(MAX_WAIT_HOURS, TimeUnit.HOURS)) {
                System.err.println("Timed out waiting for report exports; cancelling the rest.");
            }
        } finally {
            // Pool threads are non-daemon: interrupt whatever is still running
            // (timeout or failure above) so the JVM is able to exit.
            if (!executor.isTerminated()) {
                executor.shutdownNow();
            }
            Engine.stopAll();
            Engine.shutdown();
        }
    }

    /** Exports a single report to a PDF file. */
    private static class PDFExporterRunnable implements Runnable {
        private final String rpt;
        private final Object data;
        private final String sFilePath;
        private final String sFileName;
        private final int runIndex;

        /**
         * @param rpt       report file to execute
         * @param data      value passed to the report as prompt 0
         * @param sFilePath output directory prefix, concatenated as-is with the file name
         * @param sFileName base name of the output file
         * @param runIndex  index appended to the file name to keep outputs distinct
         */
        public PDFExporterRunnable(String rpt, Object data, String sFilePath,
                String sFileName, int runIndex) {
            this.rpt = rpt;
            this.data = data;
            this.sFilePath = sFilePath;
            this.sFileName = sFileName;
            this.runIndex = runIndex;
        }

        @Override
        public void run() {
            // Task boundary: report the failure of this one export here rather
            // than letting it escape into the pool thread.
            try {
                Engine eng = new Engine(Engine.EXPORT_PDF);
                eng.setReportFile(rpt); // rpt is the report name
                Connection cn = null;

                /*
                 * DB connection related code. Check and use.
                 * (Test for null BEFORE calling a method on cn.)
                 */
                //if (cn == null || cn.isClosed()) {
                    //cn = ds.getConnection();
                //}
                eng.setConnection(cn);
                System.out.println(" After set connection");

                eng.setPrompt(data, 0);
                ReportProperties repprop = eng.getReportProperties();
                repprop.setPaperOrient(ReportProperties.DEFAULT_PAPER_ORIENTATION,
                        ReportProperties.PAPER_FANFOLD_US);
                eng.execute();
                System.out.println(" After excecute");
                writePdf(eng);
            } catch (Exception e) {
                System.err.println("Export #" + runIndex + " of " + rpt + " failed");
                e.printStackTrace();
            }
        }

        /** Writes all pages of the executed report into this task's output file. */
        private void writePdf(Engine eng) throws IOException {
            File file = new File(sFilePath + sFileName + "_" + runIndex + ".pdf");
            // getParentFile() is null when the path has no directory part.
            File parent = file.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            // FileOutputStream creates the file; try-with-resources closes it once.
            try (FileOutputStream fos = new FileOutputStream(file)) {
                int pageCount = eng.getPageCount();
                for (int k = 1; k <= pageCount; k++) {
                    fos.write(eng.getPageData(k));
                }
            }
        }
    }

    /*
     * Dummy classes to avoid compilation errors.
     */
    private static class ReportProperties {
        public static final String PAPER_FANFOLD_US = null;
        public static final String DEFAULT_PAPER_ORIENTATION = null;
        public void setPaperOrient(String defaultPaperOrientation, String paperFanfoldUs) {
        }
    }

    private static class Engine {
        public static final int EXPORT_PDF = 1;
        public Engine(int exportType) {
        }
        public static void shutdown() {
        }
        public static void stopAll() {
        }
        public void setPrompt(Object singleData, int i) {
        }
        public byte[] getPageData(int k) {
            return null;
        }
        public int getPageCount() {
            return 0;
        }
        public void execute() {
        }
        public ReportProperties getReportProperties() {
            // Must not be null: run() calls setPaperOrient() on the result.
            return new ReportProperties();
        }
        public void setConnection(Connection cn) {
        }
        public void setReportFile(String reportFile) {
        }
    }
}
like image 195
Vaspar Avatar answered Oct 13 '22 01:10

Vaspar


I will offer this "answer" as a possible quick & dirty solution to get you started on a parallelization effort.

One way or another you're going to build a render farm. I don't think there is a trivial way to do this in Java; I would love to have someone post an answer that shows how to parallelize your example in just a few lines of code. But until that happens, this will hopefully help you make some progress.

You're going to have limited scaling in the same JVM instance. But... let's see how far you get with that and see if it helps enough.

Design challenge #1: restarting.

You will probably want a place to keep the status for each of your reports e.g. "units of work".

You want this in case you need to re-start everything (maybe your server crashes) and you don't want to re-run all of the reports thus far.

Lots of ways you can do this; database, check to see if a "completed" file exists in your report folder (not sufficient for the *.pdf to exist, as that may be incomplete... for xyz_200.pdf you could maybe make an empty xyz_200.done or xyz_200.err file to help with re-running any problem children... and by the time you code up that file manipulation/checking/initialization logic, seems like it may have been easier to add a column to your database which holds the list of work to-be-done).

Design consideration #2: maximizing throughput (avoiding overload).

You don't want to saturate your system by running one thousand reports in parallel. Maybe 10.
Maybe 100.
Probably not 5,000.
You will need to do some sizing research and see what gets you near 80 to 90% system utilization.

Design consideration #3: scaling across multiple servers

Overly complex, and outside the scope of a Stack Exchange answer. You'd have to spin up JVMs on multiple systems that are running something like the workers below, plus a report manager that can pull work items from a shared "queue" structure; again, a database table is probably easier here than doing something file-based (or a network feed).

Sample Code

Caution: None of this code is well tested, it almost certainly has an abundance of typos, logic errors and poor design. Use at your own risk.

So anyway... I do want to give you the basic idea of a rudimentary task runner. Replace your "// Loops" example in the question with code like the following:

main loop (original code example)

This is more or less doing what your example code did, modified to push most of the work into ReportWorker (new class, see below). Lots of stuff seems to be packed into your original question's example of "// Loop", so I'm not trying to reverse engineer that.

fwiw, it was unclear to me where "rpt" and "data[i]" are coming from so I hacked up some test data.

/**
 * Test driver: pushes a handful of sample work items through a
 * ReportManager and prints the total elapsed time.
 */
public class Main {

   /**
    * Tells whether the unit of work identified by {@code data} has already
    * finished successfully. Placeholder: always answers {@code false}.
    */
   public static boolean complete( String data ) {
      // for testing nothing is complete.
      return false;
   }

   public static void main( String[] args ) {

      // Hacked-up test data standing in for the real report inputs.
      final String[] data = { "A", "B", "C", "D", "E" };
      final String rpt = "xyz";

      // The manager (a helper class) assigns and monitors the work.
      ReportManager reportMgr = new ReportManager();
      final long startTime = System.currentTimeMillis();

      for( String item : data ) {
         // Skip anything a previous run already finished.
         if( complete( item ) ) {
            continue;
         }
         reportMgr.assignWork( rpt, item );
      }

      // Nothing left to hand out; block until everything in flight is done.
      reportMgr.waitForWorkToFinish();

      final long elapsedMillis = System.currentTimeMillis() - startTime;
      System.out.println("Done.  Elapsed time = " + elapsedMillis/1000 +" seconds.");
   }

}

ReportManager

This class is not thread safe, just have your original loop keep calling assignWork() until you're out of reports to assign then keep calling it until all work is done, e.g. waitForWorkToFinish(), as shown above. (fwiw, I don't think you could say any of the classes here are especially thread safe).

/**
 * Keeps a fixed array of ReportWorker slots, starts one thread per assigned
 * job and polls the slots to find free ones and to detect completion.
 * Intended to be driven from a single thread.
 */
public class ReportManager {

   public int polling_delay = 500; // wait 0.5 seconds for testing.
   //public int polling_delay = 60 * 1000; // wait 1 minute.
   // not high throughput millions of reports / second, we'll run at a slower tempo.
   public int nWorkers = 3; // just 3 for testing.
   public int assignedCnt = 0;
   public ReportWorker workers[];

   /** Fills every slot with a fresh, unassigned worker. */
   public ReportManager() {
      workers = new ReportWorker[ nWorkers ];
      int slot = 0;
      while( slot < nWorkers ) {
         workers[slot] = new ReportWorker( slot );
         System.out.println("Created worker #"+slot);
         ++slot;
      }
   }

   /** Tears down the failed worker in slot {@code i} and returns its replacement. */
   private ReportWorker handleWorkerError( int i  ) {
      // something went wrong; this is where a "report" status tracker would record the failure.
      ReportWorker failed = workers[i];
      System.out.println("handlerWokerError(): failure in "+failed+", resetting worker.");
      failed.teardown();
      ReportWorker replacement = new ReportWorker( i );
      workers[i] = replacement;
      return replacement; // a brand-new worker is available for assignment.
   }

   /** Tears down the finished worker in slot {@code i} and returns its replacement. */
   private ReportWorker handleWorkerComplete( int i ) {
      // the unit of work succeeded; this is where a status tracker would record it.
      ReportWorker finished = workers[i];
      System.out.println("handleWorkerComplete(): success in "+finished+", resetting worker.");
      finished.teardown();
      ReportWorker replacement = new ReportWorker( i );
      workers[i] = replacement;
      return replacement; // a brand-new worker is available for assignment.
   }

   /**
    * Replaces the worker in slot {@code i} if it has failed or completed and
    * returns whichever worker occupies the slot afterwards.
    */
   private ReportWorker refreshSlot( int i ) {
      ReportWorker current = workers[i];
      if( current.hasError() ) {
         current = handleWorkerError( i );
      }
      if( current.isComplete() ) {
         current = handleWorkerComplete( i );
      }
      return current;
   }

   /** Sleeps for {@code polling_delay} milliseconds; an interrupt is logged and ignored. */
   private void pause() {
      try {
         Thread.sleep( polling_delay );
      } catch( InterruptedException e ) {
         System.out.println("assignWork: sleep interrupted, ignoring exception "+e);
      }
   }

   /** Recycles finished slots and counts the workers that are initialized or running. */
   private int activeWorkerCount() {
      int busy = 0;
      for( int slot = 0; slot < nWorkers; slot++ ) {
         System.out.println("activeWorkerCount() i="+slot+", checking worker="+workers[slot]);
         ReportWorker current = refreshSlot( slot );
         if( current.isInitialized() || current.isRunning() ) {
            busy++;
         }
      }
      System.out.println("activeWorkerCount() activeCnt="+busy);
      return busy;
   }

   /**
    * Recycles every finished slot and returns the lowest-indexed available
    * worker, or {@code null} when all slots are busy.
    */
   private ReportWorker getAvailableWorker() {
      // Finished slots get completely new ReportWorker instances (lazy approach);
      // pooling/reinitializing them instead might be worth trying for performance.

      System.out.println("\n-----");
      ReportWorker pick = null;
      for( int slot = 0; slot < nWorkers; slot++ ) {
         System.out.println("getAvailableWorker(): i="+slot+" worker="+workers[slot]);
         ReportWorker current = refreshSlot( slot );
         // Keep scanning after a hit so the remaining slots are recycled too.
         if( current.isAvailable() && pick == null ) {
            System.out.println("Apparently worker "+current+" is 'available'");
            pick = current;
            System.out.println("getAvailableWorker(): i="+slot+" now firstAvailable = "+pick);
         }
      }
      return pick;  // May (or may not) be null.
   }

   /**
    * Waits (polling) until a worker slot is free, initializes that worker with
    * the given job and starts it on a new thread.
    */
   public void assignWork(  String rpt, String data ) {
      ReportWorker chosen;
      while( (chosen = getAvailableWorker()) == null ) {
         System.out.println("assignWork: No workers available, sleeping for "+polling_delay);
         pause();
      }
      assignedCnt++;
      chosen.initialize( rpt, data ); // or whatever else you need.
      System.out.println("assignment #"+assignedCnt+" given to "+chosen);
      new Thread( chosen ).start(); // that is pretty much it, let it go.
   }

   /** Polls until no worker is initialized or running any more. */
   public void waitForWorkToFinish() {
      int active;
      while( (active = activeWorkerCount()) > 0 ) {
         System.out.println("waitForWorkToFinish(): #active workers="+active+", waiting...");
         pause();
      }
   }
}

ReportWorker

/**
 * One unit of report work. A worker is created idle, given its inputs via
 * {@link #initialize}, executed by {@link #run} and finally released with
 * {@link #teardown}. The real report generation is commented out; for testing
 * {@code run()} only sleeps for {@code test_delay} milliseconds.
 *
 * <p>{@code run()} updates the status from whichever thread executes it, while
 * the status is typically polled from a different (managing) thread. The
 * status fields are therefore {@code volatile} so that a polling thread is
 * guaranteed to observe the updates.
 */
public class ReportWorker implements Runnable {
      int test_delay = 10*1000; //sleep for 10 seconds.
      // (actual code would be generating PDF output)

      /** Life-cycle states of a worker. */
      public enum StatusCodes { UNINITIALIZED,
          INITIALIZED,
          RUNNING,
          COMPLETE,
          ERROR };


      int id = -1;
      // volatile: written by the thread executing run(), read by the poller.
      volatile StatusCodes status = StatusCodes.UNINITIALIZED;
      volatile boolean initialized = false;
      public String rpt = "";
      public String data = "";
      //Engine eng;
      //PDFExportThread pdfExporter;
      //DataSource_type cn;

      /** True once {@link #initialize} has succeeded; stays true while running and afterwards. */
      public boolean isInitialized() { return initialized; }
      /** True while no work has been assigned to this worker yet. */
      public boolean isAvailable()   { return status == StatusCodes.UNINITIALIZED; }
      public boolean isRunning()     { return status == StatusCodes.RUNNING; }
      public boolean isComplete()    { return status == StatusCodes.COMPLETE; }
      public boolean hasError()      { return status == StatusCodes.ERROR; }


      /** @param id identifier used only in this worker's {@link #toString} output */
      public ReportWorker( int id ) {
          this.id = id;
      }

      @Override
      public String toString( ) {
         return "ReportWorker."+id+"("+status+")/"+rpt+"/"+data;
      }

      /**
       * Stores the inputs for this unit of work and marks the worker INITIALIZED.
       *
       * @param rpt  report name
       * @param data prompt value for the report (data[i] in the original code)
       * @throws RuntimeException wrapping any failure; the status is set to ERROR first
       */
      // the example code doesn't make clear if there is a relationship between rpt & data[i].
      public void initialize( String rpt, String data /* data[i] in original code */  ) {
         try {
            this.rpt = rpt;
            this.data = data;
            /* uncomment this part where you have the various classes availble.
             * I have it commented out for testing.
            cn = ds.getConnection();   
            Engine eng = new Engine(Engine.EXPORT_PDF);
            eng.setReportFile(rpt); //rpt is the report name
            eng.setConnection(cn);
            eng.setPrompt(data, 0);
            ReportProperties repprop = eng.getReportProperties();
            repprop.setPaperOrient(ReportProperties.DEFAULT_PAPER_ORIENTATION, ReportProperties.PAPER_FANFOLD_US);
            */
            status = StatusCodes.INITIALIZED;
            initialized = true; // want this true even if we're running.
         } catch( Exception e ) {
            status = StatusCodes.ERROR;
            throw new RuntimeException("initialze(rpt="+rpt+", data="+data+")", e);
         }
      }

      /**
       * Performs the work: status goes RUNNING, then COMPLETE on success or
       * ERROR (followed by a RuntimeException) on failure.
       */
      @Override
      public void run() {
         status = StatusCodes.RUNNING;
         System.out.println("run().BEGIN: "+this);
         try {
            // delay for testing.
            try { Thread.sleep( test_delay ); }
            catch( InterruptedException e ) { System.out.println(this+".run(): test interrupted, ignoring "+e); }
            /* uncomment this part where you have the various classes availble.
             * I have it commented out for testing.
            eng.execute();
            PDFExportThread pdfExporter = new PDFExportThread(eng, sFileName, sFilePath);
            pdfExporter.execute();
            */
            status = StatusCodes.COMPLETE;
            System.out.println("run().END: "+this);
         } catch( Exception e ) {
            System.out.println("run().ERROR: "+this);
            status = StatusCodes.ERROR;
            throw new RuntimeException("run(rpt="+rpt+", data="+data+")", e);
         }
      }

      /**
       * Releases whatever the worker holds (nothing while the real code is
       * commented out). Prints a warning if called on a worker that was never
       * initialized or is still running.
       */
      public void teardown() {
         if( ! isInitialized() || isRunning() ) {
            System.out.println("Warning: ReportWorker.teardown() called but I am uninitailzied or running.");
            // should never happen, fatal enough to throw an exception?
         }

         /* commented out for testing.
           try { cn.close(); } 
           catch( Exception e ) { System.out.println("Warning: ReportWorker.teardown() ignoring error on connection close: "+e); }
           cn = null;
         */
         // any need to close things on eng?
         // any need to close things on pdfExporter?
      }
}
like image 38
jgreve Avatar answered Oct 13 '22 01:10

jgreve