Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to render whole pdf document using pdf.js library?

I tried rendering a PDF document using the pdf.js library. I know only the basics of JavaScript and I am new to promises, so at first I followed the advice on this page: Render .pdf to single Canvas using pdf.js and ImageData (second answer).
But as a result, my document rendered with the text missing on every page. All pictures and colors are fine, but there is not even a line of text. I also tried some other tutorials, but either I get the same result, or the document is completely missing. Right now, my code looks like this (it's almost identical to the tutorial):

/**
 * Renders each page of a PDF, one after another, into a single reused
 * canvas, snapshots every rendered page as a data URL, and finally appends
 * one <img> per snapshot to document.body.
 *
 * @param pid     Identifier appended to '/search/nimg/IMG_FULL/' to build the
 *                document URL.
 * @param pageUrl Not used anywhere in this function.
 */
function loadPDFJS(pid, pageUrl){

    // NOTE(review): presumably this makes PDF.js do its work on the main
    // thread instead of in a web worker; workerSrc is still set below. Confirm
    // against the PDF.js version in use.
    PDFJS.disableWorker = true;
    PDFJS.workerSrc = 'pdfjs/build/pdf.worker.js';

    // One canvas/context pair shared by every page; the canvas itself is never
    // attached to the DOM in this function.
    var canvas = document.createElement('canvas');
    var ctx = canvas.getContext('2d');
    // Data URLs of the rendered pages, in the order rendering finished.
    var pages = [];
    // 1-based number of the page currently being rendered.
    var currentPage = 1;

    var url = '/search/nimg/IMG_FULL/' + pid + '#page=1';

    PDFJS.getDocument(url).then(function (pdf) {

        // Start the chain only if the document has at least one page.
        if(currentPage <= pdf.numPages) getPage();

        // Renders `currentPage`, then calls itself for the next page from the
        // render callback, so pages are processed strictly sequentially.
        function getPage() {
            pdf.getPage(currentPage).then(function(page){
                var scale = 1.5;
                var viewport = page.getViewport(scale);

                // Resize the shared canvas to this page's viewport.
                canvas.height = viewport.height;
                canvas.width = viewport.width;

                var renderContext = {
                    canvasContext: ctx,
                    viewport: viewport
                };

                page.render(renderContext).then(function() {
                    // Snapshot now: the same canvas is reused for the next page.
                    pages.push(canvas.toDataURL());
                    if(currentPage < pdf.numPages) {
                        currentPage++;
                        getPage();
                    } else {
                        // Last page rendered: turn the snapshots into images.
                        done();
                    }
                });
            });
        }
    });

    // Kicks off an image load for every snapshot.
    // NOTE(review): images are appended from their onload handlers, so the
    // append order follows load completion order - presumably page order, but
    // nothing here enforces it.
    function done() {
        for(var i = 0; i < pages.length; i++){
            drawPage(i, addPage);
        }
    }

    // Appends a loaded page image to the end of the document body.
    function addPage(img){
        document.body.appendChild(img);
    }

    // Loads snapshot `index` into a new Image and passes the loaded image to
    // `callback`.
    function drawPage(index, callback){
        var img = new Image;
        img.onload = function() {
            // Paints the image onto the shared (detached) canvas as well.
            // NOTE(review): this looks unnecessary for displaying the <img>.
            ctx.drawImage(this, 0, 0, ctx.canvas.width, ctx.canvas.height);
            callback(this);
        }
        img.src = pages[index];
    }
}
like image 605
Martina Avatar asked Aug 06 '14 14:08

Martina


People also ask

Can you render a PDF file?

This being the case, all PDF viewers must be built on top of rendering functionality, although PDFs can also be rendered for printing or saving in the desired image format.

How is PDF rendered?

With PDF.js, PDFs are downloaded via AJAX and rendered in a <canvas> element using native drawing commands. To improve performance, a lot of the processing work happens in a web worker, where the work of the core layer usually takes place.


3 Answers

OK, so I looked at my code again and started all over. I made it simpler and finally got it to work. Now it looks like this:

// Element that receives the rendered page canvases, looked up once when the
// script loads. Within this snippet the variable is not read again:
// loadPDFJS looks the element up by id itself.
var canvasContainer = document.getElementById('pdfImageImg');
/**
 * Renders every page of a PDF into its own 700px-high canvas and, once all
 * pages have been rendered, appends the canvases in page order to the element
 * with id 'pdfImageImg'.
 *
 * Pages are rendered one after another; nothing is added to the DOM until the
 * last page has finished rendering.
 *
 * @param {string|number} pid Identifier appended to '/search/nimg/IMG_FULL/'
 *     to build the document URL.
 * @param {*} pageUrl Unused; kept so existing callers keep working.
 * @returns {Promise} Settles after the canvases have been appended; rejects
 *     if loading or rendering fails.
 */
function loadPDFJS(pid, pageUrl){

    PDFJS.workerSrc = 'pdfjs/build/pdf.worker.js';

    var targetHeight = 700;
    // Rendered canvases, index 0 holding page 1.
    var canvases = [];
    var url = '/search/nimg/IMG_FULL/' + pid + '#page=1';

    return PDFJS.getDocument(url).then(function(pdf) {

        // Renders page `pageNumber` (1-based), then recurses to the next page.
        function renderPage(pageNumber) {
            return pdf.getPage(pageNumber).then(function(page) {
                // Pick the scale that makes the page exactly targetHeight tall.
                var scale = targetHeight / page.getViewport(1).height;
                var scaledViewport = page.getViewport(scale);

                var canvas = document.createElement('canvas');
                var context = canvas.getContext('2d');
                canvas.height = scaledViewport.height;
                canvas.width = scaledViewport.width;

                var renderContext = {
                    canvasContext: context,
                    viewport: scaledViewport
                };
                return page.render(renderContext).then(function () {
                    // Store EVERY page, including the last one; storing only
                    // while more pages remained dropped the final page.
                    canvases.push(canvas);
                    if (pageNumber < pdf.numPages) {
                        return renderPage(pageNumber + 1);
                    }
                });
            });
        }

        return renderPage(1).then(function () {
            var container = document.getElementById('pdfImageImg');
            for (var i = 0; i < canvases.length; i++) {
                container.appendChild(canvases[i]);
            }
        });
    });
}
like image 61
Martina Avatar answered Nov 09 '22 08:11

Martina


Thank you @user3913960, your concept worked for me. I found some issues in your code which I fixed. Here is the code:

/**
 * Renders every page of a PDF, one after another, each into its own canvas
 * appended to the element with id 'pdf-container'. Every page is scaled so
 * that its width equals the screen width (window.screen.width).
 *
 * @param {string} pageUrl URL of the PDF document, passed to
 *     PDFJS.getDocument.
 * @returns {Promise} Settles once the last page has finished rendering (use
 *     it as the "all pages loaded" callback); rejects if loading or rendering
 *     fails.
 */
function loadPDFJS(pageUrl) {
    PDFJS.workerSrc = 'resources/js/pdfjs/pdf.worker.js';
    var container = document.getElementById('pdf-container');

    return PDFJS.getDocument(pageUrl).then(function (pdf) {

        // Renders page `pageNumber` (1-based), then recurses to the next page.
        function renderPage(pageNumber) {
            return pdf.getPage(pageNumber).then(function (page) {
                //
                // Prepare canvas using PDF page dimensions
                //
                var canvas = document.createElement('canvas');
                // Link: http://stackoverflow.com/a/13039183/1577396
                // Scale factor = screen width / unscaled page width, so the
                // rendered page is exactly as wide as the screen.
                var viewport = page.getViewport(window.screen.width / page.getViewport(1.0).width);
                // Attach the canvas before rendering so pages appear in order
                // as they are drawn.
                container.appendChild(canvas);
                var context = canvas.getContext('2d');
                canvas.height = viewport.height;
                canvas.width = viewport.width;
                //
                // Render PDF page into canvas context
                //
                var renderContext = {
                    canvasContext: context,
                    viewport: viewport
                };
                return page.render(renderContext).then(function () {
                    if (pageNumber < pdf.numPages) {
                        return renderPage(pageNumber + 1);
                    }
                    // Last page done: the returned promise now settles.
                });
            });
        }

        return renderPage(1);
    });
}
// Example invocation; the file name is a placeholder for the PDF's URL.
loadPDFJS("somepdffilenamehere.pdf");
like image 38
Mr_Green Avatar answered Nov 09 '22 10:11

Mr_Green


The pdfjs-dist library contains the parts needed for building a PDF viewer. You can use PDFPageView to render all pages. Based on https://github.com/mozilla/pdf.js/blob/master/examples/components/pageviewer.html :

var url = "https://cdn.mozilla.net/pdfjs/tracemonkey.pdf";
var container = document.getElementById('container');

// Load the document, then draw its pages strictly one after another by
// chaining each page's work onto the previous page's promise.
PDFJS.getDocument(url).then(function (doc) {
  var SCALE = 1.0;

  // Creates a page view for the zero-based page index `id` inside
  // `container`, attaches the loaded page to it and draws it.
  function drawPageView(id) {
    return doc.getPage(id + 1).then(function (pdfPage) {
      var viewOptions = {
        container: container,
        id: id,
        scale: SCALE,
        defaultViewport: pdfPage.getViewport(SCALE),
        // Text and annotation layers are enabled here; drop the factories
        // if they are not needed.
        textLayerFactory: new PDFJS.DefaultTextLayerFactory(),
        annotationLayerFactory: new PDFJS.DefaultAnnotationLayerFactory()
      };
      var pdfPageView = new PDFJS.PDFPageView(viewOptions);
      // Associate the actual page with the view before drawing it.
      pdfPageView.setPdfPage(pdfPage);
      return pdfPageView.draw();
    });
  }

  var chain = Promise.resolve();
  for (var index = 0; index < doc.numPages; index++) {
    // bind() fixes the index for this link of the chain.
    chain = chain.then(drawPageView.bind(null, index));
  }
  return chain;
});
/* Draw a 1px black line above every direct child of #container except the
   first, i.e. a separator between consecutive children. */
#container > *:not(:first-child) {
  border-top: solid 1px black; 
}
<!-- Viewer stylesheet and scripts from the pdfjs-dist package, loaded in this
     order: compatibility.js, pdf.js, pdf_viewer.js.
     NOTE(review): these URLs carry no version, so whatever release the CDN
     serves is used - consider pinning one that still exposes the PDFJS
     global the script relies on. -->
<link href="https://npmcdn.com/pdfjs-dist/web/pdf_viewer.css" rel="stylesheet"/>
<script src="https://npmcdn.com/pdfjs-dist/web/compatibility.js"></script>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script src="https://npmcdn.com/pdfjs-dist/web/pdf_viewer.js"></script>

<!-- Element the script looks up with getElementById('container') and passes
     to each PDFPageView as its container. -->
<div id="container" class="pdfViewer singlePageView"></div>

See also How to display whole PDF (not only one page) with PDF.JS?

like image 44
async5 Avatar answered Nov 09 '22 10:11

async5