Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How can I wait for the page to be ready in PhantomJS?

I'm using PhantomJS to log into a site and do something. The site uses OAuth for logging in. Clicking the "Login" button on the site takes you to the OAuth service. There you enter your credentials and, after clicking "Submit", you get redirected back to the original site. My script works fine but relies on timeouts, which doesn't seem too robust.

How can I rewrite this code so that, instead of using setTimeout, I can wait until the page is ready? I often see errors saying that the page isn't ready and therefore jQuery isn't initialized.

I'm not too good with Javascript so an example would be helpful. This is what I've hacked together after a ton of Googling. Here's my code:

// PhantomJS script from the question: opens the login page, then drives an
// OAuth login in three steps that are separated by setTimeout calls rather
// than by page-ready events.
var page = require('webpage').create();
var system = require('system');

// Log the URL of any resource that hits the resource timeout configured here.
page.settings.resourceTimeout = 10000;
page.onResourceTimeout = function(e) {
  console.log("Timed out loading resource " + e.url);
};

page.open('https://mysite.com/login', function(status) {
    if (status !== 'success') {
        console.log('Error opening url');
        phantom.exit(1);
    } else {
        // Step 1: click the site's login button. The click uses `$` inside the
        // page context, so it only works if the page has already set up jQuery.
        setTimeout(function() {
            console.log('Successfully loaded page');
            page.evaluate(function() {
                $("#submit-field").click(); //Clicking the login button
            });

            console.log('Clicked login with OAuth button');
            // Step 2: after a fixed 15 s wait, fill in the credential form and submit it.
            setTimeout(function() {
                console.log('Addding the credentials');
                page.evaluate(function() {                
                    document.getElementById("username").value = '[email protected]';
                    document.getElementById("password").value = 'P@ssw0rd';
                    document.getElementById("Login").click();
                });
                console.log('Clicked login button');

                // Step 3: after another fixed 15 s wait, act on the page reached after login.
                // No phantom.exit() is called on this path.
                setTimeout(function() {
                    //Inject some jQuery into the page and invoke that here
                    console.log('Clicked the export button');
                }, 15000);
            }, 15000);
        // NOTE(review): unlike the two nested calls, this outer setTimeout has no
        // delay argument, so step 1 is scheduled without any wait after the open
        // callback — confirm whether a delay was intended here.
        });
    }
});
like image 732
Mridang Agarwalla Avatar asked Jun 10 '14 14:06

Mridang Agarwalla


2 Answers

It seems that the only way to do this was to use callbacks from the DOM to PhantomJS.

// Waits for the page's DOMContentLoaded event by having the page call back
// into PhantomJS, instead of guessing with timeouts.
var page = require('webpage').create();
var system = require('system');

// Invoked in the PhantomJS context each time the page calls window.callPhantom().
// Registered once, up front, rather than again on every onInitialized.
page.onCallback = function(data) {
    console.log('Main page is loaded and ready');
    //Do whatever here
};

// onInitialized runs after the page object is created but before the URL is
// loaded, so the listener below is in place before DOMContentLoaded can fire.
page.onInitialized = function() {
    page.evaluate(function() {
        document.addEventListener('DOMContentLoaded', function() {
            window.callPhantom();
        }, false);
        // This runs inside the page; PhantomJS only shows it if a
        // page.onConsoleMessage handler is installed.
        console.log("Added listener to wait for page ready");
    });
};

page.open('https://www.google.com', function(status) {
    // Without this check a failed load would leave the script waiting forever
    // for a callback that never comes.
    if (status !== 'success') {
        console.log('Error opening url');
        phantom.exit(1);
    }
});
like image 134
Mridang Agarwalla Avatar answered Nov 08 '22 02:11

Mridang Agarwalla


An alternate method would be to extend the phantomjs waitfor.js example.

I use this personal blend of methods. This is my main.js file:

'use strict';

// Entry script: loads the waitFor() helper, opens the page, injects jQuery and
// then polls until document.readyState is 'complete' before reading the DOM.

// injectJs() returns false when the file could not be loaded; in that case
// waitFor() would be undefined further down.
var wasSuccessful = phantom.injectJs('./lib/waitFor.js');
var page = require('webpage').create();

if (!wasSuccessful) {
  console.log('error: could not load ./lib/waitFor.js');
  phantom.exit(1);
} else {
  page.open('http://foo.com', function(status) {
    if (status !== 'success') {
      console.log('error');
      phantom.exit(1);
      return;
    }

    // The second callback only runs once jQuery has been loaded into the page.
    page.includeJs('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js', function() {
      waitFor(function() {
        // Evaluated inside the page: true once the document has finished loading.
        return page.evaluate(function() {
          return 'complete' === document.readyState;
        });
      }, function() {
        // The page is ready, so it is now safe to query it with jQuery.
        var fooText = page.evaluate(function() {
          return $('#foo').text();
        });

        phantom.exit();
      });
    });
  });
}

And the lib/waitFor.js file (which is just a copy and paste of the waitFor() function from the phantomjs waitfor.js example):

/**
 * Polls a condition every 250 ms until it becomes truthy or a timeout elapses.
 *
 * On success `onReady` is invoked exactly once; on timeout a message is logged
 * and PhantomJS exits with status 1.
 *
 * @param {Function|string} testFx - Predicate to call (or source string to eval) on every poll.
 * @param {Function|string} onReady - Callback to call (or source string to eval) once testFx is truthy.
 * @param {number} [timeOutMillis] - Maximum time to wait in milliseconds; defaults to 3000.
 */
function waitFor(testFx, onReady, timeOutMillis) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timeout is 3s
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ( !condition && (new Date().getTime() - start < maxtimeOutMillis) ) {
                // Not timed out yet and condition not yet fulfilled: test again.
                // NOTE: string arguments are eval'd; only ever pass trusted code.
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                if (!condition) {
                    return; //< keep polling
                }
            }

            // Stop polling BEFORE running any callback, so that an exception thrown
            // by onReady cannot leave the interval running and re-invoke it.
            clearInterval(interval);

            if (!condition) {
                // Timed out while the condition was still 'false'
                console.log("'waitFor()' timeout");
                phantom.exit(1);
                return;
            }

            // Condition fulfilled: do what it's supposed to do once the condition is met
            typeof(onReady) === "string" ? eval(onReady) : onReady();
        }, 250); //< repeat check every 250ms
}

This method is not asynchronous, but at least I am assured that all the resources were loaded before I try using them.

like image 38
Daishi Avatar answered Nov 08 '22 02:11

Daishi