Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to for loop in casperjs

I am trying to click a 'next' button N number of times and grab the page source each time. I understand that I can run an arbitrary function on the remote website, so instead of click() I just use the remote function nextPage(). How do I run the following an arbitrary number of times?

// Question's attempt: open the site, print its HTML, then ask the page for the
// next page numTimes times, printing the HTML after each request.
// NOTE(review): no casper.run() call is visible in this snippet; presumably it
// is issued elsewhere — confirm.
var casper = require('casper').create();

casper.start('http://www.example.com', function() {

    // Print the HTML of the page that was just opened, followed by a separator.
    this.echo(this.getHTML());
    this.echo('-------------------------');

    // numTimes: how many times to repeat; count: incremented before each nextPage call.
    var numTimes = 4, count = 2;

    casper.repeat(numTimes, function() {
        // NOTE(review): count is declared in this script callback and is not
        // passed as an argument to thenEvaluate; confirm it is actually visible
        // where the inner function runs.
        this.thenEvaluate(function() {
            nextPage(++count);
        });

        // Print the HTML again once the nextPage request step has been scheduled.
        this.then(function() {
            this.echo(this.getHTML());
            this.echo('-------------------------');
        });
    });

});

'i' here is an index I tried to use in a javascript for loop.

So tl;dr: I want to click 'next', print the page source, click 'next', print the page source, click 'next'... and continue that N number of times.

like image 714
jpmorris Avatar asked Sep 16 '13 18:09

jpmorris


2 Answers

First, you can pass a value to the remote page context (i.e. to the thenEvaluate function) like this:

    // Pre-increment count in the script, then pass the new value as the extra
    // argument; the page-side function receives it as remoteCount.
    this.thenEvaluate(function(remoteCount) {
        nextPage(remoteCount);
    }, ++count);

However, Casper#repeat might not be a good function to use here as the loop would NOT wait for each page load and then capture the content.

You may instead devise an event-based chain.

The work-flow of the code would be:

  1. Have a global variable (or at least a variable accessible to the functions mentioned below) to store the count and the limit.

  2. Listen for the load.finished event, grab the HTML there, and then call the next page.

A simplified code can be:

var casper = require('casper').create();

// Paging state shared with the 'load.finished' handler below:
// count is the page number most recently requested, limit is the last one wanted.
var count = 1;
var limit = 5;

// Each time a load finishes: print the page HTML (or a failure notice),
// then either stop or ask the page for the next page number.
casper.on('load.finished', function (status) {
    if (status === 'success') {
        this.echo(this.getHTML());
        this.echo('-------------------------');
    } else {
        this.echo("Failed to load page.");
    }

    // Advance the counter before deciding whether there is more to fetch.
    count += 1;
    if (count > limit) {
        this.echo("Finished!");
        return;
    }

    // Runs nextPage in the page; count is passed in as remoteCount.
    this.evaluate(function (remoteCount) {
        nextPage(remoteCount);
        // Added in a later edit of the answer: log the requested page number.
        console.log(remoteCount);
        return remoteCount;
    }, count);
});

casper.start('http://www.example.com').run();

NOTE: If your pages run a heavy load of JavaScript processing etc., you may also want to add a wait before calling nextPage:

// Wait 1000 ms, then request the next page from the page context,
// passing the script-side count along as remoteCount.
this.wait(1000, function () {
    this.evaluate(function (remoteCount) {
        nextPage(remoteCount);
    }, count);
});

[EDIT ADDED] The following event listeners will help you debug.

// Relay anything the page writes with console.log to the script's console.
casper.on('remote.message', function (message) {
    console.log('[Remote Page] ' + message);
});

// Report errors raised by the web page, followed by the trace as indented JSON.
casper.on('page.error', function (message, stack) {
    casper.echo('[Remote Page Error] ' + message, 'ERROR');
    var prettyTrace = JSON.stringify(stack, undefined, 4);
    casper.echo('[Remote Error trace] ' + prettyTrace);
});
like image 172
sudipto Avatar answered Nov 14 '22 23:11

sudipto


You could try using Casper#repeat

This should do, for the most part, what you want:

// Repeat the "request next page, then print the HTML" pair numTimes times.
var numTimes = 10;
var count = 1;

casper.repeat(numTimes, function () {
    // Advance the script-side counter and pass the new value to the page-side
    // function, which receives it as pageNumber.
    count += 1;
    this.thenEvaluate(function (pageNumber) {
        nextPage(pageNumber);
    }, count);

    // Print the page HTML followed by a separator line.
    this.then(function () {
        var separator = '-------------------------';
        this.echo(this.getHTML());
        this.echo(separator);
    });
});
like image 45
hexid Avatar answered Nov 14 '22 21:11

hexid