Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Group nested links by first-level iframe using casperjs

I have the following code:

// Create the CasperJS instance that drives the headless browser.
// `webSecurityEnabled` is a page-level setting that CasperJS documents under
// `pageSettings`, so it is nested there; passed as a top-level option it is
// not forwarded to the page settings.
var casper = require("casper").create({
        //  verbose: true,
        //    logLevel: "debug",
        pageSettings : {
            webSecurityEnabled : false
        }
    });
// Shared accumulator for the de-duplicated link URLs gathered by
// getLinksFromIframes and printed once the run is done.
var links = [];
/**
 * Collects the `href` of every anchor element in the document that `obj`
 * evaluates against.
 *
 * @param {Object} obj - object exposing `evaluate(fn)` (the Casper instance in this script).
 * @returns {Array} the `href` value of each `<a>` element, in the order
 *     `querySelectorAll` reports them.
 */
function get_links(obj) {
    return obj.evaluate(function () {
        // The callback only touches `document`; it uses nothing from the
        // enclosing script scope.
        var anchors = document.querySelectorAll("a");
        return [].map.call(anchors, function (anchor) {
            return anchor.href;
        });
    });
}
/**
 * Returns the distinct entries of `arr` that contain the text "http",
 * keeping the order in which each entry first appears.
 *
 * @param {Array} arr - candidate URLs.
 * @returns {Array} de-duplicated entries that matched the "http" pattern.
 */
function unique(arr) {
    // Object keys act as the set of URLs seen so far.
    var seen = {};
    [].forEach.call(arr, function (candidate) {
        if (/http(.*)?/.test(candidate)) {
            seen[candidate] = true;
        }
    });
    return Object.keys(seen);
}

/**
 * Walks every iframe reachable from the current page (including nested
 * ones), prints the unique links found in each frame and merges them into
 * the shared `links` array. Must be invoked with the Casper instance as
 * `this`.
 *
 * @param {Function} callback - invoked (with the Casper instance as `this`)
 *     from a `then` step queued after the frame traversal has been started.
 */
function getLinksFromIframes(callback) {
    // Returns the positional indexes (0..n-1) of the iframes in the document
    // that `page` currently evaluates against.
    function listFrameIndexes(page) {
        return page.evaluate(function () {
            var total = document.querySelectorAll("iframe").length;
            var indexes = [];
            var n;
            for (n = 0; n < total; n++) {
                indexes.push(n);
            }
            return indexes;
        });
    }

    // Enters each iframe of the current document, records its links, then
    // recurses so that iframes nested inside it are visited as well.
    function visitFrames(page) {
        listFrameIndexes(page).forEach(function (frameIndex) {
            page.withFrame(frameIndex, function () {
                var frame = this;
                frame.echo("We are here: " + frame.getCurrentUrl());
                unique(get_links(frame)).forEach(function (href) {
                    console.log(href);
                    links.push(href);
                });
                // Re-deduplicate after every frame so repeats across frames collapse.
                links = unique(links);
                console.log("");
                visitFrames(frame);
            });
        });
    }

    this.echo("Here we come: " + this.getCurrentUrl() + "\n");
    visitFrames(this);
    this.then(function () {
        callback.call(this);
    });
}

// Entry point: open the start page, collect the links from its iframes and
// print the accumulated `links` once collection has been queued and run.
casper.start("http://domu-test-2/node/1", function () {
    // Prints the final, de-duplicated list gathered in `links`.
    function printCollectedLinks() {
        console.log("Done!\n");
        links.forEach(function (link) {
            console.log(link);
        });
    }

    getLinksFromIframes.call(this, printCollectedLinks);
}).then(function () {}).run();

And now the question is:

If I want to get the links grouped by first-level iframe, how should I refactor the getLinksFromIframes() function? Currently all frames share the 'global' variable links. I think links should become a list of link lists: initialize a new list inside the withFrame callback, then pass that reference down to the child iframes. So how should I pass it along and collect ('backtrace') all the links found in nested iframes?

like image 814
Yijin Avatar asked Dec 10 '16 20:12

Yijin


People also ask

How to set the target of an iFrame to open links?

But with the target attribute on the link (the <a> element or elements), you can specify where the links will open. The first step is to give your iframe a unique name with the name attribute. Then, it's a matter of pointing your links at that frame using that name as the value of the target attribute:

What is the _parent attribute in iFrames?

When you set the target attribute to _parent, the link will open in the web page that is holding the iframe. In most situations with iframes, this target will open links in the same way that the _parent target does.

What is the _top target in iFrames?

But if there is an iframe inside an iframe, the _top target opens links in the highest-level window in the series, removing all the iframes. The most commonly used target, _blank, opens the link in an entirely new window, similar to a popup. When you build a web page with iframes, it's a good idea to give each one a specific name.

What is a “nested” link?

DigitalOcean joining forces with CSS-Tricks! Special welcome offer: get $100 of free credit . The other day I posted an image, quite literally as a thought exercise, about how you might accomplish “nested” links. That is, a big container that is linked to one URL that contains a smaller container or text link inside of it that goes to another URL.


1 Answer

If I understand correctly, you want to choose which iframe to get the links from, and get them only from that iframe. If that's the case, you can simply use switchToChildFrame to switch to the desired iframe and then get the links by calling get_links(obj).

My example has 3 pages: an index.html that loads iframe1.html inside an iframe, and iframe1.html, which has another iframe inside that loads iframe2.html. Each file has 3 links inside:

index.html

<a href="link1/from/index">Link 1 from index</a>
<a href="link2/from/index">Link 2 from index</a>
<a href="link3/from/index">Link 3 from index</a>

<iframe src="iframe1.html"></iframe>

iframe1.html

<a href="link1/from/iframe1">Link 1 from iframe 1</a>
<a href="link2/from/iframe1">Link 2 from iframe 1</a>
<a href="link3/from/iframe1">Link 3 from iframe 1</a>

<iframe src="iframe2.html"></iframe>

iframe2.html

<a href="link1/from/iframe2">Link 1 from iframe 2</a>
<a href="link2/from/iframe2">Link 2 from iframe 2</a>
<a href="link3/from/iframe2">Link 3 from iframe 2</a>

and the refactored getLinksFromIframes function would be like this:

/**
 * Prints the unique links of one selected frame and merges them into the
 * shared `links` array. Must be invoked with the Casper instance as `this`.
 * Which frame is read is decided by how many `switchToChildFrame(0)` calls
 * are active below.
 *
 * @param {Function} callback - invoked (with the Casper instance as `this`)
 *     from a `then` step queued after the links have been collected.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Prints and records every unique link of the frame `obj` is currently in.
    function to_frame(obj) {
        obj.echo("We are here: " + obj.getCurrentUrl());
        unique(get_links(obj)).forEach(function (link) {
            console.log(link);
            links.push(link);
        });
        // Re-deduplicate so links already recorded earlier are not repeated.
        links = unique(links);
        console.log("");
    }

    // Frame selection:
    //   - comment out both switchToChildFrame calls -> links of "index.html"
    //   - keep only the first call active           -> links of "iframe1.html"
    //   - enable both calls                         -> links of "iframe2.html"
    this.page.switchToChildFrame(0);
    //this.page.switchToChildFrame(0);
    to_frame(this);

    this.then(function () {
        callback.call(this);
    });
}

RESULTS

If you comment out both switchToChildFrame calls, you'll get the links of index.html:

casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/

We are here: http://pjs.lytrax.net/node/1/
http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index

Done!

http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index

If you uncomment the first switchToChildFrame, you'll get the links of the first level iframe1.html:

casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/

We are here: http://pjs.lytrax.net/node/1/iframe1.html
http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1

Done!

http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1

And if you uncomment both the first and the second switchToChildFrame, you'll get the links of the second level iframe2.html:

casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/

We are here: http://pjs.lytrax.net/node/1/iframe2.html
http://pjs.lytrax.net/node/1/link1/from/iframe2
http://pjs.lytrax.net/node/1/link2/from/iframe2
http://pjs.lytrax.net/node/1/link3/from/iframe2

Done!

http://pjs.lytrax.net/node/1/link1/from/iframe2
http://pjs.lytrax.net/node/1/link2/from/iframe2
http://pjs.lytrax.net/node/1/link3/from/iframe2
like image 52
Christos Lytras Avatar answered Sep 21 '22 21:09

Christos Lytras