I have the following code:
// Create the CasperJS instance that drives the crawl.
var casper = require("casper").create({
    // verbose: true,
    // logLevel: "debug",
    // `webSecurityEnabled` is a PhantomJS WebPage setting, so it is handed to
    // Casper through `pageSettings`; written as a top-level option it is not
    // one of the documented Casper options and would not reach the page.
    pageSettings: {
        webSecurityEnabled: false
    }
});
// Shared accumulator: the frame-walking code below pushes every collected
// link into this array and re-assigns it after de-duplication.
var links = [];
/**
 * Read the `href` of every `<a>` element in the document that `obj`
 * evaluates against.
 *
 * @param {Object} obj - object exposing `evaluate(fn)`; the function passed
 *     to it only touches `document`, so it is self-contained.
 * @returns {Array} one `href` value per anchor, in the order
 *     `querySelectorAll("a")` reports them.
 */
function get_links(obj) {
    return obj.evaluate(function () {
        var anchors = document.querySelectorAll("a");
        // Borrow Array#map because querySelectorAll returns an array-like.
        return Array.prototype.map.call(anchors, function (anchor) {
            return anchor.href;
        });
    });
}
/**
 * Return the distinct http/https URLs contained in `arr`.
 *
 * Entries are de-duplicated by using them as object keys; anything that does
 * not start with `http:` or `https:` is dropped.
 *
 * @param {Array} arr - candidate link strings.
 * @returns {Array} the distinct matching entries, as reported by `Object.keys`.
 */
function unique(arr) {
    // Anchored on purpose: an unanchored /http/ also accepts strings that
    // merely contain "http" somewhere (e.g. "mailto:http@example.com").
    var httpUrl = /^https?:/;
    var seen = {};
    var i;
    for (i = 0; i < arr.length; i++) {
        var candidate = arr[i];
        if (httpUrl.test(candidate)) {
            seen[candidate] = true;
        }
    }
    return Object.keys(seen);
}
/**
 * Descend into every iframe reachable from the current page, nested ones
 * included, print the links found in each frame and merge them into the
 * shared `links` array. Must be invoked with `this` bound to the casper
 * instance.
 *
 * @param {Function} callback - invoked (with the same `this`) from a `then`
 *     step registered after the frame walk has been set up.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Indexes 0..n-1, one per <iframe> in the document `frameCtx` evaluates against.
    function listFrameIndexes(frameCtx) {
        return frameCtx.evaluate(function () {
            var count = document.querySelectorAll("iframe").length;
            var indexes = [];
            var n;
            for (n = 0; n < count; n++) {
                indexes.push(n);
            }
            return indexes;
        });
    }

    // Enter each child frame of `frameCtx`, record its links, then recurse
    // from inside the frame so nested iframes are visited as well.
    function visitFrames(frameCtx) {
        var indexes = listFrameIndexes(frameCtx);
        var k;
        for (k = 0; k < indexes.length; k++) {
            frameCtx.withFrame(indexes[k], function () {
                this.echo("We are here: " + this.getCurrentUrl());
                unique(get_links(this)).forEach(function (href) {
                    console.log(href);
                    links.push(href);
                });
                // Keep the shared list free of duplicates across frames.
                links = unique(links);
                console.log("");
                visitFrames(this);
            });
        }
    }

    visitFrames(this);
    this.then(function () {
        callback.call(this);
    });
}
// Completion callback: print a "Done!" banner followed by every collected link.
function printCollectedLinks() {
    console.log("Done!\n");
    links.forEach(function (link) {
        console.log(link);
    });
}

// Script entry point: open the start page, walk its iframes, then run the queue.
casper.start("http://domu-test-2/node/1", function () {
    getLinksFromIframes.call(this, printCollectedLinks);
}).then(function () {}).run();
And now the question is:
If I want to get the links grouped by first-level iframe, how should I refactor the getLinksFromIframes() function? Currently all frames share a 'global' variable, links. I think links should definitely become a list of link lists: initialize a new list inside the withFrame callback, then pass that new reference down to the child iframes. So how should I pass it and 'backtrace' all the links in nested iframes?
But with the target attribute on the link (the <a> element or elements), you can specify where the links will open. The first step is to give your iframe a unique name with the name attribute. Then, it's a matter of pointing your links at that frame using that name as the value of the target attribute:
When you set the target attribute to _parent, the link will open in the web page that is holding the iframe. In most situations with iframes, the _top target will open links in the same way that the _parent target does.
But if there is an iframe inside an iframe, the _top target opens links in the highest-level window in the series, removing all the iframes. The most commonly used target, _blank, opens the link in an entirely new window, similar to a popup. When you build a web page with iframes, it's a good idea to give each one a specific name.
DigitalOcean joining forces with CSS-Tricks! Special welcome offer: get $100 of free credit . The other day I posted an image, quite literally as a thought exercise, about how you might accomplish “nested” links. That is, a big container that is linked to one URL that contains a smaller container or text link inside of it that goes to another URL.
If I understand correctly, you want to choose which iframe to get the links from, and get them only from that iframe. If that's the case, you can simply use switchToChildFrame to switch to the desired iframe and then get the links by calling get_links(obj).
My example has 3 pages: an index.html that loads iframe1.html inside an iframe, and iframe1.html has another iframe inside it that loads iframe2.html. Each file has 3 links inside:
index.html
<a href="link1/from/index">Link 1 from index</a>
<a href="link2/from/index">Link 2 from index</a>
<a href="link3/from/index">Link 3 from index</a>
<iframe src="iframe1.html"></iframe>
iframe1.html
<a href="link1/from/iframe1">Link 1 from iframe 1</a>
<a href="link2/from/iframe1">Link 2 from iframe 1</a>
<a href="link3/from/iframe1">Link 3 from iframe 1</a>
<iframe src="iframe2.html"></iframe>
iframe2.html
<a href="link1/from/iframe2">Link 1 from iframe 2</a>
<a href="link2/from/iframe2">Link 2 from iframe 2</a>
<a href="link3/from/iframe2">Link 3 from iframe 2</a>
and the refactored getLinksFromIframes
function would be like this:
/**
 * Collect the links of one specific frame, selected by the
 * `switchToChildFrame` calls below, print them and merge them into the shared
 * `links` array. Must be invoked with `this` bound to the casper instance.
 *
 * The page is switched back to its main frame once the links have been read,
 * so later steps do not keep running inside the child frame.
 *
 * @param {Function} callback - invoked (with the same `this`) from a `then`
 *     step registered after the links have been collected.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Print and record the links of whichever frame `obj` is currently focused on.
    function to_frame(obj) {
        obj.echo("We are here: " + obj.getCurrentUrl());
        var l = unique(get_links(obj));
        var i;
        for (i = 0; i < l.length; i++) {
            console.log(l[i]);
            links.push(l[i]);
        }
        // Keep the shared list free of duplicates.
        links = unique(links);
        console.log("");
    }

    // Leave both switchToChildFrame as comments to get the "index.html" links
    this.page.switchToChildFrame(0); // Uncomment to get the links of "iframe1.html"
    //this.page.switchToChildFrame(0); // Uncomment to get the links of "iframe2.html"
    try {
        to_frame(this);
    } finally {
        // Undo the frame switches above, whatever their depth, even if
        // collecting the links threw.
        this.page.switchToMainFrame();
    }
    this.then(function () {
        callback.call(this);
    });
}
RESULTS
If you comment out both switchToChildFrame calls, you'll get the links of index.html:
casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/
We are here: http://pjs.lytrax.net/node/1/
http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index
Done!
http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index
If you uncomment the first switchToChildFrame
, you'll get the links of the first level iframe1.html
:
casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/
We are here: http://pjs.lytrax.net/node/1/iframe1.html
http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1
Done!
http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1
And if you uncomment both the first and the second switchToChildFrame
, you'll get the links of the second level iframe2.html
:
casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/
We are here: http://pjs.lytrax.net/node/1/iframe2.html
http://pjs.lytrax.net/node/1/link1/from/iframe2
http://pjs.lytrax.net/node/1/link2/from/iframe2
http://pjs.lytrax.net/node/1/link3/from/iframe2
Done!
http://pjs.lytrax.net/node/1/link1/from/iframe2
http://pjs.lytrax.net/node/1/link2/from/iframe2
http://pjs.lytrax.net/node/1/link3/from/iframe2
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With