Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

open tabs with phantomjs like real browser

PhantomJS is a headless WebKit browser. I can open a url with this and get content of a page that updates every second.

But I need to get the content of many (100) pages at the same time.

All pages must be opened concurrently and refresh every second.

It's possible for one page, but I don't know how to retrieve from multiple pages at once.

This is the example code from the PhantomJS website:

// Minimal PhantomJS script: load a single page, then quit.
console.log('Loading a web page');
// Create a page object through PhantomJS's 'webpage' module.
var page = require('webpage').create();
var url = 'http://www.phantomjs.org/';
// The callback is invoked with the load status once the page has been opened.
page.open(url, function (status) {
  //Page is loaded!
  // Exit PhantomJS now that the page has been handled.
  phantom.exit();
});

Can I run many PhantomJS instances at the same time? That doesn't seem like the best way. Does anybody know how to open just one PhantomJS instance and get content from several pages?

like image 457
MOB Avatar asked Jan 12 '14 10:01

MOB


People also ask

What browser does PhantomJS use?

PhantomJS uses WebKit, which provides a browsing environment similar to that of popular browsers such as Google Chrome, Mozilla Firefox, and Safari.

Is PhantomJS headless browser?

PhantomJS is a discontinued headless browser used for automating web page interaction. PhantomJS provides a JavaScript API enabling automated navigation, screenshots, user behavior and assertions making it a common tool used to run browser-based unit tests in a headless system like a continuous integration environment.

Is PhantomJS fast?

PhantomJS is a headless web browser scriptable with JavaScript. It runs on Windows, macOS, Linux, and FreeBSD. Using QtWebKit as the back-end, it offers fast and native support for various web standards: DOM handling, CSS selector, JSON, Canvas, and SVG.


1 Answer

Here is the code I used before to parse the items of an e-shop and save the HTML code of each page of these items.

I hope that it will help you!

var RenderUrlsToFile, system, url_string_for_array;
var arrayOfUrls = new Array();

system = require("system");

RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
var getFilename, next, page, retrieve, urlIndex, webpage, link_name, sex;

var fs = {};
fs = require('fs');

urlIndex = 0;
webpage = require("webpage");
page = null;
// getFilename = function() {
//     return "parsed/" + urlIndex + ".png";
// };
next = function(status, url, file) {
    page.close();
    callbackPerUrl(status, url, file);
    return retrieve();
};
retrieve = function() {
    var url;
    if (urls.length > 0) {
        url = urls.shift();
        urlIndex++;
        page = webpage.create();
        page.viewportSize = {
            width: 800,
            height: 600
        };
        page.settings.userAgent = "Phantom.js bot";
        return page.open("http://" + url, function(status) {
            var file;
            // file = getFilename();
            if (status === "success") {
                return window.setTimeout((function() {
                    // page.render(file);

                    var js = page.evaluate(function () {
                            return document;
                        });

                    fs.write('your_file_path'.html', js.all[0].outerHTML, 'w');

                    return next(status, url, file);
                }), 100);
            } else {
                return next(status, url, file);
            }
        });

    } else {
        return callbackFinal();
    }
};
return retrieve();
};

// Decide which pages to fetch: URLs passed on the command line take priority;
// otherwise fall back to the built-in example list.
if (system.args.length > 1) {
    // Skip system.args[0] (the script name in PhantomJS); the rest are URLs.
    arrayOfUrls = Array.prototype.slice.call(system.args, 1);
} else {
    // ------------ MAIN PART OF CODE FOR YOUR QUESTION ------
    // For example: I need to parse the items of an e-shop, so I take the
    // first listing page and then use a "for" loop over the exact number of
    // pages (1 to 19 here).
    var listingUrl = "www.lamoda.ru/c/559/accs-muzhskieaksessuary/?genders=men&page=";

    // Build the list directly rather than joining on "," and splitting again,
    // which would break on any URL that itself contains a comma.
    arrayOfUrls = [];
    for (var k = 1; k < 20; k++) {
        arrayOfUrls.push(listingUrl + k);
    }

    // Keep the comma-joined form in sync for anything that still reads it.
    url_string_for_array = arrayOfUrls.join(",");
}

// Start the crawl: log one line per processed URL, then shut PhantomJS down
// once the whole list has been handled.
RenderUrlsToFile(
    arrayOfUrls,
    function(status, url, file) {
        var prefix = status === "success" ? "Rendered '" : "Unable to render '";
        return console.log(prefix + url + "'");
    },
    function() {
        return phantom.exit();
    }
);
like image 154
Costa Rassco Avatar answered Oct 12 '22 22:10

Costa Rassco