admin管理员组

文章数量:1410682

PhantomJS is a headless WebKit browser. I can open a url with this and get content of a page that updates every second.

But I need to get the content of many (100) pages at the same time.

All pages must be opened concurrently and refresh every second.

It's possible for one page, but I don't know how to retrieve from multiple pages at once.

This is the example code from the PhantomJS website:

// Minimal PhantomJS script: open a single page, then quit.
console.log('Loading a web page');
var page = require('webpage').create();
var url = 'http://www.phantomjs.org/';
page.open(url, function (status) {
  // Runs once loading has finished; status is 'success' or 'fail'.
  if (status !== 'success') {
    console.log('Unable to load ' + url);
  }
  // Without this call the PhantomJS process keeps running.
  phantom.exit();
});

Can I use many PhantomJS instances at one time? It doesn't seem like the best way. Does anybody know how to open just one PhantomJS instance and get content from several pages?

PhantomJS is a headless WebKit browser. I can open a url with this and get content of a page that updates every second.

But I need to get the content of many (100) pages at the same time.

All pages must be opened concurrently and refresh every second.

It's possible for one page, but I don't know how to retrieve from multiple pages at once.

This is the example code from the PhantomJS website:

// Minimal PhantomJS script: open a single page, then quit.
console.log('Loading a web page');
var page = require('webpage').create();
var url = 'http://www.phantomjs.org/';
page.open(url, function (status) {
  // Runs once loading has finished; status is 'success' or 'fail'.
  if (status !== 'success') {
    console.log('Unable to load ' + url);
  }
  // Without this call the PhantomJS process keeps running.
  phantom.exit();
});

Can I use many PhantomJS instances at one time? It doesn't seem like the best way. Does anybody know how to open just one PhantomJS instance and get content from several pages?

Share Improve this question edited Jan 12, 2014 at 10:57 Unsigned 9,9464 gold badges45 silver badges75 bronze badges asked Jan 12, 2014 at 10:03 MOBMOB 8532 gold badges13 silver badges28 bronze badges 4
  • Can you create more page instances? Perhaps an array? – Unsigned Commented Jan 12, 2014 at 10:05
  • I'm not sure ... because the page must stay open for a long time ... also I'm a beginner in JavaScript .. can you explain it to me? – MOB Commented Jan 12, 2014 at 10:08
  • @Unsigned can show me some example ? – MOB Commented Jan 12, 2014 at 10:24
  • You may not actually be able to pull pages in parallel reliably, as it appears that PhantomJS pages share the same network manager, causing the finished signals to overlap and become meaningless. If you aren't aware of arrays yet, this might be a bit advanced for a learning project. Google "javascript array" for details. – Unsigned Commented Jan 12, 2014 at 10:50
Add a comment  | 

1 Answer 1

Reset to default 4

Here is the code I used before to parse the items of an e-shop and save the HTML code of each page of those items.

I hope that it will help you!

// Names shared by the whole script; they are assigned further down.
var RenderUrlsToFile;
var system;
var url_string_for_array;

// List of URLs to process; starts out empty.
var arrayOfUrls = [];

// PhantomJS "system" module.
system = require("system");

RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
var getFilename, next, page, retrieve, urlIndex, webpage, link_name, sex;

var fs = {};
fs = require('fs');

urlIndex = 0;
webpage = require("webpage");
page = null;
// getFilename = function() {
//     return "parsed/" + urlIndex + ".png";
// };
next = function(status, url, file) {
    page.close();
    callbackPerUrl(status, url, file);
    return retrieve();
};
retrieve = function() {
    var url;
    if (urls.length > 0) {
        url = urls.shift();
        urlIndex++;
        page = webpage.create();
        page.viewportSize = {
            width: 800,
            height: 600
        };
        page.settings.userAgent = "Phantom.js bot";
        return page.open("http://" + url, function(status) {
            var file;
            // file = getFilename();
            if (status === "success") {
                return window.setTimeout((function() {
                    // page.render(file);

                    var js = page.evaluate(function () {
                            return document;
                        });

                    fs.write('your_file_path'.html', js.all[0].outerHTML, 'w');

                    return next(status, url, file);
                }), 100);
            } else {
                return next(status, url, file);
            }
        });

    } else {
        return callbackFinal();
    }
};
return retrieve();
};

// Use the URLs given on the command line; otherwise fall back to a generated list.
if (system.args.length > 1) {
    // system.args[0] is skipped; the remaining arguments are used as URLs.
    arrayOfUrls = Array.prototype.slice.call(system.args, 1);
} else {
    // ------------ MAIN PART OF CODE FOR YOUR QUESTION ------------
    // For example: I need to parse the items of an e-shop, so I take the first
    // page and then use a "for" loop over the exact number of pages.
    var listingUrl = "www.lamoda.ru/c/559/accs-muzhskieaksessuary/?genders=men&page=";

    arrayOfUrls = [];
    for (var k = 1; k < 20; k++) {
        arrayOfUrls.push(listingUrl + k);
    }

    // Same comma-separated string the list used to be built from.
    url_string_for_array = arrayOfUrls.join(",");
}

// Start processing: log the outcome of every URL, then quit PhantomJS.
RenderUrlsToFile(arrayOfUrls, function(status, url, file) {
    var message = status === "success"
        ? "Rendered '" + url + "'"
        : "Unable to render '" + url + "'";
    return console.log(message);
}, function() {
    // Called once no URLs remain.
    return phantom.exit();
});

本文标签: javascript, open tabs with phantomjs like real browser, Stack Overflow