I'm using Node v0.10.11 on Ubuntu 12.04. I can't figure out what I'm missing to make streams of URLs work with the request module. This program is trying to go to a mailing list site, find the download links for each month, then download the pages for each month.
Mikael's readme says "The first argument can be either an url or an options object. The only required option is URI, all others are optional. uri || url - fully qualified uri or a parsed url object from url.parse()"
If I call url.parse(www.targeturl.com)
, I get
Error: options.uri is a required argument
If I don't use url.parse
, I get
Error: Invalid URI "www.freelists.org/archive/si-list/06-2013"
(this link works perfectly fine in my browsers)
I've cut the code down to 42 lines. Any advice welcome
var request = require('request'),
url = require('url'),
stream = require('stream'),
cheerio = require('cheerio'), // a reduced jQuery style DOM library
Transform = require('stream').Transform
var DomStripStream = function(target) {
this.target = target;
stream.Transform.call(this,{objectMode: true});
}
DomStripStream.prototype = Object.create(
Transform.prototype, {constructor: {value: DomStripStream}}
)
DomStripStream.prototype.write = function () {
this._transform.apply(this, arguments);
};
DomStripStream.prototype.end = function () {
this._transform.apply(this, arguments);
this.emit("end");
};
DomStripStream.prototype._transform = function(chunk, encoding, callback) {
chunk = chunk ? chunk.toString() : "";
$ = cheerio.load(chunk);
domLinks = $(this.target);
$(domLinks).each(function (i, link) {
currLink = 'www.freelists.org' + $(link).attr('href')
// currLink = url.parse(currLink)
request(currLink, function (error, response, body) {
console.log(error);
})
});
}
var fs = require("fs"),
output = fs.createWriteStream("out.txt"),
mainPage = new DomStripStream('td a')
request('http://www.freelists.org/archive/si-list').
pipe(mainPage).
pipe(output);
add http:// or https:// in the url
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With