// Request the www.nodejs.org root page and log the <title> matches found in each chunk.
var http = require('http');
var urlOpts = {host: 'www.nodejs.org', path: '/', port: '80'};
http.get(urlOpts, function (response) {
// 'data' fires once per received chunk, so this handler runs several times per response.
response.on('data', function (chunk) {
var str=chunk.toString();
// NOTE: inside a string literal "\s" is not a recognised escape and collapses to "s",
// so the pattern below matches zero or more literal "s" characters, not whitespace.
// The RegExp object is also rebuilt for every chunk.
var re = new RegExp("(<\s*title[^>]*>(.+?)<\s*/\s*title)\>", "g")
// With the "g" flag, match() returns an array of the full matches (capture groups
// are not included), or null when the chunk contains no match.
console.log(str.match(re));
});
});
Output
user@dev ~ $ node app.js
[ 'node.js' ]
null
null
I only need to get the title.
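I would suggest using RegExp.prototype.exec instead of String.prototype.match, because match with a global (g) regular expression returns only the full matches and drops the capture groups. You can also define the regular expression using the literal syntax, and only once: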
// Print the text of the page's <title> element, assuming the whole element
// arrives inside a single 'data' chunk.
var http = require('http');
var urlOpts = {host: 'www.nodejs.org', path: '/', port: '80'};
// Compiled once and reused for every chunk. No "g" flag: a global regex keeps
// lastIndex between exec() calls, so a match in one chunk would make the search
// of the next chunk start part-way through that string.
// Group 1 is the element up to the closing tag name; group 2 is the title text.
var re = /(<\s*title[^>]*>(.+?)<\s*\/\s*title)>/i;
http.get(urlOpts, function (response) {
  response.on('data', function (chunk) {
    var str = chunk.toString();
    // exec() returns null when the chunk has no match; otherwise index 2 of the
    // result holds the captured title text.
    var match = re.exec(str);
    if (match && match[2]) {
      console.log(match[2]);
    }
  });
}).on('error', function (err) {
  // Without a listener, a failed request emits an unhandled 'error' event
  // and terminates the process.
  console.error('Request failed: ' + err.message);
});
The code also assumes that the <title> element will be completely contained in one chunk and not split between two chunks. It would probably be best to keep an aggregation of chunks, in case the title is split between chunks. You may also want to stop looking for the title once you've found it.
Try this:
// Case-insensitive, non-global pattern; capture group 1 is the text between the tags.
var re = new RegExp("<title>(.*?)</title>", "i");
// match() returns null when the string contains no <title> element, so guard
// before indexing instead of throwing a TypeError.
var match = str.match(re);
if (match) {
  console.log(match[1]);
}
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With