During my NodeJS learning journey I found this sample code in a book (NodeJS in Practice) which uses streams to find some matches in data coming from another stream.
var Writable = require('stream').Writable;
var util = require('util');
// Set up the prototype chain first, then publish the constructor as this
// module's export. Both statements work ahead of the definition because the
// `CountStream` function declaration is hoisted.
util.inherits(CountStream, Writable);
module.exports = CountStream;
/**
 * Writable stream that counts how many times a pattern occurs in the data
 * written to it and reports the result through a 'total' event.
 *
 * Limitation: every chunk is searched on its own, so an occurrence that is
 * split across two chunks is not counted.
 *
 * @param {string} matchText - Source of the regular expression to count. It is
 *   compiled with the `i` (case-insensitive) and `g` (global) flags.
 * @param {Object} [options] - Passed through unchanged to the Writable
 *   constructor.
 */
function CountStream(matchText, options) {
  Writable.call(this, options);
  this.count = 0;
  this.matcher = new RegExp(matchText, 'ig');
}

/**
 * Adds the number of matches found in `chunk` to the running total.
 *
 * @param {Buffer|string} chunk - Data to search; converted with `toString()`.
 * @param {string} encoding - Not used.
 * @param {Function} cb - Called once the chunk has been processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
  var matches = chunk.toString().match(this.matcher);
  if (matches) {
    this.count += matches.length;
  }
  cb();
};

/**
 * Ends the stream and emits 'total' with the final count.
 *
 * The call is delegated to `Writable.prototype.end` so that an optional final
 * chunk is still written and counted, and so that the stream actually finishes
 * ('finish' is emitted and the optional callback runs). 'total' is emitted
 * from the 'finish' handler, i.e. only after every pending write has been
 * processed.
 *
 * @param {Buffer|string} [chunk] - Optional last piece of data to write.
 * @param {string} [encoding] - Encoding of `chunk` when it is a string.
 * @param {Function} [cb] - Optional callback for when the stream has finished.
 * @returns {CountStream} This stream, as returned by `Writable.prototype.end`.
 */
CountStream.prototype.end = function(chunk, encoding, cb) {
  var self = this;
  this.once('finish', function() {
    self.emit('total', self.count);
  });
  return Writable.prototype.end.call(this, chunk, encoding, cb);
};
And the code which uses the stream:
// Example consumer: pipe an HTTP response body into a CountStream and print
// the number of matches once the whole response has been consumed.
var CountStream = require('./countstream');
var countStream = new CountStream('book');
var http = require('http');

// Register the listener before the request starts so 'total' cannot be missed.
countStream.on('total', function(count) {
  console.log('Total matches:', count);
});

http.get('http://www.manning.com', function(res) {
  res.pipe(countStream);
}).on('error', function(err) {
  // Without this handler a DNS or connection failure surfaces as an unhandled
  // 'error' event and crashes the process.
  console.error('Request failed:', err.message);
});
Isn't it possible to lose some matches if a match is split across two chunks of data?
For example, the first chunk of data might contain 'This is a bo' and the next chunk 'ok of mine.'; neither chunk contains 'book' on its own, but the data as a whole does.
What would be the best solution to find all matches?
As I explained in my comments, if you know the maximum length of the strings your regex can match (to compute it, see the very good answer at https://stackoverflow.com/a/31173778/4114922), you can cache the end of the previous chunk and prepend it to the new chunk. With this method, I think you are not going to lose any match.
var Writable = require('stream').Writable;
var util = require('util');
// Set up the prototype chain first, then publish the constructor as this
// module's export. Both statements work ahead of the definition because the
// `CountStream` function declaration is hoisted.
util.inherits(CountStream, Writable);
module.exports = CountStream;
/**
 * Writable stream that counts how many times a pattern occurs in the data
 * written to it, including occurrences that straddle a chunk boundary, and
 * reports the result through a 'total' event.
 *
 * @param {string} matchText - Source of the regular expression to count. It is
 *   compiled with the `i` (case-insensitive) and `g` (global) flags.
 * @param {number} maxPatternLength - Maximum number of characters a single
 *   match can span. It determines how much of the previous text is carried
 *   over to the next write. When it is missing or not greater than 1, nothing
 *   is carried over and each chunk is searched on its own.
 * @param {Object} [options] - Passed through unchanged to the Writable
 *   constructor.
 */
function CountStream(matchText, maxPatternLength, options) {
  Writable.call(this, options);
  this.count = 0;
  this.matcher = new RegExp(matchText, 'ig');
  this.previousCache = undefined;
  this.maxPatternLength = maxPatternLength;
}

/**
 * Counts the matches in the carried-over tail plus the new chunk, then stores
 * the new tail for the next write.
 *
 * @param {Buffer|string} chunk - Data to search; converted with `toString()`.
 * @param {string} encoding - Not used.
 * @param {Function} cb - Called once the chunk has been processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
  var text = chunk.toString();
  if (this.previousCache !== undefined) {
    text = this.previousCache + text;
  }

  var matcher = this.matcher;
  var lastMatchEnd = 0;
  var match;

  // The regex is global, so exec() resumes from lastIndex; always start at 0.
  matcher.lastIndex = 0;
  while ((match = matcher.exec(text)) !== null) {
    this.count += 1;
    if (match[0].length === 0) {
      // An empty match does not advance lastIndex; step forward manually so
      // the loop terminates.
      matcher.lastIndex += 1;
    } else {
      lastMatchEnd = matcher.lastIndex;
    }
  }

  // A match cut in half by the chunk boundary has at most maxPatternLength - 1
  // characters in this text, so that is all that needs to be kept. Keeping a
  // full maxPatternLength characters would count a match that ends exactly at
  // the boundary a second time on the next write.
  var overlap = this.maxPatternLength > 1 ? this.maxPatternLength - 1 : 0;

  // Never carry over text that a counted match has already consumed; otherwise
  // a short match sitting inside the tail would be counted twice.
  var tailStart = Math.max(text.length - overlap, lastMatchEnd);
  this.previousCache = text.substring(tailStart);
  cb();
};

/**
 * Ends the stream and emits 'total' with the final count.
 *
 * The call is delegated to `Writable.prototype.end` so that an optional final
 * chunk is still written and counted, and so that the stream actually finishes
 * ('finish' is emitted and the optional callback runs). 'total' is emitted
 * from the 'finish' handler, i.e. only after every pending write has been
 * processed.
 *
 * @param {Buffer|string} [chunk] - Optional last piece of data to write.
 * @param {string} [encoding] - Encoding of `chunk` when it is a string.
 * @param {Function} [cb] - Optional callback for when the stream has finished.
 * @returns {CountStream} This stream, as returned by `Writable.prototype.end`.
 */
CountStream.prototype.end = function(chunk, encoding, cb) {
  var self = this;
  this.once('finish', function() {
    self.emit('total', self.count);
  });
  return Writable.prototype.end.call(this, chunk, encoding, cb);
};
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With