Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Is it possible for this code to lose some matches?

During my NodeJS learning journey I found this sample code in a book (NodeJS in Practice) which uses streams to find some matches in data coming from another stream.

// Writable is the stream base class CountStream extends; util supplies inherits().
var Writable = require('stream').Writable;
var util = require('util');
// The module's sole export is the CountStream constructor itself.
module.exports = CountStream;
// CountStream is a function declaration further down, so hoisting makes it
// available here; this links its prototype chain to Writable's.
util.inherits(CountStream, Writable);

/**
 * Writable stream that tallies case-insensitive matches of a pattern in the
 * data written to it. Every chunk is searched on its own, so a match that is
 * split across two chunks is not seen.
 *
 * @param {string} matchText - Source of the regular expression to count.
 * @param {Object} [options] - Forwarded unchanged to the Writable constructor.
 */
function CountStream(matchText, options) {
    Writable.call(this, options);
    this.count = 0;
    this.matcher = new RegExp(matchText, 'ig');
}

/**
 * Searches one chunk and adds the number of matches to the running count.
 *
 * @param {Buffer|string} chunk - Data to search; converted with toString().
 * @param {string} encoding - Unused.
 * @param {Function} cb - Invoked with no arguments once the chunk is handled.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
    var text = chunk.toString();
    var found = text.match(this.matcher);
    // match() with a global regex yields null when nothing matched.
    if (found !== null) {
        this.count = this.count + found.length;
    }
    cb();
};

/**
 * Emits a 'total' event carrying the count accumulated so far.
 */
CountStream.prototype.end = function() {
    var total = this.count;
    this.emit('total', total);
};

And the code which uses the stream:

var http = require('http');
var CountStream = require('./countstream');

// Counts occurrences of "book" in whatever gets piped into it.
var countStream = new CountStream('book');

// Prints the final tally reported by the stream's 'total' event.
function reportTotal(count) {
    console.log('Total matches:', count);
}

// Feeds the HTTP response body into the counting stream.
function pipeResponse(res) {
    res.pipe(countStream);
}

countStream.on('total', reportTotal);
http.get('http://www.manning.com', pipeResponse);

Isn't it possible to lose some matches if a match is split across two chunks of data?

For example, suppose the first chunk of data contains 'This a bo' and the next chunk contains 'ok of mine.'. Neither chunk contains 'book' on its own, but the whole data does.

What would be the best solution to find all matches?

like image 210
mehrandvd Avatar asked Jun 28 '15 12:06

mehrandvd


1 Answer

So, as I explained in my comments, if you know the maximum length of the strings matched by your regex (to compute the maximum length, see the very good answer at https://stackoverflow.com/a/31173778/4114922), you can cache the tail of the previous chunk and concatenate it with the new chunk. With this method, I think you're not going to lose any matches.

// Writable is the stream base class CountStream extends; util supplies inherits().
var Writable = require('stream').Writable;
var util = require('util');
// The module's sole export is the CountStream constructor itself.
module.exports = CountStream;
// CountStream is a function declaration further down, so hoisting makes it
// available here; this links its prototype chain to Writable's.
util.inherits(CountStream, Writable);

/**
 * Writable stream that counts case-insensitive regular-expression matches in
 * the data written to it, including matches split across a chunk boundary.
 *
 * Once the stream has finished, a 'total' event is emitted with the count.
 *
 * Limitations: each chunk is decoded on its own with toString(), so a
 * multi-byte character split across chunks is not reassembled; and a greedy,
 * variable-length pattern (e.g. `a+`) may be counted once per chunk it spans.
 *
 * @param {string} matchText - Source passed to `new RegExp(matchText, 'ig')`.
 * @param {number} [maxPatternLength] - Length of the longest string the
 *     pattern can match. When it is not a positive number, the length of the
 *     pattern source is used instead (exact for literal patterns).
 * @param {Object} [options] - Forwarded unchanged to the Writable constructor.
 */
function CountStream(matchText, maxPatternLength, options) {
    Writable.call(this, options);
    this.count = 0;
    this.matcher = new RegExp(matchText, 'ig');

    // Unmatched tail of the text seen so far; undefined until the first write.
    this.previousCache = undefined;
    this.maxPatternLength = maxPatternLength;
}

/**
 * Counts the matches in the carried-over tail plus the new chunk, then keeps
 * only the characters that could still begin a match completed by the next
 * chunk.
 *
 * @param {Buffer|string} chunk - Data to search; converted with toString().
 * @param {string} encoding - Unused.
 * @param {Function} cb - Invoked with no arguments once the chunk is handled.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
    var text = (this.previousCache || '') + chunk.toString();

    // Walk the matches one at a time so the end of the last one is known.
    var lastMatchEnd = 0;
    var match;
    this.matcher.lastIndex = 0;
    while ((match = this.matcher.exec(text)) !== null) {
        this.count += 1;
        lastMatchEnd = match.index + match[0].length;
        if (match[0].length === 0) {
            // exec() does not advance past an empty match by itself.
            this.matcher.lastIndex += 1;
        }
    }

    var maxLen = Number(this.maxPatternLength);
    if (!(maxLen > 0)) {
        maxLen = this.matcher.source.length;
    }
    // A tail of maxLen - 1 characters cannot hold a complete longest match,
    // and starting it after the last match keeps counted text from being
    // matched again on the next write.
    var keep = Math.max(0, Math.floor(maxLen) - 1);
    var tailStart = Math.max(lastMatchEnd, text.length - keep);
    this.previousCache = text.slice(tailStart);

    cb();
};

/**
 * Ends the stream through Writable.prototype.end, so a final chunk is still
 * written and 'finish' still fires, and emits 'total' with the final count
 * once the stream has finished.
 *
 * Accepts the same optional (chunk, encoding, callback) arguments as
 * Writable.prototype.end and returns its result.
 */
CountStream.prototype.end = function() {
    var self = this;
    // Register the reporter once, even if end() is called repeatedly.
    if (!this.totalScheduled) {
        this.totalScheduled = true;
        this.once('finish', function() {
            self.emit('total', self.count);
        });
    }
    return Writable.prototype.end.apply(this, arguments);
};
like image 192
Sébastien Doncker Avatar answered Nov 03 '22 00:11

Sébastien Doncker