Given N
sorted arrays of integers (no duplicates), I'd like to calculate the first limit
integers in their intersection.
For example, given the following arrays:
[2, 5, 7, 8, 10, 12, 13, 15, 20, 24]
[3, 4, 5, 6, 9, 10, 11, 17, 20]
[1, 2, 3, 5, 6, 10, 12, 20, 23, 29]
the intersection is [5, 10, 20]
, so if limit = 2
, the result should be [5, 10]
.
The given arrays should not be mutated.
My attempt is below. Playground here.
Is there a more efficient (faster) way to achieve this?
Would appreciate a jsperf comparison.
function intersection(sortedArrays, limit) {
var arraysCount = sortedArrays.length;
var indices = sortedArrays.map(function(array) { return 0; });
var values, maxValue, valuesAreSame, reachedEnd, i, result = [];
while (true) {
reachedEnd = indices.some(function(index, i) {
return index === sortedArrays[i].length;
});
if (reachedEnd) {
return result;
}
values = sortedArrays.map(function(array, i) { return array[indices[i]]; });
valuesAreSame = values.every(function(value, i) { return value === values[0]; });
if (valuesAreSame) {
result[result.length] = values[0];
if (result.length === limit) {
return result;
}
for (i = 0; i < arraysCount; i++) {
indices[i]++;
}
} else {
maxValue = Math.max.apply(null, values);
for (i = 0; i < arraysCount; i++) {
if (values[i] < maxValue) {
indices[i]++;
}
}
}
}
}
console.log(intersection([[0, 3, 8, 11], [1, 3, 11, 15]], 1));
// => [3]
The first challenge is to make the function correct. Once it's correct, we can worry about the speed.
There are a few things which could trip-up a function like this:
Your original function can handle repeated numbers, such as [[9,9,9,9],[9,9,9]], but gets stuck in an infinite loop if any value is NaN, and handles a limit of 0 as if there were no limit at all.
Here's my (Mk3) attempt:
function intersection( arrs, limit ) {
var result = [], posns = [];
var j, v, next, n = arrs.length, count = 1;
if( !n || limit <= 0 ) {
return result; // nothing to do
}
if( n === 1 ) {
// special case needed because main loop cannot handle this
for( j = 0; j < arrs[0].length && result.length < limit; ++ j ) {
v = arrs[0][j];
if( v === v ) {
result.push( v );
}
}
return result;
}
for( j = 0; j < n; ++ j ) {
if( !arrs[j].length ) {
return result; // no intersection
}
posns[j] = 0;
}
next = arrs[n-1][0];
++ posns[n-1];
while( true ) {
for( j = 0; j < n; ++ j ) {
do {
if( posns[j] >= arrs[j].length ) {
return result; // ran out of values
}
v = arrs[j][posns[j]++];
} while( v < next || v !== v );
if( v !== next ) {
count = 1;
next = v;
} else if( (++ count) >= n ) {
result.push( next );
if( result.length >= limit ) {
return result; // limit reached
}
if( posns[j] >= arrs[j].length ) {
return result; // ran out of values
}
next = arrs[j][posns[j]++];
count = 1;
}
}
}
}
(fiddle: http://jsfiddle.net/kn2wz2sc/4/)
This works in much the same way as your original method, but with several optimisations. It always knows which number it is looking for next, and will quickly iterate through each array until it finds a number which is at least that big. If the number is too big, it will update the number it is looking for.
In Mk2 I took some inspiration from Casey's method of counting matches as it goes instead of checking from 0-n each time, which allows it to short-cut some comparisons (and since Casey is now using indices, both methods have become very similar). In Mk3 I've made some more micro-optimisations, incrementing the indexes eagerly so that it doesn't need an inner loop.
This is safe against all the criteria I listed above (it ignores NaN since NaN!=NaN and therefore will never be in the intersection), isn't limited to numbers, and will exit quickly once any limit is reached.
A jsperf shows that Mk3 is the fastest method so far: http://jsperf.com/sorted-intersect/5 (and it's still safe against duplicates and NaN).
Here's another algorithm, where the idea is that we count how many times we see each number. Once we see it arrs.length
times, we know that it's in the intersection. If it's missing from even one list, it's not in the intersection, and we can skip to the next number in that list. It turns out to be a lot faster!
This method mutates the array, but is easier to read.
function intersection(arrs, limit) {
var intersections = [];
// Keep track of how many times we've seen the largest element seen so far.
var largest = -Infinity;
var count = 0;
while (intersections.length < limit) {
for (var i = 0; i < arrs.length; i++) {
// Drop elements less than `largest`.
while (arrs[i].length && arrs[i][0] < largest)
arrs[i].shift();
// Ignore repeated elements (not needed if you don't have repeated elements).
while (arrs[i].length >= 2 && arrs[i][0] == largest && arrs[i][1] == largest)
arrs[i].shift();
// If we ran out of elements, we're done.
if (!arrs[i].length)
return intersections;
// Look at the next element.
var next = arrs[i].shift();
if (next == largest)
count++;
else {
count = 1;
largest = next;
}
// Once we see it enough times, we can be sure it's in the intersection!
if (count == arrs.length)
intersections.push(largest);
}
}
return intersections;
}
This method doesn't, but it's harder to read.
function intersection(arrs, limit) {
var intersections = [];
var indices = [];
for (var i = 0; i < arrs.length; i++)
indices[i] = 0;
// Keep track of how many times we've seen the largest element seen so far.
var largest = -Infinity;
var count = 0;
while (intersections.length < limit) {
for (var i = 0; i < arrs.length; i++) {
// Skip past elements less than `largest`.
while (indices[i] < arrs[i].length && arrs[i][indices[i]] < largest)
indices[i]++;
// If we ran out of elements, we're done.
if (indices[i] >= arrs[i].length)
return intersections;
// Look at the next element.
var next = arrs[i][indices[i]++];
if (next == largest)
count++;
else {
count = 1;
largest = next;
}
// Once we see it enough times, we can be sure it's in the intersection!
if (count == arrs.length)
intersections.push(largest);
}
}
return intersections;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With