Can anyone please tell me which algorithm is internally used by ruby to remove duplicates from an ruby array using Array#uniq
method?
From the docs:
static VALUE
rb_ary_uniq(VALUE ary)
{
VALUE hash, uniq, v;
long i;
if (RARRAY_LEN(ary) <= 1)
return rb_ary_dup(ary);
if (rb_block_given_p()) {
hash = ary_make_hash_by(ary);
uniq = ary_new(rb_obj_class(ary), RHASH_SIZE(hash));
st_foreach(RHASH_TBL(hash), push_value, uniq);
}
else {
hash = ary_make_hash(ary);
uniq = ary_new(rb_obj_class(ary), RHASH_SIZE(hash));
for (i=0; i<RARRAY_LEN(ary); i++) {
st_data_t vv = (st_data_t)(v = rb_ary_elt(ary, i));
if (st_delete(RHASH_TBL(hash), &vv, 0)) {
rb_ary_push(uniq, v);
}
}
}
ary_recycle_hash(hash);
return uniq;
It has O(N)
complexity
Amortized O(n) as it uses Hash internally.
This depends on which "internals" you are talking about. There are 7 production-ready Ruby implementations in current use, and the Ruby Language Specification does not prescribe any particular algorithm. So, it really depends on the implementation.
E.g., this is the implementation Rubinius uses:
Rubinius.check_frozen
if block_given?
im = Rubinius::IdentityMap.from(self, &block)
else
im = Rubinius::IdentityMap.from(self)
end
return if im.size == size
array = im.to_array
@tuple = array.tuple
@start = array.start
@total = array.total
self
And this is the one from JRuby:
RubyHash hash = makeHash();
if (realLength == hash.size()) return makeShared();
RubyArray result = new RubyArray(context.runtime, getMetaClass(), hash.size());
int j = 0;
try {
for (int i = 0; i < realLength; i++) {
IRubyObject v = elt(i);
if (hash.fastDelete(v)) result.values[j++] = v;
}
} catch (ArrayIndexOutOfBoundsException aioob) {
concurrentModification();
}
result.realLength = j;
return result;
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With