Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

HashMap with 8 million entries becomes slow

I have a HashMap with 8 million Point2D's mapping to a LinkedList.

private Map<Point2D,LinkedList<RoadEdge>> adjacencyList;

Everything works as it should, but it takes very long time for me to get data from the HashMap. Is there any alternative, that I can use to optimize getting data out?

I am willing to compromise the time it takes to put() in favor of the time it takes to get().

like image 870
Hedam Avatar asked May 12 '17 10:05

Hedam


2 Answers

To prove a point, here is a jmh test that searches for an entry in maps that contain 10, 10_000 and 10_000_000 entries. You can see from the results that the search is constant - O(1). The problem is elsewhere in your code. Even if you don't understand the code the results are just readable numbers(see at the end).

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import javafx.geometry.Point2D;

@BenchmarkMode(org.openjdk.jmh.annotations.Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
public class TwoMapsTest {
    public static void main(String[] args) throws RunnerException {

        Options opt = new OptionsBuilder().include(EightMillionsTest.class.getSimpleName())
                .jvmArgs("-ea", "-Xms10g", "-Xmx10g")
                .shouldFailOnError(true)
                .build();
        new Runner(opt).run();
    }

    // the check bellow assert map.size() == numberOfElements; can absolutely fail
    // fingers crossed that it does not.

    @State(Scope.Thread)
    public static class Map_10 {

        int numberOfElements = 10;

        public Map<Point2D, Integer> map = new HashMap<>();

        public Point2D mightBePresent = null;

        public Point2D isPresent = null;

        @Setup(Level.Iteration)
        public void setUp() {
            int randomInsideHowManyBoundry = ThreadLocalRandom.current().nextInt(numberOfElements);
            for (int i = 0; i < numberOfElements; ++i) {

                double[] d = generateTwoPoints(-3.0, 3.9999, 4);
                Point2D p = new Point2D(d[0], d[1]);

                if (isPresent == null && i == randomInsideHowManyBoundry) {
                    isPresent = new Point2D(d[0], d[1]);
                }
                map.put(p, i);
            }

            assert map.containsKey(isPresent);
            assert map.size() == numberOfElements;
        }

        @TearDown(Level.Iteration)
        public void tearDown() {
            map.clear();
        }
    }

    @State(Scope.Thread)
    public static class Map_10_000 {

        int numberOfElements = 10_000;

        public Map<Point2D, Integer> map = new HashMap<>();

        public Point2D mightBePresent = null;

        public Point2D isPresent = null;

        @Setup(Level.Iteration)
        public void setUp() {
            int randomInsideHowManyBoundry = ThreadLocalRandom.current().nextInt(numberOfElements);
            for (int i = 0; i < numberOfElements; ++i) {

                double[] d = generateTwoPoints(-3.0, 3.9999, 4);
                Point2D p = new Point2D(d[0], d[1]);

                if (isPresent == null && i == randomInsideHowManyBoundry) {
                    isPresent = new Point2D(d[0], d[1]);
                }
                map.put(p, i);
            }

            assert map.containsKey(isPresent);
            assert map.size() == numberOfElements;
        }

        @TearDown(Level.Iteration)
        public void tearDown() {
            map.clear();
        }
    }

    @State(Scope.Thread)
    public static class Map_10_000_000 {

        int numberOfElements = 10_000_000;

        public Map<Point2D, Integer> map = new HashMap<>();

        public Point2D mightBePresent = null;

        public Point2D isPresent = null;

        @Setup(Level.Iteration)
        public void setUp() {
            int randomInsideHowManyBoundry = ThreadLocalRandom.current().nextInt(10_000_000);
            for (int i = 0; i < 10_000_000; ++i) {

                double[] d = generateTwoPoints(-3.0, 3.9999, 4);
                Point2D p = new Point2D(d[0], d[1]);

                if (isPresent == null && i == randomInsideHowManyBoundry) {
                    isPresent = new Point2D(d[0], d[1]);
                }
                map.put(p, i);
            }

            assert map.containsKey(isPresent);
            assert map.size() == numberOfElements;
        }

        @TearDown(Level.Iteration)
        public void tearDown() {
            map.clear();
        }
    }

    @Fork(1)
    @Benchmark
    public int mightBePresentMap_10(Map_10 map) {
        Integer x = map.map.get(map.mightBePresent);
        return x == null ? -1 : x;
    }

    @Fork(1)
    @Benchmark
    public int isPresentMap_10(Map_10 map) {
        Integer x = map.map.get(map.isPresent);
        return x == null ? -1 : x;
    }

    @Fork(1)
    @Benchmark
    public int mightBePresentMap_10_000(Map_10_000 map) {
        Integer x = map.map.get(map.mightBePresent);
        return x == null ? -1 : x;
    }

    @Fork(1)
    @Benchmark
    public int isPresentMap_10_000(Map_10_000 map) {
        Integer x = map.map.get(map.isPresent);
        return x == null ? -1 : x;
    }

    @Fork(1)
    @Benchmark
    public int mightBePresentMap_10_000_000(Map_10_000_000 map) {
        Integer x = map.map.get(map.mightBePresent);
        return x == null ? -1 : x;
    }

    @Fork(1)
    @Benchmark
    public int isPresentMap_10_000_000(Map_10_000_000 map) {
        Integer x = map.map.get(map.isPresent);
        return x == null ? -1 : x;
    }

    private static double[] generateTwoPoints(double upperBound, double lowerBound, int precision) {
        double x = ThreadLocalRandom.current().nextDouble(lowerBound, upperBound);
        x = BigDecimal.valueOf(x).setScale(precision, RoundingMode.HALF_UP).doubleValue();

        double y = ThreadLocalRandom.current().nextDouble(lowerBound, upperBound);
        y = BigDecimal.valueOf(x).setScale(precision, RoundingMode.HALF_UP).doubleValue();

        return new double[] { x, y };
    }
}

And the actual results:

  Benchmark                                          (howManyEntries)  Mode  Cnt   Score   Error  Units
hashmap8Millions.EightMillionsTest.isPresent                      1  avgt    5   8.787 ± 0.259  ns/op
hashmap8Millions.EightMillionsTest.isPresent                     10  avgt    5   9.988 ± 0.283  ns/op
hashmap8Millions.EightMillionsTest.isPresent                    256  avgt    5   9.146 ± 2.081  ns/op
hashmap8Millions.EightMillionsTest.isPresent                  10000  avgt    5   8.871 ± 0.574  ns/op
hashmap8Millions.EightMillionsTest.isPresent                1000000  avgt    5   8.894 ± 0.676  ns/op
hashmap8Millions.EightMillionsTest.isPresent               10000000  avgt    5  10.884 ± 5.623  ns/op
hashmap8Millions.EightMillionsTest.mightBePresent                 1  avgt    5   4.607 ± 0.175  ns/op
hashmap8Millions.EightMillionsTest.mightBePresent                10  avgt    5   4.713 ± 0.944  ns/op
hashmap8Millions.EightMillionsTest.mightBePresent               256  avgt    5   5.283 ± 0.511  ns/op
hashmap8Millions.EightMillionsTest.mightBePresent             10000  avgt    5   8.944 ± 0.144  ns/op
hashmap8Millions.EightMillionsTest.mightBePresent           1000000  avgt    5  10.256 ± 0.121  ns/op
hashmap8Millions.EightMillionsTest.mightBePresent          10000000  avgt    5   8.764 ± 0.504  ns/op

Notice that these are nano-seconds, this is by far not slow...

like image 190
Eugene Avatar answered Nov 19 '22 12:11

Eugene


First of all, check the hash function of your key (Point2D). It should be well distributed or you'll have a lot of collisions. See Eugene's answer for an explanation on the beauty that is the HashMap.

Secondly, depending on how you want to access the data (by which I mean you try to get the same object a lot), you can use a cache, but do this only if you can't change the hashCode.

Here is my implementation of a LRU cache from another project. It might speed some things up (it's not wonderful code and does unchecked casting, but should work in most cases).

package util;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class CachedHashMap<K, V> extends HashMap<K, V>{

private static final long serialVersionUID = 36215319740415714L;

private LRUCache<K, V> cache;
private LRUCache<K, Boolean> containsCache;

public CachedHashMap(int cacheSize){
    containsCache = new LRUCache<>(2*cacheSize);
    cache = new LRUCache<>(cacheSize);
}

@Override
public V put(K key, V value){

    cache.put(key, value);
    containsCache.put(key, true);
    return super.put(key, value);

}

@Override
public boolean containsKey(Object key){

    if(containsCache.containsKey(key)){
        return containsCache.get(key);
    }

    boolean contains = super.containsKey(key);

    containsCache.put((K) key, contains);

    return contains;

}

@Override
public V remove(Object key){
    containsCache.remove(key);
    cache.remove(key);
    return super.remove(key);
}

@Override
public V get(Object key){
    if(containsCache.containsKey(key)){
        if(!containsCache.get(key))
            return null;
    }

    V value = cache.get(key);
    if(value != null)
        return value;

    value = super.get(key);

    K keyCast = (K) key;
    if(value != null){
        containsCache.put(keyCast, true);
        cache.put(keyCast, value);
    }else{
        containsCache.put(keyCast, false);
    }

    return value;
}

class LRUCache<A, B> extends LinkedHashMap<A, B> {
private static final long serialVersionUID = -5958373482978477783L;

private int cacheSize;

  public LRUCache(int cacheSize) {
    super(16, 0.75f, true);
    this.cacheSize = cacheSize;
  }

  protected boolean removeEldestEntry(Map.Entry<A, B> eldest) {
    return size() >= cacheSize;
  }
}

}
like image 23
Thijs Steel Avatar answered Nov 19 '22 12:11

Thijs Steel