StringMap was suffering because the string's hashCode was not cached. Address this by preferring the regular String.hashCode until hash collision problems start to occur.

This commit is contained in:
Jesse Wilson 2012-07-10 18:46:01 +00:00
parent b36bedb423
commit 4aaa4bf20c
2 changed files with 116 additions and 9 deletions

View File

@ -47,6 +47,12 @@ public final class StringMap<V> extends AbstractMap<String, V> {
*/
private static final int MAXIMUM_CAPACITY = 1 << 30;
/**
* Max number of collisions in a single bucket before falling back to
* an unpredictable hash code.
*/
private static final int MAX_COLLISIONS = 512;
/**
* A dummy entry in the circular linked list of entries in the map.
* The first real entry is header.nxt, and the last is header.prv.
@ -82,6 +88,12 @@ public final class StringMap<V> extends AbstractMap<String, V> {
*/
private int threshold;
/**
* True to use String.hashCode(), which is cached per-string. False to use
* less predictable (but uncached) hash algorithm.
*/
private boolean useFastHash = true;
// Views - lazily initialized
private Set<String> keySet;
private Set<Entry<String, V>> entrySet;
@ -116,7 +128,7 @@ public final class StringMap<V> extends AbstractMap<String, V> {
return null;
}
int hash = hash(key);
int hash = useFastHash ? fastHash(key) : unpredictableHash(key);
LinkedEntry<V>[] tab = table;
for (LinkedEntry<V> e = tab[hash & (tab.length - 1)]; e != null; e = e.next) {
String eKey = e.key;
@ -132,10 +144,12 @@ public final class StringMap<V> extends AbstractMap<String, V> {
throw new NullPointerException("key == null");
}
int hash = hash(key);
int collisionCount = 0;
int hash = useFastHash ? fastHash(key) : unpredictableHash(key);
LinkedEntry<V>[] tab = table;
int index = hash & (tab.length - 1);
for (LinkedEntry<V> e = tab[index]; e != null; e = e.next) {
collisionCount++;
if (e.hash == hash && key.equals(e.key)) {
V oldValue = e.value;
e.value = value;
@ -149,6 +163,26 @@ public final class StringMap<V> extends AbstractMap<String, V> {
index = hash & (tab.length - 1);
}
addNewEntry(key, value, hash, index);
/*
* If we suffer a very large number of collisions, fall back from the cached
* String.hashCode() to an (uncached) hash code that isn't predictable.
*/
if (collisionCount >= MAX_COLLISIONS) {
LinkedEntry<V> entry = header.nxt;
// clear the table
Arrays.fill(table, null);
size = 0;
header.nxt = header.prv = header;
useFastHash = false;
// fill it up in iteration order
for (; entry != header; entry = entry.nxt) {
put(entry.key, entry.value);
}
}
return null;
}
@ -227,7 +261,7 @@ public final class StringMap<V> extends AbstractMap<String, V> {
if (key == null || !(key instanceof String)) {
return null;
}
int hash = hash((String) key);
int hash = useFastHash ? fastHash(key) : unpredictableHash((String) key);
LinkedEntry<V>[] tab = table;
int index = hash & (tab.length - 1);
for (LinkedEntry<V> e = tab[index], prev = null;
@ -350,7 +384,7 @@ public final class StringMap<V> extends AbstractMap<String, V> {
return false;
}
int hash = hash((String) key);
int hash = useFastHash ? fastHash(key) : unpredictableHash((String) key);
LinkedEntry<V>[] tab = table;
int index = hash & (tab.length - 1);
for (LinkedEntry<V> e = tab[index], prev = null; e != null; prev = e, e = e.next) {
@ -482,8 +516,16 @@ public final class StringMap<V> extends AbstractMap<String, V> {
}
}
private static int fastHash(Object key) {
int h = key.hashCode();
// Apply Doug Lea's supplemental hash function to avoid collisions for
// hashes that do not differ in lower or upper bits.
h ^= (h >>> 20) ^ (h >>> 12);
return h ^ (h >>> 7) ^ (h >>> 4);
}
private static final int seed = new Random().nextInt();
private static int hash(String key) {
private static int unpredictableHash(String key) {
// Ensuring that the hash is unpredictable and well distributed.
//
// Finding unpredictable hash functions is a bit of a dark art as we need to balance
@ -502,10 +544,8 @@ public final class StringMap<V> extends AbstractMap<String, V> {
h = h3 ^ (h3 >>> 6); // h3 / 64
}
/*
* Apply Doug Lea's supplemental hash function to avoid collisions for
* hashes that do not differ in lower or upper bits.
*/
// Apply Doug Lea's supplemental hash function to avoid collisions for
// hashes that do not differ in lower or upper bits.
h ^= (h >>> 20) ^ (h >>> 12);
return h ^ (h >>> 7) ^ (h >>> 4);
}

View File

@ -0,0 +1,67 @@
/*
* Copyright (C) 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.gson.internal;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import junit.framework.TestCase;
public final class StringMapTest extends TestCase {
public void testFallbackFromTooManyCollisions() {
int count = 10000;
StringMap<Integer> map = new StringMap<Integer>();
int index = 0;
List<String> collidingStrings = collidingStrings(1 << 20, count);
for (String string : collidingStrings) {
map.put(string, index++);
}
assertEquals(collidingStrings.size(), map.size());
Iterator<Map.Entry<String, Integer>> iterator = map.entrySet().iterator();
for (int i = 0; i < count; i++) {
Map.Entry<String, Integer> entry = iterator.next();
assertEquals(collidingStrings.get(i), entry.getKey());
assertEquals(Integer.valueOf(i), entry.getValue());
}
}
/**
* @param h0 the hash code of the generated strings
*/
private List<String> collidingStrings(int h0, int count) {
List<String> result = new ArrayList<String>(count);
int p1 = 31;
int p0 = 31 * 31;
int maxChar = Character.MAX_VALUE;
for (char c0 = 0; c0 <= maxChar && c0 <= h0 / p0; c0++) {
int h1 = h0 - c0 * p0;
for (char c1 = 0; c1 <= maxChar && c1 <= h1 / p1; c1++) {
int h2 = h1 - c1 * p1;
char c2 = (char) h2;
if (h2 != c2) {
continue;
}
result.add(new String(new char[] { c0, c1, c2 } ));
if (result.size() == count) {
return result;
}
}
}
throw new IllegalArgumentException("Couldn't find " + count + " strings with hashCode " + h0);
}
}