From 4aaa4bf20c43807ee78ab7e58e2e430e195647f8 Mon Sep 17 00:00:00 2001 From: Jesse Wilson Date: Tue, 10 Jul 2012 18:46:01 +0000 Subject: [PATCH] StringMap was suffering because the string's hashCode was not cached. Address this by preferring the regular String.hashCode until hash collision problems start to occur. --- .../com/google/gson/internal/StringMap.java | 58 +++++++++++++--- .../google/gson/internal/StringMapTest.java | 67 +++++++++++++++++++ 2 files changed, 116 insertions(+), 9 deletions(-) create mode 100644 gson/src/test/java/com/google/gson/internal/StringMapTest.java diff --git a/gson/src/main/java/com/google/gson/internal/StringMap.java b/gson/src/main/java/com/google/gson/internal/StringMap.java index e917542d..26accb8c 100644 --- a/gson/src/main/java/com/google/gson/internal/StringMap.java +++ b/gson/src/main/java/com/google/gson/internal/StringMap.java @@ -47,6 +47,12 @@ public final class StringMap extends AbstractMap { */ private static final int MAXIMUM_CAPACITY = 1 << 30; + /** + * Max number of collisions in a single bucket before falling back to + * an unpredictable hash code. + */ + private static final int MAX_COLLISIONS = 512; + /** * A dummy entry in the circular linked list of entries in the map. * The first real entry is header.nxt, and the last is header.prv. @@ -82,6 +88,12 @@ public final class StringMap extends AbstractMap { */ private int threshold; + /** + * True to use String.hashCode(), which is cached per-string. False to use + * less predictable (but uncached) hash algorithm. + */ + private boolean useFastHash = true; + // Views - lazily initialized private Set keySet; private Set> entrySet; @@ -116,7 +128,7 @@ public final class StringMap extends AbstractMap { return null; } - int hash = hash(key); + int hash = useFastHash ? fastHash(key) : unpredictableHash(key); LinkedEntry[] tab = table; for (LinkedEntry e = tab[hash & (tab.length - 1)]; e != null; e = e.next) { String eKey = e.key; @@ -132,10 +144,12 @@ public final class StringMap extends AbstractMap { throw new NullPointerException("key == null"); } - int hash = hash(key); + int collisionCount = 0; + int hash = useFastHash ? fastHash(key) : unpredictableHash(key); LinkedEntry[] tab = table; int index = hash & (tab.length - 1); for (LinkedEntry e = tab[index]; e != null; e = e.next) { + collisionCount++; if (e.hash == hash && key.equals(e.key)) { V oldValue = e.value; e.value = value; @@ -149,6 +163,26 @@ public final class StringMap extends AbstractMap { index = hash & (tab.length - 1); } addNewEntry(key, value, hash, index); + + /* + * If we suffer a very large number of collisions, fall back from the cached + * String.hashCode() to an (uncached) hash code that isn't predictable. + */ + if (collisionCount >= MAX_COLLISIONS) { + LinkedEntry entry = header.nxt; + + // clear the table + Arrays.fill(table, null); + size = 0; + header.nxt = header.prv = header; + useFastHash = false; + + // fill it up in iteration order + for (; entry != header; entry = entry.nxt) { + put(entry.key, entry.value); + } + } + return null; } @@ -227,7 +261,7 @@ public final class StringMap extends AbstractMap { if (key == null || !(key instanceof String)) { return null; } - int hash = hash((String) key); + int hash = useFastHash ? fastHash(key) : unpredictableHash((String) key); LinkedEntry[] tab = table; int index = hash & (tab.length - 1); for (LinkedEntry e = tab[index], prev = null; @@ -350,7 +384,7 @@ public final class StringMap extends AbstractMap { return false; } - int hash = hash((String) key); + int hash = useFastHash ? fastHash(key) : unpredictableHash((String) key); LinkedEntry[] tab = table; int index = hash & (tab.length - 1); for (LinkedEntry e = tab[index], prev = null; e != null; prev = e, e = e.next) { @@ -482,8 +516,16 @@ public final class StringMap extends AbstractMap { } } + private static int fastHash(Object key) { + int h = key.hashCode(); + // Apply Doug Lea's supplemental hash function to avoid collisions for + // hashes that do not differ in lower or upper bits. + h ^= (h >>> 20) ^ (h >>> 12); + return h ^ (h >>> 7) ^ (h >>> 4); + } + private static final int seed = new Random().nextInt(); - private static int hash(String key) { + private static int unpredictableHash(String key) { // Ensuring that the hash is unpredictable and well distributed. // // Finding unpredictable hash functions is a bit of a dark art as we need to balance @@ -502,10 +544,8 @@ public final class StringMap extends AbstractMap { h = h3 ^ (h3 >>> 6); // h3 / 64 } - /* - * Apply Doug Lea's supplemental hash function to avoid collisions for - * hashes that do not differ in lower or upper bits. - */ + // Apply Doug Lea's supplemental hash function to avoid collisions for + // hashes that do not differ in lower or upper bits. h ^= (h >>> 20) ^ (h >>> 12); return h ^ (h >>> 7) ^ (h >>> 4); } diff --git a/gson/src/test/java/com/google/gson/internal/StringMapTest.java b/gson/src/test/java/com/google/gson/internal/StringMapTest.java new file mode 100644 index 00000000..169c3bcf --- /dev/null +++ b/gson/src/test/java/com/google/gson/internal/StringMapTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.gson.internal; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import junit.framework.TestCase; + +public final class StringMapTest extends TestCase { + public void testFallbackFromTooManyCollisions() { + int count = 10000; + StringMap map = new StringMap(); + int index = 0; + List collidingStrings = collidingStrings(1 << 20, count); + for (String string : collidingStrings) { + map.put(string, index++); + } + assertEquals(collidingStrings.size(), map.size()); + Iterator> iterator = map.entrySet().iterator(); + for (int i = 0; i < count; i++) { + Map.Entry entry = iterator.next(); + assertEquals(collidingStrings.get(i), entry.getKey()); + assertEquals(Integer.valueOf(i), entry.getValue()); + } + } + + /** + * @param h0 the hash code of the generated strings + */ + private List collidingStrings(int h0, int count) { + List result = new ArrayList(count); + int p1 = 31; + int p0 = 31 * 31; + int maxChar = Character.MAX_VALUE; + for (char c0 = 0; c0 <= maxChar && c0 <= h0 / p0; c0++) { + int h1 = h0 - c0 * p0; + for (char c1 = 0; c1 <= maxChar && c1 <= h1 / p1; c1++) { + int h2 = h1 - c1 * p1; + char c2 = (char) h2; + if (h2 != c2) { + continue; + } + result.add(new String(new char[] { c0, c1, c2 } )); + if (result.size() == count) { + return result; + } + } + } + throw new IllegalArgumentException("Couldn't find " + count + " strings with hashCode " + h0); + } +}