Skip to content

Commit

Permalink
Merge pull request #125 from dynatrace-oss/polymurhash
Browse files Browse the repository at this point in the history
basic implementation of PolymurHash 2.0
  • Loading branch information
oertl authored Dec 4, 2023
2 parents 6640c87 + fc1ce68 commit dca7e71
Show file tree
Hide file tree
Showing 21 changed files with 15,481 additions and 27 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ hash4j currently implements the following hash algorithms:
* [version 4.3](https://github.com/avaneev/komihash/releases/tag/4.3) (compatible with [version 4.7](https://github.com/avaneev/komihash/releases/tag/4.7))
* [version 5.0](https://github.com/avaneev/komihash/releases/tag/5.0) (compatible with [version 5.1](https://github.com/avaneev/komihash/releases/tag/5.1) and [version 5.7](https://github.com/avaneev/komihash/releases/tag/5.7))
* [FarmHash](https://github.com/google/farmhash) (farmhashna)
* [PolymurHash 2.0](https://github.com/orlp/polymur-hash)

All hash functions are thoroughly tested against the native reference implementations and also other libraries like [Guava Hashing](https://javadoc.io/doc/com.google.guava/guava/latest/com/google/common/hash/package-summary.html), [Zero-Allocation Hashing](https://github.com/OpenHFT/Zero-Allocation-Hashing), [Apache Commons Codec](https://commons.apache.org/proper/commons-codec/apidocs/index.html), or [crypto](https://github.com/appmattus/crypto) (see [CrossCheckTest.java](src/test/java/com/dynatrace/hash4j/hashing/CrossCheckTest.java)).

Expand Down
2 changes: 1 addition & 1 deletion licenses/ZLIB_POLYMURHASH.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ the following restrictions:
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.
3. This notice may not be removed or altered from any source distribution.
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ void PolymurHash_2_0_ChecksumConfig::calculateHash(const uint8_t *seedBytes,
uint64_t seed0;
uint64_t seed1;
uint64_t tweak;
memcpy(&seed0, seedBytes, 8);
memcpy(&seed1, seedBytes + 8, 8);
memcpy(&tweak, seedBytes + 16, 8);
memcpy(&tweak, seedBytes, 8);
memcpy(&seed0, seedBytes + 8, 8);
memcpy(&seed1, seedBytes + 16, 8);
PolymurHashParams params0;
PolymurHashParams params1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

public class ModuloPerformanceTest {
public class ModuloAssignmentPerformanceTest {

@State(Scope.Thread)
public static class TestState {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright 2023 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dynatrace.hash4j.consistent;

import com.dynatrace.hash4j.random.PseudoRandomGenerator;
import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider;
import java.util.SplittableRandom;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

/**
 * JMH benchmark that measures the average time needed to re-seed the pseudo-random generator and
 * to draw a uniformly distributed bucket index from it, for various numbers of buckets.
 */
public class RandomAssignmentPerformanceTest {

  // single generator instance that is re-seeded before every draw
  private static final PseudoRandomGenerator PSEUDO_RANDOM_GENERATOR =
      PseudoRandomGeneratorProvider.splitMix64_V1().create();

  /** Benchmark state holding the number of buckets and the source of the seed values. */
  @State(Scope.Thread)
  public static class TestState {

    // number of buckets passed to uniformInt
    @Param({"1", "10", "100", "1000", "10000", "100000", "1000000"})
    int numBuckets;

    // supplies the 64-bit values used to re-seed the generator
    SplittableRandom random;

    /** Creates the seed source with a fixed seed. */
    @Setup
    public void init() {
      random = new SplittableRandom(0x87c5950e6677341eL);
    }
  }

  /**
   * Re-seeds the generator with the next value of the seed source and draws one bucket index.
   *
   * @param testState the benchmark state
   * @param blackhole consumes the drawn bucket index
   */
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  public void getBucket(TestState testState, Blackhole blackhole) {
    final long seed = testState.random.nextLong();
    PSEUDO_RANDOM_GENERATOR.reset(seed);
    blackhole.consume(PSEUDO_RANDOM_GENERATOR.uniformInt(testState.numBuckets));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

// test to measure costs for random value generation to simulate hash values
public class RandomNumberPerformanceTest {

@State(Scope.Thread)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2022-2023 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dynatrace.hash4j.hashing;

/** Performance test of the PolymurHash 2.0 hasher created with a fixed tweak and seed. */
public class PolymurHash2_0PerformanceTest extends AbstactHasher64PerformanceTest {

  // fixed tweak and seed constants used to create the hasher under test
  private static final long TWEAK = 0x2afe2c5c76d4017eL;
  private static final long SEED = 0x46223142eceb1893L;

  // created once and shared by all invocations
  private static final Hasher64 HASHER_INSTANCE = Hashing.polymurHash2_0(TWEAK, SEED);

  /**
   * Supplies the hasher under test.
   *
   * @return the shared PolymurHash 2.0 hasher instance
   */
  @Override
  protected Hasher64 getHasherInstance() {
    return HASHER_INSTANCE;
  }
}
14 changes: 11 additions & 3 deletions src/main/java/com/dynatrace/hash4j/hashing/AbstractHashStream.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 Dynatrace LLC
* Copyright 2022-2023 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -305,12 +305,20 @@ public <T> HashStream putUnorderedIterable(
return this;
}

// maximum array length that can be allocated on VMs
// compare ArrayList implementation
private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;

// visible for testing
static int increaseArraySize(int currentSize) {
if (currentSize <= 0x3fffffff) {
if (currentSize <= (SOFT_MAX_ARRAY_LENGTH >>> 1)) {
return currentSize << 1; // increase by 100%
} else if (currentSize < SOFT_MAX_ARRAY_LENGTH) {
return SOFT_MAX_ARRAY_LENGTH;
} else if (currentSize < Integer.MAX_VALUE) {
return currentSize + 1;
} else {
return Integer.MAX_VALUE;
throw new OutOfMemoryError();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -341,11 +341,6 @@ public long getAsLong() {

return finalizeGetAsLong(se1, se5, off, len);
}

// Reports the hash size in bits as the constant 64.
@Override
public int getHashBitSize() {
return 64;
}
}

protected static long finish(long r2h, long r2l, long see5) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -519,11 +519,6 @@ public long getAsLong() {
}
return finish(a, b, s, byteCount);
}

// Reports the hash size in bits as the constant 64.
@Override
public int getHashBitSize() {
return 64;
}
}

@Override
Expand Down
67 changes: 67 additions & 0 deletions src/main/java/com/dynatrace/hash4j/hashing/FarmHashNa.java
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,73 @@ private long processRemaining() {
hashLen16(b + c, b1 + c1, mul) + zLocal,
mul);
}

/**
* Adds all chars of the given sequence to this hash stream, advancing the buffer by two bytes
* per char.
*
* <p>If the buffered byte count stays below 73, the chars are only copied into the buffer.
* Otherwise the buffer is filled and processed first, then the input is consumed in blocks of 32
* chars taken directly from {@code s}, and finally the remaining chars are buffered.
*
* @param s the char sequence whose chars are added
* @return this
*/
@Override
public HashStream64 putChars(CharSequence s) {
// fast path: all chars fit into the buffer; the 2L factor makes the sum a long so that it
// cannot overflow for long sequences
if (bufferCount + s.length() * 2L < 73) {
for (int idx = 0; idx < s.length(); idx += 1) {
setChar(buffer, bufferCount + (idx << 1), s.charAt(idx));
}
bufferCount += s.length() << 1;
return this;
}
// fill the buffer char by char; as bufferCount grows in steps of 2 the loop ends at 72 for an
// even and at 73 for an odd byte count
int idx = 0;
while (bufferCount < 72) {
setChar(buffer, bufferCount, s.charAt(idx));
bufferCount += 2;
idx += 1;
}
processBuffer();
// a == 1 means the byte count is odd, i.e. chars do not start at even byte positions
int a = bufferCount & 1;
// NOTE(review): presumably processBuffer() consumes the bytes up to offset 72, so that in the
// odd case only the first byte of the last written char was consumed; stepping idx back by one
// and restarting at offset 8 - a would then write that char again with its second byte landing
// at offset 8 - confirm against processBuffer()
bufferCount = 8 - a;
idx -= a;
// the block loop needs more than 32 remaining chars, which keeps s.charAt(idx + 32) in range
// and leaves at least one char for the trailing loop
int lenMinus32 = s.length() - 32;
if (idx < lenMinus32) {
while (true) {

// read 32 chars as 8 longs; getLong presumably packs 4 consecutive chars into one long
long b0 = getLong(s, idx);
long b1 = getLong(s, idx + 4);
long b2 = getLong(s, idx + 8);
long b3 = getLong(s, idx + 12);
long b4 = getLong(s, idx + 16);
long b5 = getLong(s, idx + 20);
long b6 = getLong(s, idx + 24);
long b7 = getLong(s, idx + 28);

// odd byte alignment: shift the whole block by one byte, i.e. drop the lowest byte of each
// long and pull in the lowest byte of its successor; the last long takes the low byte of the
// char following the block
if (a != 0) {
b0 = (b0 >>> 8) | (b1 << 56);
b1 = (b1 >>> 8) | (b2 << 56);
b2 = (b2 >>> 8) | (b3 << 56);
b3 = (b3 >>> 8) | (b4 << 56);
b4 = (b4 >>> 8) | (b5 << 56);
b5 = (b5 >>> 8) | (b6 << 56);
b6 = (b6 >>> 8) | (b7 << 56);
b7 = (b7 >>> 8) | ((long) s.charAt(idx + 32) << 56);
}

processBufferWithoutInit(b0, b1, b2, b3, b4, b5, b6, b7);
idx += 32;
if (idx >= lenMinus32) {
// last full block: store its 64 bytes at buffer offsets 8 to 71
// NOTE(review): presumably later processing still needs the most recent block - confirm
setLong(buffer, 8, b0);
setLong(buffer, 16, b1);
setLong(buffer, 24, b2);
setLong(buffer, 32, b3);
setLong(buffer, 40, b4);
setLong(buffer, 48, b5);
setLong(buffer, 56, b6);
setLong(buffer, 64, b7);
break;
}
}
}

// buffer the remaining chars starting at offset 8 - a; the do-while form relies on at least one
// char being left, which follows from the conditions above provided that bufferCount was at
// most 72 on entry
do {
setChar(buffer, bufferCount, s.charAt(idx));
bufferCount += 2;
idx += 1;
} while (idx < s.length());
return this;
}
}

@Override
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/com/dynatrace/hash4j/hashing/HashSink.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 Dynatrace LLC
* Copyright 2022-2023 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -595,6 +595,8 @@ public interface HashSink {
* @param elementHashFunction 64-bit hash function used for individual elements
* @param <T> the element type
* @return this
* @throws OutOfMemoryError if the allocation of the long array that holds a 64-bit hash for
* each element of the Iterable fails
*/
<T> HashSink putUnorderedIterable(
Iterable<T> data, ToLongFunction<? super T> elementHashFunction);
Expand All @@ -607,6 +609,8 @@ <T> HashSink putUnorderedIterable(
* @param hasher a 64-bit hasher
* @param <T> the element type
* @return this
* @throws OutOfMemoryError if the allocation of the long array that holds a 64-bit hash for
* each element of the Iterable fails
*/
<T> HashSink putUnorderedIterable(
Iterable<T> data, HashFunnel<? super T> funnel, Hasher64 hasher);
Expand Down
35 changes: 35 additions & 0 deletions src/main/java/com/dynatrace/hash4j/hashing/Hashing.java
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,41 @@ public static Hasher64 komihash5_0(long seed) {
return Komihash5_0.create(seed);
}

/**
* Returns a {@link Hasher64} implementing the 64-bit PolymurHash (version 2.0) algorithm using
* the given tweak and seed value.
*
* <p>This implementation is compatible with the C++ reference implementation of {@code
* polymur_hash} defined in <a
* href="https://github.com/orlp/polymur-hash/blob/c6cc6884459560443e696604e9db3b6bb61a9bfa/polymur-hash.h">polymur-hash.h</a>
* on an Intel x86 architecture.
*
* <p>Use {@link #polymurHash2_0(long, long, long)} to specify two separate seed values instead
* of a single one.
*
* @param tweak a 64-bit tweak
* @param seed a 64-bit seed
* @return a hasher instance
*/
public static Hasher64 polymurHash2_0(long tweak, long seed) {
return PolymurHash2_0.create(tweak, seed);
}

/**
* Returns a {@link Hasher64} implementing the 64-bit PolymurHash (version 2.0) algorithm using
* the given tweak and the two given seed values.
*
* <p>This implementation is compatible with the C++ reference implementation of {@code
* polymur_hash} defined in <a
* href="https://github.com/orlp/polymur-hash/blob/c6cc6884459560443e696604e9db3b6bb61a9bfa/polymur-hash.h">polymur-hash.h</a>
* on an Intel x86 architecture.
*
* @param tweak a 64-bit tweak
* @param kSeed a 64-bit kSeed
* @param sSeed a 64-bit sSeed
* @return a hasher instance
*/
public static Hasher64 polymurHash2_0(long tweak, long kSeed, long sSeed) {
return PolymurHash2_0.create(tweak, kSeed, sSeed);
}

/**
* Returns a {@link Hasher64} implementing the 64-bit Wyhash (version final 3) algorithm using a
* seed value of zero and the default secret.
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/com/dynatrace/hash4j/hashing/Murmur3_128.java
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ public HashValue128 hashCharsTo128Bits(CharSequence s) {

final int len = s.length();
int i = 0;
for (; i + 8 <= len; i += 8) {
for (; i <= len - 8; i += 8) {
long b0 = getLong(s, i);
long b1 = getLong(s, i + 4);

Expand Down Expand Up @@ -488,7 +488,7 @@ public HashStream128 putChars(CharSequence s) {
processBuffer(buffer0, buffer1);
buffer1 = 0;
}
for (; i + 8 <= len; i += 8) {
for (; i <= len - 8; i += 8) {
long b0 = getLong(s, i);
long b1 = getLong(s, i + 4);
processBuffer(b0, b1);
Expand Down Expand Up @@ -546,7 +546,7 @@ public HashStream128 putChars(CharSequence s) {
buffer1 = (l >>> 8);
}

for (; i + 8 <= len; i += 8) {
for (; i <= len - 8; i += 8) {
long c0 = s.charAt(i);
long c1 = s.charAt(i + 1);
long c2 = s.charAt(i + 2);
Expand Down
Loading

0 comments on commit dca7e71

Please sign in to comment.