Skip to content

Commit

Permalink
Merge pull request #324 from dynatrace-oss/distinct-count-util
Browse files Browse the repository at this point in the history
make utility function for the deduplication for hash tokens public
  • Loading branch information
oertl authored Feb 4, 2025
2 parents 13e58de + b809887 commit a902c9e
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

import java.util.Arrays;

class DistinctCountUtil {
/** Utility functions for distinct counting. */
public final class DistinctCountUtil {

private DistinctCountUtil() {}

Expand Down Expand Up @@ -191,13 +192,13 @@ static double solveMaximumLikelihoodEquation(
return x;
}

static int computeToken1(long hashValue) {
static int computeToken(long hashValue) {
int idx = (int) (hashValue >>> 38);
int nlz = Long.numberOfLeadingZeros(~(~hashValue << 26));
return (idx << 6) | nlz;
}

static long reconstructHash1(int token) {
static long reconstructHash(int token) {
long idx = token & 0xFFFFFFC0L;
return (0x3FFFFFFFFFL >>> token) | (idx << 32);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ public HyperLogLog add(long hashValue) {
*/
@Override
public HyperLogLog addToken(int token) {
return add(DistinctCountUtil.reconstructHash1(token));
return add(DistinctCountUtil.reconstructHash(token));
}

/**
Expand All @@ -309,7 +309,7 @@ public HyperLogLog addToken(int token) {
* @return the 32-bit token
*/
public static int computeToken(long hashValue) {
return DistinctCountUtil.computeToken1(hashValue);
return DistinctCountUtil.computeToken(hashValue);
}

/**
Expand Down Expand Up @@ -353,7 +353,7 @@ public HyperLogLog add(long hashValue, StateChangeObserver stateChangeObserver)
*/
@Override
public HyperLogLog addToken(int token, StateChangeObserver stateChangeObserver) {
return add(DistinctCountUtil.reconstructHash1(token), stateChangeObserver);
return add(DistinctCountUtil.reconstructHash(token), stateChangeObserver);
}

// returns register change probability scaled by 2^64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ public UltraLogLog add(long hashValue) {
*/
@Override
public UltraLogLog addToken(int token) {
return add(DistinctCountUtil.reconstructHash1(token));
return add(DistinctCountUtil.reconstructHash(token));
}

/**
Expand All @@ -226,7 +226,7 @@ public UltraLogLog addToken(int token) {
* @return the 32-bit token
*/
public static int computeToken(long hashValue) {
return DistinctCountUtil.computeToken1(hashValue);
return DistinctCountUtil.computeToken(hashValue);
}

/**
Expand Down Expand Up @@ -275,7 +275,7 @@ public UltraLogLog add(long hashValue, StateChangeObserver stateChangeObserver)
*/
@Override
public UltraLogLog addToken(int token, StateChangeObserver stateChangeObserver) {
return add(DistinctCountUtil.reconstructHash1(token), stateChangeObserver);
return add(DistinctCountUtil.reconstructHash(token), stateChangeObserver);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ void testSolveMaximumLikelihoodEquation() {
}

@Test
void testComputeToken1() {
void testComputeToken() {
SplittableRandom random = new SplittableRandom(0xbafc97ad730480acL);

int numCycles = 100;
Expand All @@ -175,20 +175,18 @@ void testComputeToken1() {
long mask = 0xFFFFFFC000000000L | (0x0000003FFFFFFFFFL >>> nlz);
long hash = (r | (0x0000002000000000L >>> nlz)) & mask;

int token = DistinctCountUtil.computeToken1(hash);
long reconstructedHash = DistinctCountUtil.reconstructHash1(token);
int tokenFromReconstructedHash = DistinctCountUtil.computeToken1(reconstructedHash);
int token = DistinctCountUtil.computeToken(hash);
long reconstructedHash = DistinctCountUtil.reconstructHash(token);
int tokenFromReconstructedHash = DistinctCountUtil.computeToken(reconstructedHash);
assertThat(reconstructedHash).isEqualTo(hash | (0x0000001FFFFFFFFFL >>> nlz));
assertThat(tokenFromReconstructedHash).isEqualTo(token);
}
}
}

private static TokenIterable fromSortedArray(int[] tokens) {
return new TokenIterable() {
@Override
public TokenIterator iterator() {
return new TokenIterator() {
return () ->
new TokenIterator() {
private int idx = 0;

@Override
Expand All @@ -201,8 +199,6 @@ public int nextToken() {
return tokens[idx++];
}
};
}
};
}

private static void testEstimationFromTokens(int distinctCount) {
Expand All @@ -215,7 +211,7 @@ private static void testEstimationFromTokens(int distinctCount) {

for (int i = 0; i < numIterations; ++i) {
for (int c = 0; c < distinctCount; ++c) {
tokens[c] = DistinctCountUtil.computeToken1(prg.nextLong());
tokens[c] = DistinctCountUtil.computeToken(prg.nextLong());
}
Arrays.sort(tokens);

Expand Down

0 comments on commit a902c9e

Please sign in to comment.