Skip to content

Commit

Permalink
feat(java): reduce metastring hashcode payload for small string(<=16 …
Browse files Browse the repository at this point in the history
…bytes) (#1909)

## What does this PR do?
If a meta string is at most 16 bytes long, we skip writing its hashcode to
reduce space cost. This brings a big gain, since most metastring-encoded
ClassName/EnumName values are at most 16 bytes. Package names are far fewer
than class names, so they can save space by sharing entries through dict
encoding.

## Does this PR introduce any user-facing change?

<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->

- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?

## Benchmark
    // old size 391
// Benchmark (bufferType) (objectType) (references) Mode Cnt Score Error
Units
// UserTypeDeserializeSuite.fury_deserialize array MEDIA_CONTENT false
thrpt 100 2751601.402 ± 28811.825 ops/s

    // new size: 377
// Benchmark (bufferType) (objectType) (references) Mode Cnt Score Error
Units
// UserTypeDeserializeSuite.fury_deserialize array MEDIA_CONTENT false
thrpt 100 2748329.241 ± 28163.821 ops/s
  • Loading branch information
chaokunyang authored Oct 27, 2024
1 parent b222660 commit 57a9eae
Show file tree
Hide file tree
Showing 23 changed files with 528 additions and 74 deletions.
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ The text of each license is also included in licenses/LICENSE-[project].txt.
java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java
java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/type/Generics.java
java/fury-core/src/test/java/org/apache/fury/type/GenericsTest.java
Expand Down
29 changes: 20 additions & 9 deletions go/fury/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package fury

import (
"fmt"
"github.com/apache/fury/go/fury/meta"
"hash/fnv"
"reflect"
"regexp"
Expand Down Expand Up @@ -136,6 +137,7 @@ const (
NotSupportCrossLanguage = 0
useStringValue = 0
useStringId = 1
SMALL_STRING_THRESHOLD = 16
)

var (
Expand Down Expand Up @@ -551,14 +553,19 @@ func (r *typeResolver) writeMetaString(buffer *ByteBuffer, str string) error {
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
r.dynamicStringToId[str] = dynamicStringId
buffer.WriteVarInt32(int32(len(str) << 1))
// TODO this hash should be unique, since we don't compare data equality for performance
h := fnv.New64a()
if _, err := h.Write([]byte(str)); err != nil {
return err
length := len(str)
buffer.WriteVarInt32(int32(length << 1))
if length <= SMALL_STRING_THRESHOLD {
buffer.WriteByte_(uint8(meta.UTF_8))
} else {
// TODO this hash should be unique, since we don't compare data equality for performance
h := fnv.New64a()
if _, err := h.Write([]byte(str)); err != nil {
return err
}
hash := int64(h.Sum64() & 0xffffffffffffff00)
buffer.WriteInt64(hash)
}
hash := int64(h.Sum64() & 0xffffffffffffff00)
buffer.WriteInt64(hash)
if len(str) > MaxInt16 {
return fmt.Errorf("too long string: %s", str)
}
Expand All @@ -573,8 +580,12 @@ func (r *typeResolver) readMetaString(buffer *ByteBuffer) (string, error) {
header := buffer.ReadVarInt32()
var length = int(header >> 1)
if header&0b1 == 0 {
// TODO support use computed hash
buffer.ReadInt64()
if length <= SMALL_STRING_THRESHOLD {
buffer.ReadByte_()
} else {
// TODO support use computed hash
buffer.ReadInt64()
}
str := string(buffer.ReadBinary(length))
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
*/
@SuppressWarnings("unchecked")
public class FuryObjectMap<K, V> {
static final long MASK_NUMBER = 0x9E3779B97F4A7C15L;
static final Object dummy = new Object();

public int size;
Expand Down Expand Up @@ -135,7 +136,7 @@ public FuryObjectMap(int initialCapacity, float loadFactor) {
* {@code return item.hashCode() & mask;}
*/
protected int place(K item) {
return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
return (int) (item.hashCode() * MASK_NUMBER >>> shift);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* Copyright (c) 2008-2023, Nathan Sweet
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided with the distribution.
* - Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

package org.apache.fury.collection;

import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;

import org.apache.fury.annotation.Internal;
import org.apache.fury.util.Preconditions;

/**
 * A fast linear hash probe based map whose key is two long values {@code (long k1, long k2)}.
 * Lookups and insertions take the two longs directly, so callers never have to build a key object
 * just to query the map; one small internal key object is allocated per stored entry.
 */
// The linear probed hash is derived from
// https://github.com/EsotericSoftware/kryo/blob/135df69526615bb3f6b34846e58ba3fec3b631c3/src/com/esotericsoftware/kryo/util/IntMap.java.
@Internal
public final class LongLongMap<V> {
  /** Immutable holder for the two longs that make up one stored key. */
  private static final class LongLongKey {
    private final long k1;
    private final long k2;

    public LongLongKey(long k1, long k2) {
      this.k1 = k1;
      this.k2 = k2;
    }

    @Override
    public String toString() {
      return "LongLongKey{" + "k1=" + k1 + ", k2=" + k2 + '}';
    }
  }

  /** Number of entries currently stored. */
  public int size;

  // Parallel arrays: valueTable[i] is the value for keyTable[i]; a null key marks an empty slot.
  LongLongKey[] keyTable;
  V[] valueTable;
  private final float loadFactor;
  // The table is doubled as soon as `size` reaches this value.
  private int threshold;
  // Right-shift applied to the 64-bit mixed hash in `place`.
  private int shift;
  // `table length - 1`; used to wrap probe indices.
  private int mask;

  /**
   * Creates a new map with the specified initial capacity and load factor. This map will hold
   * initialCapacity items before growing the backing table.
   *
   * @param initialCapacity If not a power of two, it is increased to the next nearest power of two.
   * @param loadFactor fraction of the table that may be occupied before it is doubled; must be
   *     greater than 0 and at most 1. A value of 0 is rejected because the growth threshold would
   *     be 0 and every insertion would double the table.
   */
  @SuppressWarnings("unchecked")
  public LongLongMap(int initialCapacity, float loadFactor) {
    Preconditions.checkArgument(
        0 < loadFactor && loadFactor <= 1, "loadFactor %s must be > 0 and <= 1", loadFactor);
    this.loadFactor = loadFactor;
    int tableSize = FuryObjectMap.tableSize(initialCapacity, loadFactor);
    threshold = (int) (tableSize * loadFactor);
    mask = tableSize - 1;
    shift = Long.numberOfLeadingZeros(mask);
    keyTable = new LongLongKey[tableSize];
    valueTable = (V[]) new Object[tableSize];
  }

  /** Returns the home slot of the key: both longs are combined, mixed, and shifted down. */
  private int place(long k1, long k2) {
    return (int) ((k1 * 31 + k2) * MASK_NUMBER >>> shift);
  }

  /**
   * Returns the index of the key if already present, else -(index + 1) for the next empty index.
   * Keys are compared by the values of both longs.
   */
  private int locateKey(long k1, long k2) {
    LongLongKey[] keyTable = this.keyTable;
    int mask = this.mask;
    for (int i = place(k1, k2); ; i = i + 1 & mask) {
      LongLongKey other = keyTable[i];
      if (other == null) {
        return -(i + 1); // Empty space is available.
      }
      if (other.k1 == k1 && other.k2 == k2) {
        return i; // Same key was found.
      }
    }
  }

  /**
   * Associates {@code value} with the key {@code (k1, k2)}.
   *
   * @return the value previously stored for the key, or null if the key was not present
   */
  public V put(long k1, long k2, V value) {
    int i = locateKey(k1, k2);
    if (i >= 0) { // Existing key was found.
      V[] valueTable = this.valueTable;
      V oldValue = valueTable[i];
      valueTable[i] = value;
      return oldValue;
    }
    i = -(i + 1); // Empty space was found.
    keyTable[i] = new LongLongKey(k1, k2);
    valueTable[i] = value;
    // Growing as soon as the threshold is reached keeps at least one empty slot in the table,
    // which is what terminates the probe loops in `locateKey` and `get`.
    if (++size >= threshold) {
      resize(keyTable.length << 1);
    }
    return null;
  }

  /** Returns the value stored for the key {@code (k1, k2)}, or null if the key is not present. */
  public V get(long k1, long k2) {
    LongLongKey[] keyTable = this.keyTable;
    int mask = this.mask;
    for (int i = place(k1, k2); ; i = i + 1 & mask) {
      LongLongKey other = keyTable[i];
      if (other == null) {
        return null;
      }
      if (other.k1 == k1 && other.k2 == k2) {
        return valueTable[i];
      }
    }
  }

  /** Replaces the backing tables with tables of {@code newSize} slots and re-inserts all entries. */
  @SuppressWarnings("unchecked")
  private void resize(int newSize) {
    // Allocate before touching any field, so a failed allocation leaves the map usable.
    LongLongKey[] newKeyTable = new LongLongKey[newSize];
    V[] newValueTable = (V[]) new Object[newSize];
    LongLongKey[] oldKeyTable = keyTable;
    V[] oldValueTable = valueTable;
    threshold = (int) (newSize * loadFactor);
    mask = newSize - 1;
    shift = Long.numberOfLeadingZeros(mask); // `place` below must already use the new shift.
    keyTable = newKeyTable;
    valueTable = newValueTable;
    if (size > 0) {
      int mask = this.mask;
      for (int i = 0, oldCapacity = oldKeyTable.length; i < oldCapacity; i++) {
        LongLongKey key = oldKeyTable[i];
        if (key != null) {
          for (int j = place(key.k1, key.k2); ; j = (j + 1) & mask) {
            if (newKeyTable[j] == null) {
              // Keys are immutable, so the existing object is reused instead of re-allocated.
              newKeyTable[j] = key;
              newValueTable[j] = oldValueTable[i];
              break;
            }
          }
        }
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.apache.fury.collection;

import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;

import java.util.Arrays;

// Derived from
Expand Down Expand Up @@ -141,7 +143,7 @@ public LongMap(LongMap<? extends V> map) {
* {@code return item.hashCode() & mask;}
*/
protected int place(long item) {
return (int) (item * 0x9E3779B97F4A7C15L >>> shift);
return (int) (item * MASK_NUMBER >>> shift);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.apache.fury.collection;

import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;
Expand Down Expand Up @@ -59,7 +61,7 @@ public ObjectIntMap(int initialCapacity, float loadFactor) {
}

protected int place(K item) {
return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
return (int) (item.hashCode() * MASK_NUMBER >>> shift);
}

int locateKey(K key) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ private static Object readFromChannel(
Fury fury, ReadableByteChannel channel, Function<MemoryBuffer, Object> action) {
try {
MemoryBuffer buf = fury.getBuffer();
buf.readerIndex(0);
ByteBuffer byteBuffer = ByteBuffer.allocate(4);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
readByteBuffer(channel, byteBuffer, 4);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ public static long getInt64(Object o, long pos) {
return Platform.IS_LITTLE_ENDIAN ? v : Long.reverseBytes(v);
}

/**
 * Stores {@code value} into {@code o} starting at byte {@code index}. The bytes of the value are
 * reversed first when the platform is not little-endian, mirroring what {@code getInt64} does on
 * the read side.
 */
public static void putInt64(byte[] o, int index, long value) {
  final long toStore = Platform.IS_LITTLE_ENDIAN ? value : Long.reverseBytes(value);
  Platform.putLong(o, Platform.BYTE_ARRAY_OFFSET + index, toStore);
}

public static void putFloat32(Object o, long pos, float value) {
int v = Float.floatToRawIntBits(value);
if (!Platform.IS_LITTLE_ENDIAN) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2174,6 +2174,42 @@ public void readBytes(byte[] dst) {
readBytes(dst, 0, dst.length);
}

/**
 * Read {@code len} bytes into a long using little-endian order: the first byte read becomes the
 * least significant byte of the result, and the unused high-order bytes are zero.
 *
 * @param len number of bytes to consume; the masking below is only meaningful for 0 to 8
 * @return the bytes read, assembled as a little-endian value; 0 when {@code len} is 0
 */
public long readBytesAsInt64(int len) {
  int readerIdx = readerIndex;
  // use subtract to avoid overflow
  int remaining = size - readerIdx;
  if (remaining >= 8) {
    // At least 8 bytes remain, so a full long can be loaded and trimmed down to `len` bytes.
    readerIndex = readerIdx + len;
    long v = UNSAFE.getLong(heapMemory, address + readerIdx);
    if (!LITTLE_ENDIAN) {
      // Convert to the little-endian value BEFORE masking; masking the native big-endian value
      // would keep the last bytes of the 8-byte window instead of the first `len` bytes.
      v = Long.reverseBytes(v);
    }
    if (len == 0) {
      // A shift distance of 64 is reduced to 0 by `>>>`, which would keep all 8 bytes.
      return 0;
    }
    return v & (0xffffffffffffffffL >>> ((8 - len) * 8));
  }
  return slowReadBytesAsInt64(remaining, len);
}

/**
 * Byte-by-byte fallback for {@code readBytesAsInt64}. If fewer than {@code len} bytes remain, the
 * stream reader is first asked to fill in the missing amount. The bytes are then assembled with
 * the first byte as the least significant one.
 *
 * @param remaining readable bytes left in the buffer, as computed by the caller
 * @param len number of bytes to consume
 */
private long slowReadBytesAsInt64(int remaining, int len) {
  if (len > remaining) {
    streamReader.fillBuffer(len - remaining);
  }
  final int begin = readerIndex;
  readerIndex = begin + len;
  final byte[] heap = this.heapMemory;
  long value = 0;
  if (heap == null) {
    // No backing array: read each byte through its absolute address.
    final long base = address + begin;
    for (int n = 0; n < len; n++) {
      value |= (UNSAFE.getByte(null, base + n) & 0xffL) << (n << 3);
    }
    return value;
  }
  final int base = heapOffset + begin;
  for (int n = 0; n < len; n++) {
    value |= (heap[base + n] & 0xffL) << (n << 3);
  }
  return value;
}

public int read(ByteBuffer dst) {
int readerIdx = readerIndex;
int len = dst.remaining();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1619,6 +1619,8 @@ public void writeClassInternal(MemoryBuffer buffer, ClassInfo classInfo) {
if (classInfo.classId != NO_CLASS_ID) {
buffer.writeVarUint32(classInfo.classId << 1);
} else {
// let the lowermost bit of next byte be set, so the deserialization can know
// whether need to read class by name in advance
metaStringResolver.writeMetaStringBytesWithFlag(buffer, classInfo.packageNameBytes);
metaStringResolver.writeMetaStringBytes(buffer, classInfo.classNameBytes);
}
Expand All @@ -1634,6 +1636,8 @@ public Class<?> readClassInternal(MemoryBuffer buffer) {
int header = buffer.readVarUint32Small14();
final ClassInfo classInfo;
if ((header & 0b1) != 0) {
// let the lowermost bit of next byte be set, so the deserialization can know
// whether need to read class by name in advance
MetaStringBytes packageBytes = metaStringResolver.readMetaStringBytesWithFlag(buffer, header);
MetaStringBytes simpleClassNameBytes = metaStringResolver.readMetaStringBytes(buffer);
classInfo = loadBytesToClassInfo(packageBytes, simpleClassNameBytes);
Expand Down
Loading

0 comments on commit 57a9eae

Please sign in to comment.