Skip to content

Commit

Permalink
Merge pull request #2077 from ClickHouse/support_variant_type
Browse files Browse the repository at this point in the history
Support variant type
  • Loading branch information
chernser authored Feb 5, 2025
2 parents 6788dbc + a34707e commit badbdcf
Show file tree
Hide file tree
Showing 18 changed files with 1,254 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,19 @@

import java.io.Serializable;
import java.lang.reflect.Array;
import java.math.BigInteger;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TimeZone;

/**
Expand All @@ -65,6 +71,7 @@ public final class ClickHouseColumn implements Serializable {
private static final String KEYWORD_OBJECT = ClickHouseDataType.Object.name();
private static final String KEYWORD_MAP = ClickHouseDataType.Map.name();
private static final String KEYWORD_NESTED = ClickHouseDataType.Nested.name();
private static final String KEYWORD_VARIANT = ClickHouseDataType.Variant.name();

private int columnCount;
private int columnIndex;
Expand Down Expand Up @@ -92,6 +99,14 @@ public final class ClickHouseColumn implements Serializable {

private ClickHouseValue template;

// Lookup tables used by getVariantOrdNum(Object) to pick the nested (variant member)
// column that can store a given Java value. Values are positions in the nested
// column list; a missing key means "no suitable member".

// Java class of a plain (non-array, non-list, non-map) value -> nested column position.
// Assigned in readColumn from ClickHouseDataType.buildVariantMapping(...).
private Map<Class<?>, Integer> classToVariantOrdNumMap;

// Java element class -> position of a nested Array column whose base type accepts it.
// Created lazily in readColumn, so it stays null when no such nested Array column exists.
private Map<Class<?>, Integer> arrayToVariantOrdNumMap;

// Java key class / value class -> position of a nested Map column that accepts it.
// Both are created lazily in readColumn and stay null when no nested Map column
// has key and value types with known Java classes.
private Map<Class<?>, Integer> mapKeyToVariantOrdNumMap;
private Map<Class<?>, Integer> mapValueToVariantOrdNumMap;


private static ClickHouseColumn update(ClickHouseColumn column) {
column.enumConstants = ClickHouseEnum.EMPTY;
int size = column.parameters.size();
Expand Down Expand Up @@ -273,6 +288,9 @@ private static ClickHouseColumn update(ClickHouseColumn column) {
case Nothing:
column.template = ClickHouseEmptyValue.INSTANCE;
break;
case Variant:
column.template = ClickHouseTupleValue.of();
break;
default:
break;
}
Expand Down Expand Up @@ -398,7 +416,8 @@ protected static int readColumn(String args, int startIndex, int len, String nam
fixedLength = false;
estimatedLength++;
} else if (args.startsWith(matchedKeyword = KEYWORD_TUPLE, i)
|| args.startsWith(matchedKeyword = KEYWORD_OBJECT, i)) {
|| args.startsWith(matchedKeyword = KEYWORD_OBJECT, i)
|| args.startsWith(matchedKeyword = KEYWORD_VARIANT, i)) {
int index = args.indexOf('(', i + matchedKeyword.length());
if (index < i) {
throw new IllegalArgumentException(ERROR_MISSING_NESTED_TYPE);
Expand All @@ -410,12 +429,22 @@ protected static int readColumn(String args, int startIndex, int len, String nam
if (c == ')') {
break;
} else if (c != ',' && !Character.isWhitespace(c)) {
String columnName = "";
i = readColumn(args, i, endIndex, "", nestedColumns);
}
}
if (nestedColumns.isEmpty()) {
throw new IllegalArgumentException("Tuple should have at least one nested column");
}

List<ClickHouseDataType> variantDataTypes = new ArrayList<>();
if (matchedKeyword.equals(KEYWORD_VARIANT)) {
nestedColumns.sort(Comparator.comparing(o -> o.getDataType().name()));
nestedColumns.forEach(c -> {
c.columnName = "v." + c.getDataType().name();
variantDataTypes.add(c.dataType);
});
}
column = new ClickHouseColumn(ClickHouseDataType.valueOf(matchedKeyword), name,
args.substring(startIndex, endIndex + 1), nullable, lowCardinality, null, nestedColumns);
for (ClickHouseColumn n : nestedColumns) {
Expand All @@ -424,6 +453,39 @@ protected static int readColumn(String args, int startIndex, int len, String nam
fixedLength = false;
}
}
column.classToVariantOrdNumMap = ClickHouseDataType.buildVariantMapping(variantDataTypes);

for (int ordNum = 0; ordNum < nestedColumns.size(); ordNum++) {
ClickHouseColumn nestedColumn = nestedColumns.get(ordNum);
if (nestedColumn.getDataType() == ClickHouseDataType.Array) {
Set<Class<?>> classSet = ClickHouseDataType.DATA_TYPE_TO_CLASS.get(nestedColumn.arrayBaseColumn.dataType);
if (classSet != null) {
if (column.arrayToVariantOrdNumMap == null) {
column.arrayToVariantOrdNumMap = new HashMap<>();
}
for (Class<?> c : classSet) {
column.arrayToVariantOrdNumMap.put(c, ordNum);
}
}
} else if (nestedColumn.getDataType() == ClickHouseDataType.Map) {
Set<Class<?>> keyClassSet = ClickHouseDataType.DATA_TYPE_TO_CLASS.get(nestedColumn.getKeyInfo().getDataType());
Set<Class<?>> valueClassSet = ClickHouseDataType.DATA_TYPE_TO_CLASS.get(nestedColumn.getValueInfo().getDataType());
if (keyClassSet != null && valueClassSet != null) {
if (column.mapKeyToVariantOrdNumMap == null) {
column.mapKeyToVariantOrdNumMap = new HashMap<>();
}
if (column.mapValueToVariantOrdNumMap == null) {
column.mapValueToVariantOrdNumMap = new HashMap<>();
}
for (Class<?> c : keyClassSet) {
column.mapKeyToVariantOrdNumMap.put(c, ordNum);
}
for (Class<?> c : valueClassSet) {
column.mapValueToVariantOrdNumMap.put(c, ordNum);
}
}
}
}
}

if (column == null) {
Expand Down Expand Up @@ -627,6 +689,52 @@ public boolean isAggregateFunction() {

}

/**
 * Finds the position of the nested column (variant member) that can store the
 * given Java value.
 * <p>
 * Resolution rules:
 * <ul>
 * <li>arrays are matched by their innermost component type;</li>
 * <li>lists are matched by the class of the first non-null element, descending
 * through nested lists;</li>
 * <li>maps are matched by the key and value classes of the first entry whose key
 * and value are both non-null;</li>
 * <li>any other value is matched by its own class.</li>
 * </ul>
 *
 * @param value value to be written, may be {@code null}
 * @return zero-based position of the matching nested column, or {@code -1} when
 *         the value is {@code null}, when its type cannot be determined (for
 *         example an empty list or a map without usable entries), or when no
 *         nested column accepts it
 */
public int getVariantOrdNum(Object value) {
    if (value == null) {
        // nothing to inspect, hence no member can be selected
        return -1;
    }

    Class<?> valueClass = value.getClass();
    if (valueClass.isArray()) {
        // TODO: add cache by value class
        Class<?> componentType = valueClass;
        while (componentType.isArray()) {
            componentType = componentType.getComponentType();
        }
        return findVariantOrdNum(arrayToVariantOrdNumMap, componentType);
    }

    if (value instanceof List<?>) {
        // TODO: add cache by instance of the list
        Object element = value;
        while (element instanceof List<?>) {
            Object firstNonNull = null;
            for (Object candidate : (List<?>) element) {
                if (candidate != null) {
                    firstNonNull = candidate;
                    break;
                }
            }
            element = firstNonNull;
        }
        // an empty (or all-null) list gives no hint about its element type
        return element == null ? -1 : findVariantOrdNum(arrayToVariantOrdNumMap, element.getClass());
    }

    if (value instanceof Map<?, ?>) {
        // TODO: add cache by instance of map
        return findMapVariantOrdNum((Map<?, ?>) value);
    }

    return findVariantOrdNum(classToVariantOrdNumMap, valueClass);
}

/**
 * Null-safe lookup: the per-kind tables are created lazily and are absent when
 * the column has no member of the corresponding kind.
 */
private static int findVariantOrdNum(Map<Class<?>, Integer> mapping, Class<?> clazz) {
    return mapping == null ? -1 : mapping.getOrDefault(clazz, -1);
}

/**
 * Resolves the nested Map column for a map value using its first entry that has
 * both a non-null key and a non-null value.
 */
private int findMapVariantOrdNum(Map<?, ?> map) {
    if (mapKeyToVariantOrdNumMap == null || mapValueToVariantOrdNumMap == null) {
        return -1;
    }

    for (Map.Entry<?, ?> e : map.entrySet()) {
        Object key = e.getKey();
        Object val = e.getValue();
        if (key == null || val == null) {
            continue;
        }

        int keyOrdNum = mapKeyToVariantOrdNumMap.getOrDefault(key.getClass(), -1);
        int valueOrdNum = mapValueToVariantOrdNumMap.getOrDefault(val.getClass(), -1);

        if (keyOrdNum == valueOrdNum) {
            return valueOrdNum; // same member for key and value (or -1 when neither is known)
        }
        if (keyOrdNum == -1 || valueOrdNum == -1) {
            return -1; // only one side is known, so no member fits the entry
        }
        if (ClickHouseDataType.DATA_TYPE_TO_CLASS.get(nested.get(keyOrdNum).getValueInfo().getDataType())
                .contains(val.getClass())) {
            return keyOrdNum; // can write to map found by key class because values are compatible
        }
        return valueOrdNum;
    }
    return -1;
}

public boolean isArray() {
return dataType == ClickHouseDataType.Array;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,35 @@
package com.clickhouse.data;

import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.Inet4Address;
import java.net.Inet6Address;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
import java.util.stream.Collectors;

import com.clickhouse.data.value.ClickHouseGeoMultiPolygonValue;
import com.clickhouse.data.value.ClickHouseGeoPointValue;
import com.clickhouse.data.value.ClickHouseGeoPolygonValue;
import com.clickhouse.data.value.ClickHouseGeoRingValue;
import com.clickhouse.data.value.UnsignedByte;
import com.clickhouse.data.value.UnsignedInteger;
import com.clickhouse.data.value.UnsignedLong;
Expand Down Expand Up @@ -101,7 +114,111 @@ public enum ClickHouseDataType {
Nothing(Object.class, false, true, false, 0, 0, 0, 0, 0, true),
SimpleAggregateFunction(String.class, true, true, false, 0, 0, 0, 0, 0, false),
// implementation-defined intermediate state
AggregateFunction(String.class, true, true, false, 0, 0, 0, 0, 0, true);
AggregateFunction(String.class, true, true, false, 0, 0, 0, 0, 0, true),
Variant(List.class, true, true, false, 0, 0, 0, 0, 0, true),

;

/**
 * Integer data types in the order used by {@link #buildVariantMapping(List)}:
 * mappings are applied from the first element to the last, so a type later in
 * this list overrides an earlier one for the Java classes they share.
 */
public static final List<ClickHouseDataType> ORDERED_BY_RANGE_INT_TYPES =
Collections.unmodifiableList(Arrays.asList(
Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, Int256, UInt256
));

/**
 * Floating point and decimal data types in the order used by
 * {@link #buildVariantMapping(List)}; as with the integer list, later elements
 * override earlier ones for shared Java classes.
 */
public static final List<ClickHouseDataType> ORDERED_BY_RANGE_DECIMAL_TYPES =
Collections.unmodifiableList(Arrays.asList(
Float32, Float64, Decimal32, Decimal64, Decimal128, Decimal256, Decimal
));

/**
 * Builds a lookup table from Java class to the position of the variant member
 * that should receive values of that class.
 * <p>
 * Members without an entry in {@code DATA_TYPE_TO_CLASS} are ignored. Members
 * that are neither integer nor floating point/decimal types are registered
 * first, in list order. Integer members and then floating point/decimal members
 * are registered afterwards following {@link #ORDERED_BY_RANGE_INT_TYPES} and
 * {@link #ORDERED_BY_RANGE_DECIMAL_TYPES}; because a later registration replaces
 * an earlier one, the member that comes last in those lists wins for a Java
 * class accepted by several members.
 *
 * @param variantDataTypes data types of the variant members, in member order
 * @return mutable map of Java class to member position
 */
public static Map<Class<?>, Integer> buildVariantMapping(List<ClickHouseDataType> variantDataTypes) {
    Comparator<ClickHouseDataType> byIntegerOrder = Comparator.comparingInt(ORDERED_BY_RANGE_INT_TYPES::indexOf);
    Comparator<ClickHouseDataType> byDecimalOrder = Comparator.comparingInt(ORDERED_BY_RANGE_DECIMAL_TYPES::indexOf);
    TreeMap<ClickHouseDataType, Integer> integerMembers = new TreeMap<>(byIntegerOrder);
    TreeMap<ClickHouseDataType, Integer> decimalMembers = new TreeMap<>(byDecimalOrder);
    Map<Class<?>, Integer> classToOrdNum = new HashMap<>();

    int position = 0;
    for (ClickHouseDataType memberType : variantDataTypes) {
        int ordNum = position++;
        Set<Class<?>> acceptedClasses = DATA_TYPE_TO_CLASS.get(memberType);
        if (acceptedClasses == null) {
            // no known Java representation for this member
            continue;
        }

        if (ORDERED_BY_RANGE_INT_TYPES.contains(memberType)) {
            integerMembers.put(memberType, ordNum);
        } else if (ORDERED_BY_RANGE_DECIMAL_TYPES.contains(memberType)) {
            decimalMembers.put(memberType, ordNum);
        } else {
            for (Class<?> javaClass : acceptedClasses) {
                classToOrdNum.put(javaClass, ordNum);
            }
        }
    }

    // numeric members go last, integers before decimals, each group in its range order
    integerMembers.forEach((memberType, ordNum) -> {
        for (Class<?> javaClass : DATA_TYPE_TO_CLASS.get(memberType)) {
            classToOrdNum.put(javaClass, ordNum);
        }
    });
    decimalMembers.forEach((memberType, ordNum) -> {
        for (Class<?> javaClass : DATA_TYPE_TO_CLASS.get(memberType)) {
            classToOrdNum.put(javaClass, ordNum);
        }
    });

    return classToOrdNum;
}

/**
 * Java classes accepted for each data type, as built by {@link #dataTypeClassMap()}.
 * Data types without an entry have no known Java representation here.
 */
static final Map<ClickHouseDataType, Set<Class<?>>> DATA_TYPE_TO_CLASS = dataTypeClassMap();
/**
 * Creates the table of Java classes that may be written to each data type.
 *
 * @return new mutable map from data type to an unmodifiable set of accepted classes
 */
static Map<ClickHouseDataType, Set<Class<?>>> dataTypeClassMap() {
    // Integer class groups, from narrowest to widest.
    // Unsigned types accept the next wider Java type even though a value may not fit:
    // this is required so that users can make use of the UInt* data types at all.
    final Set<Class<?>> byteClasses = setOf(byte.class, Byte.class);
    final Set<Class<?>> upToShort = setOf(byte.class, Byte.class, short.class, Short.class);
    final Set<Class<?>> upToInt = setOf(byte.class, Byte.class, short.class, Short.class, int.class, Integer.class);
    final Set<Class<?>> upToLong = setOf(byte.class, Byte.class, short.class, Short.class, int.class, Integer.class,
            long.class, Long.class);
    final Set<Class<?>> upToBigInteger = setOf(byte.class, Byte.class, short.class, Short.class, int.class,
            Integer.class, long.class, Long.class, BigInteger.class);

    // Floating point class groups.
    final Set<Class<?>> floatClasses = setOf(float.class, Float.class);
    final Set<Class<?>> upToDouble = setOf(float.class, Float.class, double.class, Double.class);
    final Set<Class<?>> upToBigDecimal = setOf(float.class, Float.class, double.class, Double.class, BigDecimal.class);

    // Temporal class groups.
    final Set<Class<?>> dateClasses = setOf(LocalDateTime.class, LocalDate.class, ZonedDateTime.class);
    final Set<Class<?>> dateTimeClasses = setOf(LocalDateTime.class, ZonedDateTime.class);

    // Enums may be written by name or by numeric value.
    final Set<Class<?>> enumClasses = setOf(java.lang.String.class, byte.class, Byte.class, short.class, Short.class,
            int.class, Integer.class, long.class, Long.class);

    Map<ClickHouseDataType, Set<Class<?>>> classesByType = new HashMap<>();

    classesByType.put(Int8, byteClasses);
    classesByType.put(UInt8, upToShort);
    classesByType.put(Int16, upToShort);
    classesByType.put(UInt16, upToInt);
    classesByType.put(Int32, upToInt);
    classesByType.put(UInt32, upToLong);
    classesByType.put(Int64, upToLong);
    classesByType.put(UInt64, upToBigInteger);
    classesByType.put(Int128, upToBigInteger);
    classesByType.put(UInt128, upToBigInteger);
    classesByType.put(Int256, upToBigInteger);
    classesByType.put(UInt256, upToBigInteger);

    classesByType.put(Bool, setOf(boolean.class, Boolean.class));
    classesByType.put(String, setOf(String.class));

    classesByType.put(Float32, floatClasses);
    classesByType.put(Float64, upToDouble);
    classesByType.put(Decimal32, floatClasses);
    classesByType.put(Decimal64, upToDouble);
    classesByType.put(Decimal128, upToBigDecimal);
    classesByType.put(Decimal256, upToBigDecimal);
    classesByType.put(Decimal, upToBigDecimal);

    classesByType.put(IPv4, setOf(Inet4Address.class));
    classesByType.put(IPv6, setOf(Inet6Address.class));
    classesByType.put(UUID, setOf(java.util.UUID.class));

    classesByType.put(Point, setOf(double[].class, ClickHouseGeoPointValue.class));
    classesByType.put(Ring, setOf(double[][].class, ClickHouseGeoRingValue.class));
    classesByType.put(Polygon, setOf(double[][][].class, ClickHouseGeoPolygonValue.class));
    classesByType.put(MultiPolygon, setOf(double[][][][].class, ClickHouseGeoMultiPolygonValue.class));

    classesByType.put(Date, dateClasses);
    classesByType.put(Date32, dateClasses);
    classesByType.put(DateTime, dateTimeClasses);
    classesByType.put(DateTime32, dateTimeClasses);
    classesByType.put(DateTime64, dateTimeClasses);

    classesByType.put(Enum8, enumClasses);
    classesByType.put(Enum16, enumClasses);

    classesByType.put(Array, setOf(List.class, Object[].class, byte[].class, short[].class, int[].class,
            long[].class, boolean[].class));
    return classesByType;
}

/**
 * Wraps the given classes into an unmodifiable set; duplicates collapse into one element.
 *
 * @param args classes to include
 * @return unmodifiable set containing the given classes
 */
private static Set<Class<?>> setOf(Class<?>... args) {
    Set<Class<?>> classes = new HashSet<>(Arrays.asList(args));
    return Collections.unmodifiableSet(classes);
}


/**
* Immutable set(sorted) for all aliases.
Expand Down
Loading

0 comments on commit badbdcf

Please sign in to comment.