Skip to content

Commit

Permalink
[Feature][core] support arrow transfers data to SeatunnelRow in arrow…
Browse files Browse the repository at this point in the history
… format
  • Loading branch information
hawk9821 committed Dec 5, 2024
1 parent e6f92fd commit 6d0c464
Show file tree
Hide file tree
Showing 26 changed files with 1,515 additions and 1,204 deletions.
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@
<enableSourceJarCreation>true</enableSourceJarCreation>

<hadoop-aws.version>3.1.4</hadoop-aws.version>
<arrow.version>15.0.1</arrow.version>

</properties>

Expand Down Expand Up @@ -489,6 +490,17 @@
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
<version>${arrow.version}</version>
</dependency>

</dependencies>
</dependencyManagement>

Expand Down
7 changes: 7 additions & 0 deletions seatunnel-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@
<classifier>optional</classifier>
</dependency>

<dependency>
<groupId>org.apache.seatunnel</groupId>
<artifactId>seatunnel-arrow</artifactId>
<version>${project.version}</version>
<classifier>optional</classifier>
</dependency>

<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.common.source.arrow.converter;

import org.apache.seatunnel.shade.org.apache.arrow.vector.FieldVector;
import org.apache.seatunnel.shade.org.apache.arrow.vector.types.Types;

import java.util.Map;
import java.util.function.Function;

/**
 * Contract for turning one cell of an Arrow vector into a plain Java value.
 *
 * @param <T> the concrete Arrow vector type this converter reads from
 */
public interface Converter<T extends FieldVector> {

/** Lookup key for the element converter; the list converters in this package read it. */
String ARRAY_KEY = "ARRAY";
/** Lookup key for the map-key converter; read by {@code MapConverter}. */
String MAP_KEY = "KEY";
/** Lookup key for the map-value converter; read by {@code MapConverter}. */
String MAP_VALUE = "VALUE";

/**
 * Converts the value stored at {@code rowIndex} of {@code fieldVector}.
 *
 * @param rowIndex index of the row to read
 * @param fieldVector vector holding the value
 * @return the converted value; the implementations in this package return {@code null} for a
 *     null cell
 */
Object convert(int rowIndex, T fieldVector);

/**
 * Converts a value whose nested elements need their own converters. The default
 * implementation does not support this and always throws.
 *
 * @param rowIndex index of the row to read
 * @param fieldVector vector holding the value
 * @param genericsConverters nested converters, looked up by {@link #ARRAY_KEY},
 *     {@link #MAP_KEY} or {@link #MAP_VALUE}
 * @return the converted value
 * @throws UnsupportedOperationException unless the implementation overrides this method
 */
default Object convert(int rowIndex, T fieldVector, Map<String, Function> genericsConverters) {
throw new UnsupportedOperationException("Unsupported generics convert");
}

/**
 * Tells whether this converter handles the given Arrow minor type.
 *
 * @param type the Arrow minor type to test
 * @return {@code true} if this converter handles {@code type}
 */
boolean support(Types.MinorType type);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.common.source.arrow.converter;

import org.apache.seatunnel.shade.org.apache.arrow.vector.FieldVector;
import org.apache.seatunnel.shade.org.apache.arrow.vector.types.Types;

/**
 * Converter that hands back whatever object the Arrow vector itself produces.
 *
 * <p>NOTE(review): {@link #support} always answers {@code false}, so this class is presumably
 * chosen explicitly as a fallback rather than through type lookup — confirm against the caller.
 */
public class DefaultConverter implements Converter<FieldVector> {

    /**
     * Reads the raw object at the given row.
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the value
     * @return the vector's own object for that row, or {@code null} when the cell is null
     */
    @Override
    public Object convert(int rowIndex, FieldVector fieldVector) {
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        return fieldVector.getObject(rowIndex);
    }

    /**
     * Never claims a type.
     *
     * @param type the Arrow minor type to test (ignored)
     * @return always {@code false}
     */
    @Override
    public boolean support(Types.MinorType type) {
        return false;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.common.source.arrow.converter;

import org.apache.seatunnel.shade.org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.seatunnel.shade.org.apache.arrow.vector.types.Types;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

/** Converts cells of an Arrow {@code FixedSizeListVector} into Java lists. */
public class FixedSizeListConverter implements Converter<FixedSizeListVector> {

    /**
     * Reads the list at the given row without converting its elements.
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the list
     * @return the vector's own list object, or {@code null} when the cell is null
     */
    @Override
    public Object convert(int rowIndex, FixedSizeListVector fieldVector) {
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        return fieldVector.getObject(rowIndex);
    }

    /**
     * Reads the list at the given row and runs every element through the converter registered
     * under {@code ARRAY_KEY}. {@link LocalDateTime} elements are first re-expressed in the
     * system default zone (see {@link #utcToSystemZone(Object)}).
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the list
     * @param genericsConverters nested converters; the {@code ARRAY_KEY} entry is applied to
     *     each element
     * @return a new list of converted elements, or {@code null} when the cell is null
     */
    @Override
    public Object convert(
            int rowIndex,
            FixedSizeListVector fieldVector,
            Map<String, Function> genericsConverters) {
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        List<?> elements = fieldVector.getObject(rowIndex);
        Function elementConverter = genericsConverters.get(ARRAY_KEY);
        return elements.stream()
                .map(element -> elementConverter.apply(utcToSystemZone(element)))
                .collect(Collectors.toList());
    }

    /**
     * Interprets a {@link LocalDateTime} as UTC wall-clock time and returns the wall-clock time
     * of the same instant in the system default zone; any other value passes through untouched.
     */
    private static Object utcToSystemZone(Object element) {
        if (!(element instanceof LocalDateTime)) {
            return element;
        }
        LocalDateTime utcValue = (LocalDateTime) element;
        return utcValue
                .atZone(ZoneOffset.UTC)
                .withZoneSameInstant(ZoneId.systemDefault())
                .toLocalDateTime();
    }

    /**
     * @param type the Arrow minor type to test
     * @return {@code true} only for {@code FIXED_SIZE_LIST}
     */
    @Override
    public boolean support(Types.MinorType type) {
        return type == Types.MinorType.FIXED_SIZE_LIST;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.common.source.arrow.converter;

import org.apache.seatunnel.shade.org.apache.arrow.vector.complex.LargeListVector;
import org.apache.seatunnel.shade.org.apache.arrow.vector.types.Types;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

/** Converts cells of an Arrow {@code LargeListVector} into Java lists. */
public class LargeListConverter implements Converter<LargeListVector> {

    /**
     * Reads the list at the given row without converting its elements.
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the list
     * @return the vector's own list object, or {@code null} when the cell is null
     */
    @Override
    public Object convert(int rowIndex, LargeListVector fieldVector) {
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        return fieldVector.getObject(rowIndex);
    }

    /**
     * Reads the list at the given row and applies the converter registered under
     * {@code ARRAY_KEY} to each element. {@link LocalDateTime} elements are treated as UTC and
     * moved to the system default zone before conversion.
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the list
     * @param genericsConverters nested converters; the {@code ARRAY_KEY} entry is applied to
     *     each element
     * @return {@code null} for a null cell, an empty list for an empty cell, otherwise a new
     *     list of converted elements
     */
    @Override
    public Object convert(
            int rowIndex, LargeListVector fieldVector, Map<String, Function> genericsConverters) {
        // The null check must come first so a null cell is not reported as an empty list.
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        if (fieldVector.isEmpty(rowIndex)) {
            return Collections.emptyList();
        }
        List<?> rawItems = fieldVector.getObject(rowIndex);
        Function itemConverter = genericsConverters.get(ARRAY_KEY);
        return rawItems.stream()
                .map(
                        raw ->
                                itemConverter.apply(
                                        raw instanceof LocalDateTime
                                                ? toSystemZone((LocalDateTime) raw)
                                                : raw))
                .collect(Collectors.toList());
    }

    /** Re-expresses a UTC wall-clock time as the wall-clock time in the system default zone. */
    private static LocalDateTime toSystemZone(LocalDateTime utcDateTime) {
        return utcDateTime
                .atZone(ZoneOffset.UTC)
                .withZoneSameInstant(ZoneId.systemDefault())
                .toLocalDateTime();
    }

    /**
     * @param type the Arrow minor type to test
     * @return {@code true} only for {@code LARGELIST}
     */
    @Override
    public boolean support(Types.MinorType type) {
        return type == Types.MinorType.LARGELIST;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.common.source.arrow.converter;

import org.apache.seatunnel.shade.org.apache.arrow.vector.complex.ListVector;
import org.apache.seatunnel.shade.org.apache.arrow.vector.types.Types;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

/** Converts cells of an Arrow {@code ListVector} into Java lists. */
public class ListConverter implements Converter<ListVector> {

    /**
     * Reads the list at the given row without converting its elements.
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the list
     * @return the vector's own list object, or {@code null} when the cell is null
     */
    @Override
    public Object convert(int rowIndex, ListVector fieldVector) {
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        return fieldVector.getObject(rowIndex);
    }

    /**
     * Reads the list at the given row and converts each element with the converter registered
     * under {@code ARRAY_KEY}. {@link LocalDateTime} elements are zone-shifted first (see
     * {@link #normalizeZone(Object)}).
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the list
     * @param genericsConverters nested converters; the {@code ARRAY_KEY} entry is applied to
     *     each element
     * @return {@code null} for a null cell, an empty list for an empty cell, otherwise a new
     *     list of converted elements
     */
    @Override
    public Object convert(
            int rowIndex, ListVector fieldVector, Map<String, Function> genericsConverters) {
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        if (fieldVector.isEmpty(rowIndex)) {
            return Collections.emptyList();
        }
        List<?> values = fieldVector.getObject(rowIndex);
        Function valueConverter = genericsConverters.get(ARRAY_KEY);
        return values.stream()
                .map(ListConverter::normalizeZone)
                .map(normalized -> valueConverter.apply(normalized))
                .collect(Collectors.toList());
    }

    /**
     * Treats a {@link LocalDateTime} as UTC wall-clock time and returns the wall-clock time of
     * the same instant in the system default zone. Values of any other type are returned as-is.
     */
    private static Object normalizeZone(Object value) {
        if (value instanceof LocalDateTime) {
            return ((LocalDateTime) value)
                    .atZone(ZoneOffset.UTC)
                    .withZoneSameInstant(ZoneId.systemDefault())
                    .toLocalDateTime();
        }
        return value;
    }

    /**
     * @param type the Arrow minor type to test
     * @return {@code true} only for {@code LIST}
     */
    @Override
    public boolean support(Types.MinorType type) {
        return type == Types.MinorType.LIST;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.common.source.arrow.converter;

import org.apache.seatunnel.shade.org.apache.arrow.vector.complex.MapVector;
import org.apache.seatunnel.shade.org.apache.arrow.vector.complex.impl.UnionMapReader;
import org.apache.seatunnel.shade.org.apache.arrow.vector.types.Types;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

/** Converts cells of an Arrow {@code MapVector} into Java maps. */
public class MapConverter implements Converter<MapVector> {

    /**
     * Reads the map cell at the given row without converting its entries.
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the map
     * @return the vector's own object for that row, or {@code null} when the cell is null
     */
    @Override
    public Object convert(int rowIndex, MapVector fieldVector) {
        return fieldVector.isNull(rowIndex) ? null : fieldVector.getObject(rowIndex);
    }

    /**
     * Reads the map cell at the given row, converting every key with the {@code MAP_KEY}
     * converter and every value with the {@code MAP_VALUE} converter. {@link LocalDateTime}
     * keys and values are zone-shifted first (see {@link #processTimeZone(Object)}).
     *
     * @param rowIndex index of the row to read
     * @param fieldVector vector holding the map
     * @param genericsConverters nested converters; must contain {@code MAP_KEY} and
     *     {@code MAP_VALUE} entries
     * @return a new {@link HashMap} of converted entries, or {@code null} when the cell is null
     */
    @Override
    public Object convert(
            int rowIndex, MapVector fieldVector, Map<String, Function> genericsConverters) {
        // A null cell must stay null, matching the two-argument overload and the list
        // converters; without this guard it was silently turned into an empty map.
        if (fieldVector.isNull(rowIndex)) {
            return null;
        }
        UnionMapReader reader = fieldVector.getReader();
        reader.setPosition(rowIndex);
        Map<Object, Object> mapValue = new HashMap<>();
        Function keyConverter = genericsConverters.get(MAP_KEY);
        Function valueConverter = genericsConverters.get(MAP_VALUE);
        while (reader.next()) {
            Object key = keyConverter.apply(processTimeZone(reader.key().readObject()));
            Object value = valueConverter.apply(processTimeZone(reader.value().readObject()));
            mapValue.put(key, value);
        }
        return mapValue;
    }

    /**
     * Treats a {@link LocalDateTime} as UTC wall-clock time and returns the wall-clock time of
     * the same instant in the system default zone. Values of any other type are returned as-is.
     */
    private Object processTimeZone(Object value) {
        if (value instanceof LocalDateTime) {
            return ((LocalDateTime) value)
                    .atZone(ZoneOffset.UTC)
                    .withZoneSameInstant(ZoneId.systemDefault())
                    .toLocalDateTime();
        } else {
            return value;
        }
    }

    /**
     * @param type the Arrow minor type to test
     * @return {@code true} only for {@code MAP}
     */
    @Override
    public boolean support(Types.MinorType type) {
        return Types.MinorType.MAP == type;
    }
}
Loading

0 comments on commit 6d0c464

Please sign in to comment.