Skip to content

Commit

Permalink
[INLONG-11236][SDK] Transform SQL supports FIND_IN_SET function
Browse files Browse the repository at this point in the history
  • Loading branch information
ZKpLo committed Oct 1, 2024
1 parent e0d7f8d commit 762bfd1
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Function;

/**
 * FindInSetFunction -> FIND_IN_SET(str, strList)
 * description:
 * - return a value in the range of 1 to N if the string str is in the string list strList consisting of N substrings.
 * - return 0 if str is not in strList or if strList is the empty string.
 * - return NULL if either argument is NULL.
 * Note: `strList` is a string composed of substrings separated by ',' characters. Empty substrings count as
 * elements wherever they occur, including at the end of the list (e.g. the third element of 'a,b,' is '').
 * If the first argument contains a comma (,) character it can never equal a single element, so the result is 0.
 */
@TransformFunction(names = {"find_in_set"})
public class FindInSetFunction implements ValueParser {

    /** Parser for the first argument: the string to look for. */
    private final ValueParser strParser;
    /** Parser for the second argument: the comma-separated list to search. */
    private final ValueParser strListParser;

    /**
     * Builds the value parsers for the two function arguments.
     *
     * @param expr the parsed FIND_IN_SET call; its first two parameters are used as str and strList
     */
    public FindInSetFunction(Function expr) {
        strParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
        strListParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(1));
    }

    /**
     * Evaluates FIND_IN_SET for one row.
     *
     * @return null if either argument evaluates to null; otherwise the 1-based position of the first
     *         element of strList equal to str, or 0 if there is none or strList is empty
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        Object strObj = strParser.parse(sourceData, rowIndex, context);
        Object strListObj = strListParser.parse(sourceData, rowIndex, context);
        if (strObj == null || strListObj == null) {
            return null;
        }
        String str = OperatorTools.parseString(strObj);
        String strList = OperatorTools.parseString(strListObj);
        // An empty list has no elements at all, so even an empty str is "not found".
        if (strList.isEmpty()) {
            return 0;
        }
        // A negative limit keeps trailing empty strings; the default split(",") would silently drop
        // them, so FIND_IN_SET('', 'a,b,') would wrongly report 0 instead of 3.
        String[] strArray = strList.split(",", -1);
        for (int i = 0; i < strArray.length; i++) {
            if (str.equals(strArray[i])) {
                // Positions are 1-based.
                return i + 1;
            }
        }
        return 0;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;
import org.apache.inlong.sdk.transform.process.function.string.AbstractFunctionStringTestBase;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

public class TestFindInSetFunction extends AbstractFunctionStringTestBase {

    /**
     * Runs FIND_IN_SET(string1,string2) over several input rows and checks the single encoded result
     * of each. The inline case comments show which values each row supplies for string1 and string2.
     */
    @Test
    public void testFindInSetFunction() throws Exception {
        final String sql = "select FIND_IN_SET(string1,string2) from source";
        final TransformProcessor<String, String> findInSet = TransformProcessor.create(
                new TransformConfig(sql),
                SourceDecoderFactory.createCsvDecoder(csvSource),
                SinkEncoderFactory.createKvEncoder(kvSink));

        // case1: FIND_IN_SET('b','a,b,b,c,d') -> first matching position wins
        expectSingleResult(findInSet, "b|a,b,b,c,d|cloud|5|3|3", "result=2");

        // case2: FIND_IN_SET('','a,,b,c,d') -> the empty element is at position 2
        expectSingleResult(findInSet, "|a,,b,c,d|cloud|5|3|3", "result=2");

        // case3: FIND_IN_SET(',','a,,b,c,d') -> a comma can never match an element
        expectSingleResult(findInSet, ",|a,,b,c,d|cloud|5|3|3", "result=0");

        // case4: FIND_IN_SET('','') -> an empty list always yields 0
        expectSingleResult(findInSet, "||cloud|5|3|3", "result=0");
    }

    /**
     * Transforms one source row and asserts that exactly one output record is produced and that it
     * equals the expected text.
     */
    private static void expectSingleResult(TransformProcessor<String, String> processor, String row,
            String expected) throws Exception {
        List<String> records = processor.transform(row, new HashMap<>());
        Assert.assertEquals(1, records.size());
        Assert.assertEquals(expected, records.get(0));
    }
}

0 comments on commit 762bfd1

Please sign in to comment.