diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/SplitIndexFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/SplitIndexFunction.java new file mode 100644 index 00000000000..d45706bee01 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/SplitIndexFunction.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +/* + * SplitIndexFunction + * + * Description: + * Split_index(string1, string2, integer) -> string + * Splits string1 by delimiter string2 and returns the string at the given index integer(zero-based). + * - Returns null if the index is negative or any of the arguments is null. + * - Returns null if the index is out of bounds of the split strings. + * + */ +@TransformFunction(names = {"split_index", "splitindex"}) +public class SplitIndexFunction implements ValueParser { + + private final ValueParser strParser; + private final ValueParser delimiterParser; + private final ValueParser indexParser; + + public SplitIndexFunction(Function expr) { + List expressions = expr.getParameters().getExpressions(); + strParser = OperatorTools.buildParser(expressions.get(0)); + delimiterParser = OperatorTools.buildParser(expressions.get(1)); + indexParser = OperatorTools.buildParser(expressions.get(2)); + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + Object strObject = strParser.parse(sourceData, rowIndex, context); + Object delimiterObject = delimiterParser.parse(sourceData, rowIndex, context); + Object indexObject = indexParser.parse(sourceData, rowIndex, context); + + if (strObject == null || delimiterObject == null || indexObject == null) { + return null; + } + + String str = OperatorTools.parseString(strObject); + String delimiter = OperatorTools.parseString(delimiterObject); + int index = OperatorTools.parseBigDecimal(indexObject).intValue(); + + if (str == null || delimiter == null || index < 0) { + return null; + } + + String[] splitStrings = str.split(delimiter); + if (index >= splitStrings.length) { + return null; + } + + return splitStrings[index]; + } +} \ No newline at end of file diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestSplitIndexFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestSplitIndexFunction.java new file mode 100644 index 00000000000..1ad949aaa44 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestSplitIndexFunction.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; + +public class TestSplitIndexFunction extends AbstractFunctionStringTestBase { + + @Test + public void testSplitIndexFunction() throws Exception { + String transformSql = "select split_index(string1, string2, numeric1) from source"; + TransformConfig config = new TransformConfig(transformSql); + TransformProcessor processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + + // case1: split_index('a,b,c', ',', 1) + String data = "a,b,c|,|cloud|1|3|3"; + List output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=b", output.get(0)); + + // case2: split_index('a,b,c', ',', -1) + data = "a,b,c|,|cloud|-1|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=null", output.get(0)); + + // case3: split_index('a,b,c', ',', 3) + data = "a,b,c|,|cloud|3|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=null", output.get(0)); + + // case4: split_index(null, ',', 1) + transformSql = "select split_index(xxd, string2, numeric1) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + data = "abc|,|cloud|1|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=null", output.get(0)); + + // case5: split_index('a,b,c', null, 1) + transformSql = "select split_index(string1, xxd, numeric1) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + data = "a,b,c|xxd|cloud|1|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=null", output.get(0)); + + // case6: split_index('a,b,c', ',', null) + transformSql = "select split_index(string1, string2, xxd) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + data = "a,b,c|,|cloud|xxd|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=null", output.get(0)); + + // case7: split_index('', ',', 0) + transformSql = "select split_index(string1, string2, numeric1) from source"; + config = new TransformConfig(transformSql); + processor = TransformProcessor + .create(config, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + data = "|,|cloud|0|3|3"; + output = processor.transform(data, new HashMap<>()); + Assert.assertEquals(1, output.size()); + Assert.assertEquals("result=", output.get(0)); + } +} \ No newline at end of file