From 3e7f81d41c0e1ab8294d93b374e54515d0b7936b Mon Sep 17 00:00:00 2001
From: Xincheng Huang <60057611+ying-hua@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:25:52 +0800
Subject: [PATCH] [INLONG-10971][SDK] Transform support INSERT function (#10972)

---
 .../process/function/InsertFunction.java      | 118 ++++++++++++++++++
 ...TestTransformStringFunctionsProcessor.java |  44 +++++++
 2 files changed, 162 insertions(+)
 create mode 100644 inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/InsertFunction.java

diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/InsertFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/InsertFunction.java
new file mode 100644
index 00000000000..3473bc9d793
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/InsertFunction.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+
+/**
+ * InsertFunction
+ *
+ * Description:
+ * Returns a string where a specified substring is replaced by another string, starting at a given position and for a specified length.
+ * If the position is out of the string's bounds, the original string is returned.
+ * If the length is negative or exceeds the remaining length of the string from the given position, the replacement continues to the end of the string.
+ * If any argument is null, the function returns null.
+ *
+ * Arguments:
+ * - str: The original string.
+ * - pos: The position to start the replacement (1-based index).
+ * - len: The number of characters to replace.
+ * - newstr: The string to insert.
+ *
+ * Examples:
+ * - INSERT('12345678', 3, 4, 'word') = '12word78'
+ * - INSERT('12345678', -1, 4, 'word') = '12345678'
+ * - INSERT('12345678', 3, 100, 'word') = '12word'
+ * - INSERT('12345678', 3, -1, 'word') = '12word'
+ */
+@TransformFunction(names = {"insert"})
+public class InsertFunction implements ValueParser {
+
+    private final ValueParser strParser;
+    private final ValueParser posParser;
+    private final ValueParser lenParser;
+    private final ValueParser newStrParser;
+
+    /**
+     * Builds one value parser per argument of INSERT(str, pos, len, newstr).
+     *
+     * @param expr the parsed function call; its parameter list is read at indexes 0 to 3
+     */
+    public InsertFunction(Function expr) {
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        strParser = OperatorTools.buildParser(expressions.get(0));
+        posParser = OperatorTools.buildParser(expressions.get(1));
+        lenParser = OperatorTools.buildParser(expressions.get(2));
+        newStrParser = OperatorTools.buildParser(expressions.get(3));
+    }
+
+    /**
+     * Evaluates the four arguments for the given row and applies the replacement.
+     *
+     * @param sourceData the decoded source record, passed through to each argument parser
+     * @param rowIndex the row index, passed through to each argument parser
+     * @param context the evaluation context, passed through to each argument parser
+     * @return the resulting string, the unchanged input string when pos is outside 1..str.length(),
+     *         or null when any argument evaluates to null
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        Object strObject = strParser.parse(sourceData, rowIndex, context);
+        Object posObject = posParser.parse(sourceData, rowIndex, context);
+        Object lenObject = lenParser.parse(sourceData, rowIndex, context);
+        Object newStrObject = newStrParser.parse(sourceData, rowIndex, context);
+
+        if (strObject == null || posObject == null || lenObject == null || newStrObject == null) {
+            return null;
+        }
+
+        String str = OperatorTools.parseString(strObject);
+        int pos = OperatorTools.parseBigDecimal(posObject).intValue();
+        int len = OperatorTools.parseBigDecimal(lenObject).intValue();
+        String newStr = OperatorTools.parseString(newStrObject);
+
+        if (str == null || newStr == null) {
+            return null;
+        }
+
+        if (pos < 1 || pos > str.length()) {
+            return str;
+        }
+
+        int startIndex = pos - 1;
+        // Compare len with the remaining length instead of computing startIndex + len, which can
+        // overflow for a large len; a negative len also replaces everything up to the end of str.
+        int remaining = str.length() - startIndex;
+        int endIndex = (len < 0 || len > remaining) ? str.length() : startIndex + len;
+
+        StringBuilder result = new StringBuilder();
+        result.append(str, 0, startIndex);
+        result.append(newStr);
+        result.append(str, endIndex, str.length());
+
+        return result.toString();
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
index 14511946070..a3099d09f2a 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
@@ -690,4 +690,48 @@ public void testTranslateFunction() throws Exception {
         Assert.assertEquals(output3.get(0), "result=Apache Inlong");
     }
 
+    @Test
+    public void testInsertFunction() throws Exception {
+        String transformSql1 = "select insert(string1, numeric1, numeric2, string2) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: insert('12345678', 3, 4, 'word') -> '12word78'
+        List<String> output1 = processor1.transform("12345678|word|cloud|3|4|0", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=12word78", output1.get(0));
+
+        // case2: insert('12345678', -1, 4, 'word') -> '12345678'
+        List<String> output2 = processor1.transform("12345678|word|cloud|-1|4|0", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=12345678", output2.get(0));
+
+        // case3: insert('12345678', 3, 100, 'word') -> '12word'
+        List<String> output3 = processor1.transform("12345678|word|cloud|3|100|0", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=12word", output3.get(0));
+
+        // case4: insert('', 3, 4, 'word') -> ''
+        List<String> output4 = processor1.transform("|word|cloud|3|4|0", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=", output4.get(0));
+
+        // case5: insert('12345678', 3, 4, '') -> '1278'
+        List<String> output5 = processor1.transform("12345678||cloud|3|4|0", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=1278", output5.get(0));
+
+        // case6: insert('12345678', 3, -1, 'word') -> '12word' (negative length replaces the rest)
+        List<String> output6 = processor1.transform("12345678|word|cloud|3|-1|0", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=12word", output6.get(0));
+
+        // case7: insert('12345678', 3, 2147483647, 'word') -> '12word' (no int overflow)
+        List<String> output7 = processor1.transform("12345678|word|cloud|3|2147483647|0", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=12word", output7.get(0));
+    }
+
 }