From 49178720848472273aef73d0da8fc41eb14413b1 Mon Sep 17 00:00:00 2001 From: emptyOVO <118812562+emptyOVO@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:06:26 +0800 Subject: [PATCH] [INLONG-11216][SDK] Transform support STR_TO_MAP() function (#11232) --- .../process/function/StrToMapFunction.java | 103 ++++++++++++++++++ .../function/string/TestStrToMapFunction.java | 94 ++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/StrToMapFunction.java create mode 100644 inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestStrToMapFunction.java diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/StrToMapFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/StrToMapFunction.java new file mode 100644 index 0000000000..06c4613380 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/StrToMapFunction.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * StrToMapFunction
+ * description: str_to_map(string1[, string2[, string3]]) - Returns a map after splitting string1 into key/value
+ * pairs. string2 is the pair delimiter, default is ','. And string3 is the key-value delimiter, default is '='.
+ * A missing or empty delimiter falls back to its default. Both delimiters are matched literally, so characters
+ * such as '.', '|' or '*' need no escaping. A pair without the key-value delimiter is skipped, a repeated key
+ * keeps its last value, and a null string1 yields null.
+ * for example: STR_TO_MAP('item1:10,item2:5,item3:2', ',', ':')--{'item1' -> 10, 'item2' -> 5, 'item3' -> 2}
+ */
+@TransformFunction(names = {"str_to_map"})
+public class StrToMapFunction implements ValueParser {
+
+    /** Delimiters used when the optional arguments are missing or evaluate to null or an empty string. */
+    private static final String DEFAULT_PAIR_DELIMITER = ",";
+    private static final String DEFAULT_KV_DELIMITER = "=";
+
+    private ValueParser inputParser;
+    private ValueParser pairDelimiterParser;
+    private ValueParser kvDelimiterParser;
+
+    public StrToMapFunction(Function expr) {
+        // A call without arguments leaves every parser unset; parse() then returns null.
+        if (expr.getParameters() == null || expr.getParameters().getExpressions() == null) {
+            return;
+        }
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        if (!expressions.isEmpty()) {
+            inputParser = OperatorTools.buildParser(expressions.get(0));
+        }
+        if (expressions.size() >= 2) {
+            pairDelimiterParser = OperatorTools.buildParser(expressions.get(1));
+        }
+        if (expressions.size() >= 3) {
+            kvDelimiterParser = OperatorTools.buildParser(expressions.get(2));
+        }
+    }
+
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        Object inputObj = inputParser == null ? null : inputParser.parse(sourceData, rowIndex, context);
+        String pairDelimiter = delimiterOf(pairDelimiterParser, DEFAULT_PAIR_DELIMITER, sourceData, rowIndex, context);
+        String kvDelimiter = delimiterOf(kvDelimiterParser, DEFAULT_KV_DELIMITER, sourceData, rowIndex, context);
+        String inputString = inputObj == null ? null : OperatorTools.parseString(inputObj);
+        if (inputString == null) {
+            // Nothing to split: return null rather than fail with a NullPointerException.
+            return null;
+        }
+        return getStringStringMap(pairDelimiter, kvDelimiter, inputString);
+    }
+
+    /** Evaluates an optional delimiter argument; a missing, null or empty value yields the fallback. */
+    private String delimiterOf(ValueParser parser, String fallback, SourceData sourceData, int rowIndex,
+            Context context) {
+        Object value = parser == null ? null : parser.parse(sourceData, rowIndex, context);
+        String delimiter = value == null ? null : OperatorTools.parseString(value);
+        return (delimiter == null || delimiter.isEmpty()) ? fallback : delimiter;
+    }
+
+    /** Splits on literal delimiters; the first key-value delimiter in a pair separates key from value. */
+    private Map<String, String> getStringStringMap(String pairDelimiter, String kvDelimiter, String inputString) {
+        Map<String, String> map = new LinkedHashMap<>();
+        // Pattern.quote makes the delimiter a literal pattern, so '.' or '|' are not treated as regex syntax.
+        for (String pair : inputString.split(Pattern.quote(pairDelimiter))) {
+            int index = pair.indexOf(kvDelimiter);
+            if (index >= 0) {
+                map.put(pair.substring(0, index), pair.substring(index + kvDelimiter.length()));
+            }
+        }
+        return map;
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestStrToMapFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestStrToMapFunction.java
new file mode 100644
index 0000000000..627c2755ea
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestStrToMapFunction.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * TestStrToMapFunction
+ * description: test the str_to_map function with the default delimiters and with explicit delimiters
+ */
+public class TestStrToMapFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testStrToMapFunction() throws Exception {
+        // Single-argument form: only string1 is passed, so no delimiter argument is supplied.
+        TransformProcessor<String, String> processor = createProcessor("select str_to_map(string1) from source");
+
+        // case1: str_to_map('key1=value1,key2=value2,key3=value3')
+        assertSingleResult(processor, "key1=value1,key2=value2,key3=value3|xxd|cloud|7|3|3",
+                "result={key1=value1, key2=value2, key3=value3}");
+
+        // case2: str_to_map('key1=,key2=,key3=')
+        assertSingleResult(processor, "key1=,key2=,key3=|xxd|cloud|7|3|3",
+                "result={key1=, key2=, key3=}");
+
+        // case3: str_to_map('key1value1,key2value2,key3value3')
+        assertSingleResult(processor, "key1value1,key2value2,key3value3|xxd|cloud|7|3|3",
+                "result={}");
+
+        // case4: str_to_map("")
+        assertSingleResult(processor, "|xxd|cloud|7|3|3",
+                "result={}");
+
+        // Three-argument form: string2 and string3 of each row carry the two delimiters.
+        processor = createProcessor("select str_to_map(string1,string2,string3) from source");
+
+        // case5: str_to_map("item1:10,item2:5,item3:2" , "," , ":")
+        assertSingleResult(processor, "item1:10,item2:5,item3:2|,|:|7|3|3",
+                "result={item1=10, item2=5, item3=2}");
+
+        // case6: str_to_map("name->John!age->30!city->China" , "!" , "->")
+        assertSingleResult(processor, "name->John!age->30!city->China|!|->|7|3|3",
+                "result={name=John, age=30, city=China}");
+
+        // case7: str_to_map("name->John,age->30,city->China" , <string2 field left empty> , "->")
+        assertSingleResult(processor, "name->John,age->30,city->China||->|7|3|3",
+                "result={name=John, age=30, city=China}");
+    }
+
+    /**
+     * Builds a processor for the given SQL from the CSV decoder for csvSource and the KV encoder for kvSink.
+     *
+     * @param transformSql the transform SQL applied to every row
+     */
+    private TransformProcessor<String, String> createProcessor(String transformSql) throws Exception {
+        TransformConfig config = new TransformConfig(transformSql);
+        return TransformProcessor.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                SinkEncoderFactory.createKvEncoder(kvSink));
+    }
+
+    /**
+     * Transforms one row and asserts that exactly one output entry is produced and that it equals expected.
+     */
+    private void assertSingleResult(TransformProcessor<String, String> processor, String data, String expected)
+            throws Exception {
+        List<String> output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals(expected, output.get(0));
+    }
+}