Skip to content

Commit

Permalink
[INLONG-11216][SDK] Transform support STR_TO_MAP() function (#11232)
Browse files Browse the repository at this point in the history
  • Loading branch information
emptyOVO authored Oct 8, 2024
1 parent 14538d1 commit dd37068
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
* StrToMapFunction
* description: str_to_map(string1, string2, string3) - Returns a map after splitting the string1 into key/value pairs
* using delimiters. string2 is the pair delimiter, default is ‘,’. And string3 is the key-value delimiter,
* default is ‘=’. Both pair delimiter and key-value delimiter are treated as regular expressions.So special
* characters (e.g. <([{^-=$!|]})?*+.>) need to be properly escaped before using as a delimiter literally.
* for example: STR_TO_MAP('item1:10,item2:5,item3:2', ':', ',')--{'item1' -> 10, 'item2' -> 5, 'item3' -> 2}
*/
@TransformFunction(names = {"str_to_map"})
public class StrToMapFunction implements ValueParser {

private ValueParser inputParser;

private ValueParser pairDelimiterParser;

private ValueParser kvDelimiterParser;

public StrToMapFunction(Function expr) {
List<Expression> expressions = expr.getParameters().getExpressions();
if (!expressions.isEmpty()) {
inputParser = OperatorTools.buildParser(expressions.get(0));
if (expressions.size() >= 2) {
pairDelimiterParser = OperatorTools.buildParser(expressions.get(1));
if (expressions.size() >= 3) {
kvDelimiterParser = OperatorTools.buildParser(expressions.get(2));
}
}
}
}

@Override
public Object parse(SourceData sourceData, int rowIndex, Context context) {
Object inputStringObj = inputParser.parse(sourceData, rowIndex, context);
Object pairDelimiterStringObj = null;
String pairDelimiterString = null;
if (pairDelimiterParser != null) {
pairDelimiterStringObj = pairDelimiterParser.parse(sourceData, rowIndex, context);
pairDelimiterString = OperatorTools.parseString(pairDelimiterStringObj);
}
Object kvDelimiterStringObj = null;
String kvDelimiterString = null;
if (kvDelimiterParser != null) {
kvDelimiterStringObj = kvDelimiterParser.parse(sourceData, rowIndex, context);
kvDelimiterString = OperatorTools.parseString(kvDelimiterStringObj);
}
String inputString = OperatorTools.parseString(inputStringObj);

return getStringStringMap(pairDelimiterString, kvDelimiterString, inputString);
}

private Map<String, String> getStringStringMap(String pairDelimiterString, String kvDelimiterString,
String inputString) {
String pairDelimiter =
(pairDelimiterString == null || pairDelimiterString.isEmpty()) ? "," : escapeRegex(pairDelimiterString);
String keyValueDelimiter =
(kvDelimiterString == null || kvDelimiterString.isEmpty()) ? "=" : escapeRegex(kvDelimiterString);

Map<String, String> map = new LinkedHashMap<>();
String[] pairs = inputString.split(pairDelimiter);

for (String pair : pairs) {
if (pair.contains(keyValueDelimiter)) {
String[] keyValue = pair.split(keyValueDelimiter, 2);
map.put(keyValue[0], keyValue[1]);
}
}
return map;
}

private String escapeRegex(String delimiter) {
return delimiter.replaceAll("([\\\\^$|?*+\\[\\](){}])", "\\\\$1");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

public class TestStrToMapFunction extends AbstractFunctionStringTestBase {

@Test
public void testStrToMapFunction() throws Exception {
String transformSql = null, data = null;
TransformConfig config = null;
TransformProcessor<String, String> processor = null;
List<String> output = null;

transformSql = "select str_to_map(string1) from source";
config = new TransformConfig(transformSql);
processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));

// case1: str_to_map('key1=value1,key2=value2,key3=value3')
data = "key1=value1,key2=value2,key3=value3|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={key1=value1, key2=value2, key3=value3}", output.get(0));

// case2: str_to_map('key1=,key2=,key3=')
data = "key1=,key2=,key3=|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={key1=, key2=, key3=}", output.get(0));

// case3: str_to_map('key1value1,key2value2,key3value3')
data = "key1value1,key2value2,key3value3|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={}", output.get(0));

// case4: str_to_map("")
data = "|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={}", output.get(0));

transformSql = "select str_to_map(string1,string2,string3) from source";
config = new TransformConfig(transformSql);
processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));

// case5: str_to_map("item1:10,item2:5,item3" , "," , ":")
data = "item1:10,item2:5,item3:2|,|:|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={item1=10, item2=5, item3=2}", output.get(0));

// case6: str_to_map("name->John!age->30!city->China" , "!" , "->")
data = "name->John!age->30!city->China|!|->|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={name=John, age=30, city=China}", output.get(0));

// case7: str_to_map("name->John!age->30!city->China" , null , "->")
data = "name->John,age->30,city->China||->|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result={name=John, age=30, city=China}", output.get(0));
}
}

0 comments on commit dd37068

Please sign in to comment.