Skip to content

Commit

Permalink
[INLONG-11217][SDK] Transform support JSON_QUOTE() and JSON_UNQUOTE()…
Browse files Browse the repository at this point in the history
… function (#11244)
  • Loading branch information
emptyOVO authored Oct 8, 2024
1 parent 18b9905 commit 9021b3c
Show file tree
Hide file tree
Showing 4 changed files with 402 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import com.alibaba.fastjson.JSON;
import net.sf.jsqlparser.expression.Function;
/**
 * JsonQuoteFunction
 * <p>
 * JSON_QUOTE(string): quotes a string as a JSON value by wrapping it in double quote characters and
 * escaping interior double quotes, backslashes and control characters, and returns the result as a
 * string. If the argument is NULL, the function returns NULL.
 * <p>
 * JSON_STRING(value): serializes a value into JSON and returns the JSON text as a string.
 * If the value is NULL, the function returns NULL.
 * <p>
 * Both names are served by this class; serialization (including which characters get escaped) is
 * delegated to fastjson's {@code JSON.toJSONString}.
 * <p>
 * Examples:
 * <ul>
 * <li>json_quote('Hello, World!') returns "Hello, World!" (including the double quotes)</li>
 * <li>json_string('true') returns "true" (a string argument is always quoted)</li>
 * <li>json_string(1) returns 1 (assuming the argument parser yields a numeric object)</li>
 * </ul>
 */
@TransformFunction(names = {"json_quote", "json_string"})
public class JsonQuoteFunction implements ValueParser {

    /** Parser for the single function argument; null when the call carried no argument. */
    private final ValueParser jsonParser;

    /**
     * Builds the function from its parsed SQL call.
     *
     * @param expr the JSqlParser function expression; only its first parameter is used
     */
    public JsonQuoteFunction(Function expr) {
        this.jsonParser = buildArgumentParser(expr);
    }

    /**
     * Returns a parser for the first argument of {@code expr}, or null when the call has no
     * arguments, so that {@link #parse} yields NULL instead of the constructor failing with a
     * NullPointerException or IndexOutOfBoundsException.
     */
    private static ValueParser buildArgumentParser(Function expr) {
        if (expr == null || expr.getParameters() == null) {
            return null;
        }
        if (expr.getParameters().getExpressions() == null
                || expr.getParameters().getExpressions().isEmpty()) {
            return null;
        }
        return OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
    }

    /**
     * Evaluates the argument for the given row and serializes it as JSON text.
     *
     * @param sourceData the decoded source record
     * @param rowIndex   index of the row being transformed
     * @param context    transform context passed through to the argument parser
     * @return the JSON text of the argument, or null when the argument is missing or evaluates to null
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        if (jsonParser == null) {
            return null;
        }
        Object value = jsonParser.parse(sourceData, rowIndex, context);
        if (value == null) {
            return null;
        }
        return JSON.toJSONString(value);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import net.sf.jsqlparser.expression.Function;
/**
 * JsonUnQuoteFunction
 * <p>
 * JSON_UNQUOTE(string): unquotes a JSON string value, unescapes its escaped special characters and
 * returns the result as a string. If the argument is NULL, returns NULL. If the value does not
 * start and end with double quotes, or if it does but is not a valid JSON string literal, the value
 * is passed through unmodified.
 * <p>
 * Examples:
 * <ul>
 * <li>json_unquote('"Hello, World!"') returns Hello, World! (surrounding quotes removed)</li>
 * <li>json_unquote('Hello, World!') returns Hello, World! (not quoted, passed through unmodified)</li>
 * </ul>
 */
@TransformFunction(names = {"json_unquote"})
public class JsonUnQuoteFunction implements ValueParser {

    /** Parser for the single function argument; null when the call carried no argument. */
    private final ValueParser jsonParser;

    /**
     * Builds the function from its parsed SQL call.
     *
     * @param expr the JSqlParser function expression; only its first parameter is used
     */
    public JsonUnQuoteFunction(Function expr) {
        this.jsonParser = buildArgumentParser(expr);
    }

    /**
     * Returns a parser for the first argument of {@code expr}, or null when the call has no
     * arguments, so that {@link #parse} yields NULL instead of the constructor failing with a
     * NullPointerException or IndexOutOfBoundsException.
     */
    private static ValueParser buildArgumentParser(Function expr) {
        if (expr == null || expr.getParameters() == null) {
            return null;
        }
        if (expr.getParameters().getExpressions() == null
                || expr.getParameters().getExpressions().isEmpty()) {
            return null;
        }
        return OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
    }

    /**
     * Evaluates the argument for the given row and strips its JSON string quoting.
     *
     * @param sourceData the decoded source record
     * @param rowIndex   index of the row being transformed
     * @param context    transform context passed through to the argument parser
     * @return the unquoted string, the unmodified input when it is not a quoted JSON string
     *         literal, or null when the argument is missing or evaluates to null
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        if (jsonParser == null) {
            return null;
        }
        String jsonString = OperatorTools.parseString(jsonParser.parse(sourceData, rowIndex, context));
        if (jsonString == null) {
            return null;
        }
        // Only values wrapped in a pair of double quotes are candidates for unquoting.
        boolean quoted = jsonString.length() >= 2
                && jsonString.charAt(0) == '"'
                && jsonString.charAt(jsonString.length() - 1) == '"';
        if (!quoted) {
            return jsonString;
        }
        try {
            return JSON.parseObject(jsonString, String.class);
        } catch (JSONException ignored) {
            // Quoted but not a valid JSON string literal: pass the value through unmodified.
            return jsonString;
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

/**
 * Tests for the JSON_QUOTE / JSON_STRING transform functions.
 * <p>
 * Each case feeds one '|'-separated CSV line through a transform processor and checks the single
 * key-value encoded output line.
 */
public class TestJsonQuoteFunction extends AbstractFunctionStringTestBase {

    /** Creates a CSV-to-KV processor for the given transform SQL. */
    private TransformProcessor<String, String> newProcessor(String transformSql) throws Exception {
        TransformConfig config = new TransformConfig(transformSql);
        return TransformProcessor
                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                        SinkEncoderFactory.createKvEncoder(kvSink));
    }

    /** Transforms one input line and asserts that exactly one output line equals {@code expected}. */
    private void assertSingleResult(TransformProcessor<String, String> processor, String data,
            String expected) throws Exception {
        List<String> output = processor.transform(data, new HashMap<>());
        Assert.assertEquals(1, output.size());
        Assert.assertEquals(expected, output.get(0));
    }

    @Test
    public void testJsonQuoteFunction() throws Exception {
        TransformProcessor<String, String> processor = newProcessor("select json_quote(string1) from source");

        // case1: json_quote('Hello, World!')
        assertSingleResult(processor, "Hello, World!|xxd|cloud|7|3|3",
                "result=\"Hello, World!\"");

        // case2: json_quote('This is a "quoted" string')
        // NOTE(review): the inner double quotes are absent from the expected value, presumably
        // consumed by the CSV decoder before the function runs -- confirm.
        assertSingleResult(processor, "This is a \"quoted\" string|xxd|cloud|7|3|3",
                "result=\"This is a quoted string\"");

        // case3: json_quote('A back\slash:')
        // NOTE(review): the backslash is absent from the expected value, presumably treated as the
        // CSV escape character -- confirm.
        assertSingleResult(processor, "A back\\slash:|xxd|cloud|7|3|3",
                "result=\"A backslash:\"");

        // case4: json_quote('Column1<TAB>Column2') -- the tab character is escaped in the output
        assertSingleResult(processor, "Column1\tColumn2|xxd|cloud|7|3|3",
                "result=\"Column1\\tColumn2\"");

        // case5: json_quote('Quotes ' and double quotes \"')
        assertSingleResult(processor, "Quotes ' and double quotes \\\"|xxd|cloud|7|3|3",
                "result=\"Quotes ' and double quotes \\\"\"");

        // case6: json_quote('') -- an empty field is an empty string, not NULL, so it is quoted
        assertSingleResult(processor, "|xxd|cloud|7|3|3",
                "result=\"\"");

        // case7: json_quote('Complex string with / and \\') -- input line has only one field
        assertSingleResult(processor, "Complex string with / and \\\\",
                "result=\"Complex string with / and \\\\\"");

        // case8: json_quote('Unicode test: ሴ噸')
        assertSingleResult(processor, "Unicode test: ሴ噸|xxd|cloud|7|3|3",
                "result=\"Unicode test: ሴ噸\"");

        // case9: json_quote(xxd) -- the argument evaluates to NULL, so the result is empty
        processor = newProcessor("select json_quote(xxd) from source");
        assertSingleResult(processor, "|xxd|cloud|7|3|3",
                "result=");
    }

    @Test
    public void testJsonStringFunction() throws Exception {
        TransformProcessor<String, String> processor = newProcessor("select json_string(string1) from source");

        // case1: json_string('true') -- a string argument is quoted even if it looks like a boolean
        assertSingleResult(processor, "true|xxd|cloud|7|3|3",
                "result=\"true\"");

        // case2: json_string('This is a "quoted" string')
        // NOTE(review): the inner double quotes are absent from the expected value, presumably
        // consumed by the CSV decoder before the function runs -- confirm.
        assertSingleResult(processor, "This is a \"quoted\" string|xxd|cloud|7|3|3",
                "result=\"This is a quoted string\"");

        // case3: json_string('A back\slash:')
        // NOTE(review): the backslash is absent from the expected value, presumably treated as the
        // CSV escape character -- confirm.
        assertSingleResult(processor, "A back\\slash:|xxd|cloud|7|3|3",
                "result=\"A backslash:\"");

        // case4: json_string('Column1<TAB>Column2') -- the tab character is escaped in the output
        assertSingleResult(processor, "Column1\tColumn2|xxd|cloud|7|3|3",
                "result=\"Column1\\tColumn2\"");

        // case5: json_string('Quotes ' and double quotes \"')
        assertSingleResult(processor, "Quotes ' and double quotes \\\"|xxd|cloud|7|3|3",
                "result=\"Quotes ' and double quotes \\\"\"");

        // case6: json_string('') -- an empty field is an empty string, not NULL, so it is quoted
        assertSingleResult(processor, "|xxd|cloud|7|3|3",
                "result=\"\"");

        // case7: json_string('Complex string with / and \\') -- input line has only one field
        assertSingleResult(processor, "Complex string with / and \\\\",
                "result=\"Complex string with / and \\\\\"");

        // case8: json_string('Unicode test: ሴ噸')
        assertSingleResult(processor, "Unicode test: ሴ噸|xxd|cloud|7|3|3",
                "result=\"Unicode test: ሴ噸\"");

        // case9: json_string(xxd) -- the argument evaluates to NULL, so the result is empty.
        // Uses the json_string alias (the original repeated json_quote here by mistake).
        processor = newProcessor("select json_string(xxd) from source");
        assertSingleResult(processor, "|xxd|cloud|7|3|3",
                "result=");
    }
}
Loading

0 comments on commit 9021b3c

Please sign in to comment.