Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[INLONG-11028][SDK] Transform SQL support UrlEncode & UrlDecode functions #11031

Merged
merged 7 commits into from
Oct 11, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.function.TransformFunction;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Function;

import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;

/**
* UrlDecodeFunction
* description: Decodes a given string in ‘application/x-www-form-urlencoded’ format using the UTF-8 encoding scheme.
* If the input is NULL, or there is an issue with the decoding process(such as encountering an illegal escape pattern),
* or the encoding scheme is not supported, the function returns NULL.
*/
@TransformFunction(names = {"url_decode"})
public class UrlDecodeFunction implements ValueParser {

private final ValueParser stringParser;

public UrlDecodeFunction(Function expr) {
stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
}

@Override
public Object parse(SourceData sourceData, int rowIndex, Context context) {
Object stringObj = stringParser.parse(sourceData, rowIndex, context);
if (stringObj == null) {
return null;
}
String string = OperatorTools.parseString(stringObj);
if (string == null) {
return null;
}

try {
return URLDecoder.decode(string, StandardCharsets.UTF_8.toString());
} catch (Exception e) {
return null;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.function.TransformFunction;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Function;

import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

/**
* UrlEncodeFunction
* description: Translates a string into ‘application/x-www-form-urlencoded’ format using the UTF-8 encoding scheme.
* If the input is NULL, or there is an issue with the encoding process,
* or the encoding scheme is not supported, will return NULL.
*/
@TransformFunction(names = {"url_encode"})
public class UrlEncodeFunction implements ValueParser {

private final ValueParser stringParser;

public UrlEncodeFunction(Function expr) {
stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
}

@Override
public Object parse(SourceData sourceData, int rowIndex, Context context) {
Object stringObj = stringParser.parse(sourceData, rowIndex, context);
if (stringObj == null) {
return null;
}

String string = OperatorTools.parseString(stringObj);
if (string == null) {
return null;
}

try {
return URLEncoder.encode(string, StandardCharsets.UTF_8.toString());
} catch (Exception e) {
return null;
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

public class TestUrlDecodeFunction extends AbstractFunctionStringTestBase {

@Test
public void testUrlDecodeFunction() throws Exception {
String transformSql = "select url_decode(string1) from source";
TransformConfig config = new TransformConfig(transformSql);
TransformProcessor<String, String> processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));

// case1: url_decode('https%3A%2F%2Fwww.google.com%2Fsearch%3Fq%3Djava+url+encode')
List<String> output1 = processor.transform(
"https%3A%2F%2Fwww.google.com%2Fsearch%3Fq%3Djava+url+encode|banana|cloud|1", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals(output1.get(0), "result=https://www.google.com/search?q=java url encode");

String transformSql2 = "select url_decode(stringX) from source";
TransformConfig config2 = new TransformConfig(transformSql2);
TransformProcessor<String, String> processor2 = TransformProcessor
.create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case2: url_decode(null)
List<String> output2 = processor2.transform("|apple|banana|cloud|1", new HashMap<>());
Assert.assertEquals(1, output2.size());
Assert.assertEquals(output2.get(0), "result=");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

public class TestUrlEncodeFunction extends AbstractFunctionStringTestBase {

@Test
public void testUrlEncodeFunction() throws Exception {
String transformSql = "select url_encode(string1) from source";
TransformConfig config = new TransformConfig(transformSql);
TransformProcessor<String, String> processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));

// case1: url_encode('https://www.google.com/search?q=java url encode')
List<String> output1 =
processor.transform("https://www.google.com/search?q=java url encode|banana|cloud|1", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals(output1.get(0), "result=https%3A%2F%2Fwww.google.com%2Fsearch%3Fq%3Djava+url+encode");

String transformSql2 = "select url_encode(stringX) from source";
TransformConfig config2 = new TransformConfig(transformSql2);
TransformProcessor<String, String> processor2 = TransformProcessor
.create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case2: url_encode(null)
List<String> output2 = processor2.transform("apple|banana|cloud|1", new HashMap<>());
Assert.assertEquals(1, output2.size());
Assert.assertEquals(output2.get(0), "result=");
}
}
Loading