Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[INLONG-11369][Sort] KV split has an error when there is an escape char that is not before & or = in the text #11370

Merged
merged 2 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,17 @@ public static List<Map<String, String>> splitKv(
*/
int kvState = STATE_KEY;

char lastCh = 0;
char nextCh = 0;
for (int i = 0; i < text.length(); ++i) {
char ch = text.charAt(i);
if ((i + 1) < text.length()) {
nextCh = text.charAt(i + 1);
} else {
nextCh = 0;
}
if (ch == kvDelimiter) {
switch (state) {
// match previous kv delimiter first when there are more than one kvDelimiter
case STATE_KEY:
key = stringBuilder.toString();
stringBuilder.setLength(0);
Expand All @@ -124,24 +130,19 @@ public static List<Map<String, String>> splitKv(
} else if (ch == entryDelimiter) {
switch (state) {
case STATE_KEY:
key = lastKey;
if (lastValue == null) {
value = ch + stringBuilder.toString();
} else {
value = lastValue + ch + stringBuilder.toString();
}
fields.put(key, value);
lastKey = key;
lastValue = value;
stringBuilder.setLength(0);
stringBuilder.append(ch);
break;
case STATE_VALUE:
value = stringBuilder.toString();
fields.put(key, value);
lastKey = key;
lastValue = value;
stringBuilder.setLength(0);
state = STATE_KEY;
if (nextCh == entryDelimiter) {
stringBuilder.append(ch);
} else {
value = stringBuilder.toString();
fields.put(key, value);
lastKey = key;
lastValue = value;
stringBuilder.setLength(0);
state = STATE_KEY;
}
break;
case STATE_ESCAPING:
stringBuilder.append(ch);
Expand All @@ -154,12 +155,6 @@ public static List<Map<String, String>> splitKv(
} else if (escapeChar != null && ch == escapeChar) {
switch (state) {
case STATE_KEY:
if (lastCh != 0) {
stringBuilder.append(lastCh);
}
kvState = state;
state = STATE_ESCAPING;
break;
case STATE_VALUE:
kvState = state;
state = STATE_ESCAPING;
Expand All @@ -175,12 +170,6 @@ public static List<Map<String, String>> splitKv(
} else if (quoteChar != null && ch == quoteChar) {
switch (state) {
case STATE_KEY:
if (lastCh != 0) {
stringBuilder.append(lastCh);
}
kvState = state;
state = STATE_QUOTING;
break;
case STATE_VALUE:
kvState = state;
state = STATE_QUOTING;
Expand All @@ -196,20 +185,26 @@ public static List<Map<String, String>> splitKv(
} else if (lineDelimiter != null && ch == lineDelimiter) {
switch (state) {
case STATE_KEY:
String remainingKey = stringBuilder.toString();
key = lastKey;
stringBuilder.append(lastValue).append(lastCh);
stringBuilder.setLength(0);
stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
value = stringBuilder.toString();
fields.put(key, value);
Map<String, String> copyFields = new HashMap<>();
copyFields.putAll(fields);
lines.add(copyFields);
stringBuilder.setLength(0);
fields.clear();
lastKey = null;
lastValue = null;
stringBuilder.setLength(0);
break;
case STATE_VALUE:
lastKey = null;
lastValue = null;
value = stringBuilder.toString();
fields.put(key, value);
Map<String, String> copyFields = new HashMap<>();
copyFields = new HashMap<>();
copyFields.putAll(fields);
lines.add(copyFields);
stringBuilder.setLength(0);
Expand All @@ -226,14 +221,22 @@ public static List<Map<String, String>> splitKv(
}
} else {
stringBuilder.append(ch);
switch (state) {
case STATE_ESCAPING:
state = kvState;
}
}
lastCh = ch;
}

switch (state) {
case STATE_KEY:
if (lastKey != null && lastValue != null && text != null) {
fields.put(lastKey, lastValue + lastCh);
String remainingKey = stringBuilder.toString();
key = lastKey;
stringBuilder.setLength(0);
stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
value = stringBuilder.toString();
fields.put(key, value);
}
lines.add(fields);
return lines;
Expand All @@ -244,14 +247,19 @@ public static List<Map<String, String>> splitKv(
return lines;
case STATE_ESCAPING:
case STATE_QUOTING:
value = stringBuilder.toString();
String oldValue = fields.get(key);
if (value != null && !"".equals(value)
&& oldValue != null && !"".equals(oldValue)) {
fields.put(key, oldValue + value);
} else if (value != null && !"".equals(value)) {
fields.put(key, value);
switch (kvState) {
case STATE_VALUE:
value = stringBuilder.toString();
fields.put(key, value);
break;
case STATE_KEY:
if (lastKey != null) {
value = stringBuilder.toString();
String oldValue = fields.get(key);
fields.put(key, oldValue + entryDelimiter + value);
}
}

lines.add(fields);
return lines;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

import org.apache.inlong.sort.formats.util.StringUtils;

import org.junit.Assert;
import org.junit.Test;

import java.util.List;
import java.util.Map;

import static org.apache.inlong.sort.formats.util.StringUtils.splitKv;
import static org.junit.Assert.assertEquals;

public class StringUtilsTest {
Expand Down Expand Up @@ -55,17 +57,17 @@ public void testSplitKvString() {
'=', '\\', '\'', '\n');
assertEquals("=", map4.get(0).get("name"));
assertEquals("20&&", map4.get(0).get("age"));
assertEquals("=", map4.get(0).get("name1"));
assertEquals("20&&", map4.get(0).get("age1"));
assertEquals("=", map4.get(1).get("name1"));
assertEquals("20&&", map4.get(1).get("age1"));

String kvString5 = "name==&age=20&&\nname1==&age1=20&&&value=aaa&dddd&";
List<Map<String, String>> map5 = StringUtils.splitKv(kvString5, '&',
'=', '\\', '\'', '\n');
assertEquals("=", map5.get(0).get("name"));
assertEquals("20&&", map5.get(0).get("age"));
assertEquals("=", map5.get(0).get("name1"));
assertEquals("20&&", map5.get(0).get("age1"));
assertEquals("aaa&dddd&", map5.get(0).get("value"));
assertEquals("=", map5.get(1).get("name1"));
assertEquals("20&&", map5.get(1).get("age1"));
assertEquals("aaa&dddd&", map5.get(1).get("value"));

String kvString6 = "name==&age=20&&\\";
List<Map<String, String>> map6 = StringUtils.splitKv(kvString6, '&',
Expand Down Expand Up @@ -153,4 +155,13 @@ public void testSplitCsvStringWithMaxFields() {
assertEquals("home", csv1Array4[2][1]);
assertEquals("home", csv1Array4[2][2]);
}

/**
 * Verifies that KV splitting still yields every entry when the text contains
 * an escape char that precedes an ordinary character (here the {@code 2} in
 * the second key) as well as one that precedes the {@code &} delimiter.
 *
 * <p>The arguments after the text are presumably entry delimiter, key-value
 * delimiter, escape char and quote char (passed as {@code null}) - confirm
 * against the {@code splitKv} overload being called.
 */
@Test
public void testKvScapeCharSplit() {
    String text = "k1=v1&\nk\\2=v2\\&&k3=v3";
    Map<String, String> kvMap = splitKv(text, '&', '=', '\\', null);

    // Separate assertions report which expectation failed and, for the size
    // check, the actual value, instead of a bare "expected true".
    Assert.assertNotNull(kvMap);
    Assert.assertEquals(3, kvMap.size());
    Assert.assertNotNull(kvMap.get("k3"));
    // The expected key keeps the leading newline and has no escape char in it.
    Assert.assertNotNull(kvMap.get("\nk2"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ public void testSplitDanglingKey1() {
// Splits "f1&f2=3", where "f1" has no '=' of its own (a dangling key), and
// checks which key the value "3" ends up under.
// Presumably '&' is the entry delimiter and '=' the key-value delimiter, with
// no escape or quote char (both null) - confirm against the splitKv signature.
public void testSplitDanglingKey2() {
Map<String, String> kvMap = splitKv("f1&f2=3", '&',
'=', null, null);
// NOTE(review): this text is a diff hunk rendered without +/- markers. The
// hunk header (-210,7 +210,7) implies one line was replaced, so the "f2"
// assertion below is presumably the removed line and the "f1&f2" assertion
// its replacement - confirm against the merged file before relying on both.
Assert.assertEquals("3", kvMap.get("f2"));
Assert.assertEquals("3", kvMap.get("f1&f2"));
}

@Test
Expand Down
Loading