Skip to content

Commit

Permalink
Improve CharSequences#split to support trim and min copies (#1335)
Browse files Browse the repository at this point in the history
* Improve CharSequences#split to support trim and min copies
  • Loading branch information
tkountis authored Feb 4, 2021
1 parent 58baa0a commit 4fd7559
Show file tree
Hide file tree
Showing 6 changed files with 331 additions and 114 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -206,39 +206,95 @@ public static int caseInsensitiveHashCode(CharSequence seq) {
}

/**
* Split a given {@link CharSequence} to separate ones on the given {@code delimiter}.
* The returned {@link CharSequence}s are created by invoking the {@link CharSequence#subSequence(int, int)} method
* on the main one.
* Split a given {@link #newAsciiString(Buffer) AsciiString} to separate ones on the given {@code delimiter}.
*
* Trimming white-space before and after each token can be controlled by the {@code trim} flag.
* This method has no support for regex.
*
* @param input The initial {@link CharSequence} to split, this experiences no side effects
* @param delimiter The delimiter character
* @param input The initial {@link CharSequence} to split, this experiences no side effects.
* @param delimiter The delimiter character.
* @param trim Flag to control whether the individual items must be trimmed.
* @return a {@link List} of {@link CharSequence} subsequences of the input with the separated values
*/
public static List<CharSequence> split(final CharSequence input, final char delimiter) {
public static List<CharSequence> split(final CharSequence input, final char delimiter, final boolean trim) {
// An empty input produces an empty list rather than a list holding a single empty token.
if (input.length() == 0) {
return emptyList();
}

// Pick the trimming or non-trimming helper; the result of isAsciiString(input) is handed over as a flag
// so the helper can decide how tokens (and empty tokens) are created.
return trim ? splitWithTrim(input, isAsciiString(input), delimiter) :
split0(input, isAsciiString(input), delimiter);
}

// Splits input on delimiter without trimming anything around the tokens.
// NOTE(review): this span appears to interleave lines from before and after the change (diff view) - each
// comment below describes only the statement it directly precedes.
private static List<CharSequence> split0(final CharSequence input, final boolean isAscii, final char delimiter) {
// Index of the first character of the token currently being scanned.
int startIndex = 0;
List<CharSequence> result = new ArrayList<>();

List<CharSequence> result = new ArrayList<>(4);

for (int i = 0; i < input.length(); i++) {
if (input.charAt(i) == delimiter) {
// This variant only adds a token when at least one character precedes the delimiter.
if ((i - startIndex) > 0) {
result.add(input.subSequence(startIndex, i));
}
char c = input.charAt(i);

// This variant closes the current token on every delimiter, including zero-length tokens.
if (c == delimiter) {
result.add(subsequence(isAscii, input, startIndex, i));
startIndex = i + 1;
}
}

if ((input.length() - startIndex) > 0) {
result.add(input.subSequence(startIndex, input.length()));
// Characters remaining after the last delimiter form the final token.
result.add(subsequence(isAscii, input, startIndex, input.length()));
} else {
// Nothing remains after the last delimiter: append an empty token matching the input flavour.
result.add(isAscii ? EMPTY_ASCII_BUFFER : "");
}

return result;
}

/**
 * Splits {@code input} on {@code delimiter}, dropping the space characters ({@code ' '}) that surround each token.
 * <p>
 * Spaces located between two non-space characters of the same token are kept. A delimiter always closes the
 * current token, so consecutive delimiters and a trailing delimiter produce empty tokens. When neither a
 * delimiter nor a non-space character is ever encountered, the returned list is empty.
 *
 * @param input The sequence to split.
 * @param isAscii Whether tokens are created through the ascii path of {@code subsequence} and empty tokens are
 * represented by {@code EMPTY_ASCII_BUFFER} instead of {@code ""}.
 * @param delimiter The character separating tokens.
 * @return The trimmed tokens in the order they were encountered.
 */
private static List<CharSequence> splitWithTrim(final CharSequence input, final boolean isAscii,
                                                final char delimiter) {
    final List<CharSequence> tokens = new ArrayList<>(4);
    final CharSequence emptyToken = isAscii ? EMPTY_ASCII_BUFFER : "";

    // -1 means "nothing seen yet": no token has started and no delimiter was crossed.
    int tokenStart = -1;
    int tokenEnd = -1;
    boolean awaitingTokenStart = true;

    for (int pos = 0; pos < input.length(); pos++) {
        final char current = input.charAt(pos);

        if (current == delimiter) {
            // Close the token collected so far; it is empty when no non-space character was recorded for it.
            if (tokenEnd > tokenStart) {
                tokens.add(subsequence(isAscii, input, tokenStart, tokenEnd));
            } else {
                tokens.add(emptyToken);
            }

            tokenStart = pos + 1;
            tokenEnd = pos + 1;
            awaitingTokenStart = true;
        } else if (current != ' ') {
            // Token content: push the (exclusive) end forward, and pin the start on the first such character.
            tokenEnd = pos + 1;
            if (awaitingTokenStart) {
                tokenStart = pos;
                awaitingTokenStart = false;
            }
        }
    }

    if (tokenStart == -1) {
        return tokens;
    }

    // Emit whatever follows the last delimiter (or the whole input when there was no delimiter at all).
    if (input.length() - tokenStart > 0) {
        tokens.add(subsequence(isAscii, input, tokenStart, tokenEnd));
    } else {
        tokens.add(emptyToken);
    }

    return tokens;
}

/**
 * Extracts the range {@code [start, end)} of {@code input} as a new {@link CharSequence}.
 * <p>
 * For ascii input the unwrapped buffer is asked for a copy of {@code end - start} elements beginning at
 * {@code start}, and the copy is wrapped via {@code newAsciiString}; any other input delegates to
 * {@link CharSequence#subSequence(int, int)}.
 *
 * @param isAscii Whether {@code input} may be cast to {@code AsciiBuffer}.
 * @param input The sequence to extract from.
 * @param start Inclusive start index.
 * @param end Exclusive end index.
 * @return The extracted sequence.
 */
private static CharSequence subsequence(final boolean isAscii, final CharSequence input,
                                        final int start, final int end) {
    if (!isAscii) {
        return input.subSequence(start, end);
    }

    final int length = end - start;
    return newAsciiString(((AsciiBuffer) input).unwrap().copy(start, length));
}

/**
 * Tells whether two characters are equal, either exactly or once both have gone through {@code toLowerCase}.
 *
 * @param a First character.
 * @param b Second character.
 * @return {@code true} when the characters match exactly or match after lower-casing.
 */
private static boolean equalsIgnoreCase(final char a, final char b) {
    if (a == b) {
        return true;
    }
    return toLowerCase(a) == toLowerCase(b);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* Copyright © 2021 Apple Inc. and the ServiceTalk project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.servicetalk.buffer.api;

import org.junit.Test;

import java.util.function.Function;

import static io.servicetalk.buffer.api.CharSequences.split;
import static java.util.function.Function.identity;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;

/**
 * Tests for {@code CharSequences.split(CharSequence, char, boolean)}, executed against both plain
 * {@link String} inputs and ascii strings created through {@code CharSequences.newAsciiString}.
 */
public class CharSequencesTest {

    // Common strings
    public static final String GZIP = "gzip";
    public static final String DEFLATE = "deflate";
    public static final String COMPRESS = "compress";

    /**
     * Verifies splitting with {@code trim == false}: white-space around tokens is preserved and every
     * delimiter closes a (possibly empty) token.
     *
     * @param f Converts each literal into the {@link CharSequence} flavour under test; it is applied to both
     * the input and the expected tokens.
     */
    private static void splitNoTrim(Function<String, ? extends CharSequence> f) {
        assertThat(split(f.apply(" , "), ',', false),
                contains(f.apply(" "), f.apply(" ")));
        assertThat(split(f.apply(" , ,"), ',', false),
                contains(f.apply(" "), f.apply(" "), f.apply("")));
        assertThat(split(f.apply(" gzip , deflate "), ',', false),
                contains(f.apply(" gzip "), f.apply(" deflate ")));
        assertThat(split(f.apply(" gzip , deflate ,"), ',', false),
                contains(f.apply(" gzip "), f.apply(" deflate "), f.apply("")));
        assertThat(split(f.apply("gzip, deflate"), ',', false),
                contains(f.apply(GZIP), f.apply(" deflate")));
        assertThat(split(f.apply("gzip , deflate"), ',', false),
                contains(f.apply("gzip "), f.apply(" deflate")));
        // Runs of several spaces around the delimiter are preserved verbatim.
        assertThat(split(f.apply("gzip  ,  deflate"), ',', false),
                contains(f.apply("gzip  "), f.apply("  deflate")));
        assertThat(split(f.apply(" gzip, deflate"), ',', false),
                contains(f.apply(" gzip"), f.apply(" deflate")));
        assertThat(split(f.apply(GZIP), ',', false),
                contains(f.apply(GZIP)));
        assertThat(split(f.apply("gzip,"), ',', false),
                contains(f.apply(GZIP), f.apply("")));
        assertThat(split(f.apply("gzip,deflate,compress"), ',', false),
                contains(f.apply(GZIP), f.apply(DEFLATE), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip,,compress"), ',', false),
                contains(f.apply(GZIP), f.apply(""), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip, ,compress"), ',', false),
                contains(f.apply(GZIP), f.apply(" "), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip , , compress"), ',', false),
                contains(f.apply("gzip "), f.apply(" "), f.apply(" compress")));
        assertThat(split(f.apply("gzip , white space word , compress"), ',', false),
                contains(f.apply("gzip "), f.apply(" white space word "), f.apply(" compress")));
        assertThat(split(f.apply("gzip compress"), ' ', false),
                contains(f.apply(GZIP), f.apply(COMPRESS)));
        // Five consecutive space delimiters separate the words: four empty tokens sit between them.
        assertThat(split(f.apply("gzip     compress"), ' ', false),
                contains(f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(COMPRESS)));
        assertThat(split(f.apply(" gzip     compress "), ' ', false),
                contains(f.apply(""), f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(""), f.apply(COMPRESS), f.apply("")));
        assertThat(split(f.apply("gzip,,,,,compress"), ',', false),
                contains(f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(COMPRESS)));
        assertThat(split(f.apply(",gzip,,,,,compress,"), ',', false),
                contains(f.apply(""), f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(""), f.apply(COMPRESS), f.apply("")));
        assertThat(split(f.apply(",,,,"), ',', false),
                contains(f.apply(""), f.apply(""), f.apply(""), f.apply(""), f.apply("")));
        // Four space delimiters and nothing else: five empty tokens.
        assertThat(split(f.apply("    "), ' ', false),
                contains(f.apply(""), f.apply(""), f.apply(""), f.apply(""), f.apply("")));
    }

    /**
     * Verifies splitting with {@code trim == true}: spaces surrounding each token are removed, spaces inside a
     * token are kept, and every delimiter still closes a (possibly empty) token.
     *
     * @param f Converts each literal into the {@link CharSequence} flavour under test; it is applied to both
     * the input and the expected tokens.
     */
    private static void splitWithTrim(Function<String, ? extends CharSequence> f) {
        assertThat(split(f.apply(" , "), ',', true),
                contains(f.apply(""), f.apply("")));
        assertThat(split(f.apply(" , ,"), ',', true),
                contains(f.apply(""), f.apply(""), f.apply("")));
        assertThat(split(f.apply(" gzip , deflate "), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE)));
        assertThat(split(f.apply(" gzip , deflate ,"), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE), f.apply("")));
        assertThat(split(f.apply("gzip, deflate"), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE)));
        assertThat(split(f.apply("gzip , deflate"), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE)));
        // Runs of several spaces around the delimiter are trimmed as well.
        assertThat(split(f.apply("gzip  ,  deflate"), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE)));
        assertThat(split(f.apply(" gzip, deflate"), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE)));
        assertThat(split(f.apply(GZIP), ',', true),
                contains(f.apply(GZIP)));
        assertThat(split(f.apply("gzip,"), ',', true),
                contains(f.apply(GZIP), f.apply("")));
        assertThat(split(f.apply("gzip,deflate,compress"), ',', true),
                contains(f.apply(GZIP), f.apply(DEFLATE), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip,,compress"), ',', true),
                contains(f.apply(GZIP), f.apply(""), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip, ,compress"), ',', true),
                contains(f.apply(GZIP), f.apply(""), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip , , compress"), ',', true),
                contains(f.apply(GZIP), f.apply(""), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip , white space word , compress"), ',', true),
                contains(f.apply(GZIP), f.apply("white space word"), f.apply(COMPRESS)));
        assertThat(split(f.apply("gzip compress"), ' ', true),
                contains(f.apply(GZIP), f.apply(COMPRESS)));
        // When the delimiter itself is a space, each space still closes a token: four empty tokens in between.
        assertThat(split(f.apply("gzip     compress"), ' ', true),
                contains(f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(COMPRESS)));
        assertThat(split(f.apply(" gzip     compress "), ' ', true),
                contains(f.apply(""), f.apply(GZIP), f.apply(""), f.apply(""),
                        f.apply(""), f.apply(""), f.apply(COMPRESS), f.apply("")));
        assertThat(split(f.apply("gzip,,,,,compress"), ',', true),
                contains(f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(COMPRESS)));
        assertThat(split(f.apply(",gzip,,,,,compress,"), ',', true),
                contains(f.apply(""), f.apply(GZIP), f.apply(""), f.apply(""), f.apply(""),
                        f.apply(""), f.apply(COMPRESS), f.apply("")));
        assertThat(split(f.apply(",,,,"), ',', true),
                contains(f.apply(""), f.apply(""), f.apply(""), f.apply(""), f.apply("")));
        // Four space delimiters and nothing else: five empty tokens.
        assertThat(split(f.apply("    "), ' ', true),
                contains(f.apply(""), f.apply(""), f.apply(""), f.apply(""), f.apply("")));
    }

    /** Non-trimming split on {@link String} inputs. */
    @Test
    public void splitStringNoTrim() {
        splitNoTrim(identity());
    }

    /** Trimming split on {@link String} inputs. */
    @Test
    public void splitStringWithTrim() {
        splitWithTrim(identity());
    }

    /** Non-trimming split on ascii string inputs. */
    @Test
    public void splitAsciiNoTrim() {
        splitNoTrim(CharSequences::newAsciiString);
    }

    /** Trimming split on ascii string inputs. */
    @Test
    public void splitAsciiWithTrim() {
        splitWithTrim(CharSequences::newAsciiString);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.List;
import javax.annotation.Nullable;

import static io.servicetalk.buffer.api.CharSequences.split;
import static io.servicetalk.encoding.api.ContentCodings.identity;
import static java.util.Collections.singletonList;
import static java.util.Objects.requireNonNull;
Expand Down Expand Up @@ -104,7 +105,7 @@ private static List<ContentCodec> parseAcceptEncoding(@Nullable final CharSequen
}

List<ContentCodec> knownEncodings = new ArrayList<>();
List<CharSequence> acceptEncodingValues = CharSequences.split(acceptEncodingHeaderValue, ',');
List<CharSequence> acceptEncodingValues = split(acceptEncodingHeaderValue, ',', true);
for (CharSequence val : acceptEncodingValues) {
ContentCodec enc = encodingFor(allowedEncodings, val);
if (enc != null) {
Expand Down
1 change: 1 addition & 0 deletions servicetalk-grpc-netty/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies {
testImplementation testFixtures(project(":servicetalk-concurrent-api"))
testImplementation testFixtures(project(":servicetalk-concurrent-internal"))
testImplementation testFixtures(project(":servicetalk-transport-netty-internal"))
testImplementation testFixtures(project(":servicetalk-buffer-api"))
testImplementation project(":servicetalk-concurrent-api-internal")
testImplementation project(":servicetalk-concurrent-test-internal")
testImplementation project(":servicetalk-encoding-api-internal")
Expand Down
Loading

0 comments on commit 4fd7559

Please sign in to comment.