From c6fab2f86bbe2a9428ab014f77f1f9094a8f799f Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Fri, 7 Apr 2023 15:24:30 +0200 Subject: [PATCH] CommandLineOptions: allow quotes in @-files, e.g., for parameters containing whitespace. Parameters from @-files are no longer simply split at whitespace; the parser now recognizes quotes (single or double quotes allowed), so that parameters can contain whitespace, which is kept unmodified. Each parameter can be written as either a quoted string (single or double quotes are allowed) or a plain unquoted string. Surrounding quotes are removed from parameters when parsing. It is possible to have double quotes within a single-quoted string and vice versa. Such internal quotes remain untouched when parsing. For simplicity, we do not handle escaped quotes. --- .../java/CommandLineOptionsParser.java | 52 ++++++++++++++----- .../java/CommandLineOptionsParserTest.java | 16 ++++++ 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java index f8a6e5654..5faedd8ad 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java +++ b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java @@ -14,9 +14,6 @@ package com.google.googlejavaformat.java; -import static java.nio.charset.StandardCharsets.UTF_8; - -import com.google.common.base.CharMatcher; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableRangeSet; @@ -25,20 +22,35 @@ import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Deque; import java.util.Iterator; import java.util.List; +import java.util.regex.Matcher; +import 
java.util.regex.Pattern; /** A parser for {@link CommandLineOptions}. */ final class CommandLineOptionsParser { private static final Splitter COMMA_SPLITTER = Splitter.on(','); private static final Splitter COLON_SPLITTER = Splitter.on(':'); - private static final Splitter ARG_SPLITTER = - Splitter.on(CharMatcher.breakingWhitespace()).omitEmptyStrings().trimResults(); + + /** + * Splits arguments on whitespace (including tabs and newlines). Additionally, arguments may be quoted, + * so that they can contain whitespace, which is kept in the argument without change. + * + * The regex matches either a quoted string (single or double quotes are allowed) or a plain unquoted string. + * It is possible to have double quotes within a single-quoted string and vice versa; such inner quotes are kept as-is. + * For simplicity, we do not handle escaped quotes. + */ + private static final Pattern ARG_MATCHER = Pattern.compile( + "\"([^\"]*)\"" + // group 1: string in double quotes, with whitespace allowed + "|" + // OR + "'([^']*)'" + // group 2: string in single quotes, with whitespace allowed + "|" + // OR + "([^\\s\"']+)" // group 3: unquoted string, without whitespace and without any quotes + ); /** Parses {@link CommandLineOptions}. 
*/ static CommandLineOptions parse(Iterable options) { @@ -204,16 +216,30 @@ private static void expandParamsFiles(Iterable args, List expand throw new IllegalArgumentException("parameter file was included recursively: " + filename); } paramFilesStack.push(filename); - Path path = Paths.get(filename); - try { - String sequence = new String(Files.readAllBytes(path), UTF_8); - expandParamsFiles(ARG_SPLITTER.split(sequence), expanded, paramFilesStack); - } catch (IOException e) { - throw new UncheckedIOException(path + ": could not read file: " + e.getMessage(), e); - } + expandParamsFiles(getParamsFromFile(filename), expanded, paramFilesStack); String finishedFilename = paramFilesStack.pop(); Preconditions.checkState(filename.equals(finishedFilename)); } } } + + /** Read parameters from file and handle quoted parameters. */ + private static List getParamsFromFile(String filename) { + String fileContent; + try { + fileContent = Files.readString(Path.of(filename)); + } catch (IOException e) { + throw new UncheckedIOException(filename + ": could not read file: " + e.getMessage(), e); + } + List paramsFromFile = new ArrayList<>(); + Matcher m = ARG_MATCHER.matcher(fileContent); + while (m.find()) { + for (int i = 1; i <= m.groupCount(); i++) { + if (m.group(i) != null) { // only one group matches: double quote, single quotes or unquoted string. 
+ paramsFromFile.add(m.group(i)); + } + } + } + return paramsFromFile; + } } diff --git a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java index 612ca3a44..1d241393c 100644 --- a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java +++ b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java @@ -218,6 +218,22 @@ public void paramsFileWithRecursion() throws IOException { assertThat(exception.getMessage().startsWith("parameter file was included recursively: ")).isTrue(); } + @Test + public void paramsFileWithQuotesAndWhitespaces() throws IOException { + Path outer = testFolder.newFile("outer with whitespace").toPath(); + Path exit = testFolder.newFile("exit with whitespace").toPath(); + Path nested = testFolder.newFile("nested with whitespace").toPath(); + + String[] args = {"--dry-run", "@" + exit, "L +w", "@" + outer, "Q +w"}; + + Files.write(exit, "--set-exit-if-changed".getBytes(UTF_8)); + Files.write(outer, ("\"'M' +w\"\n\"@" + nested.toAbsolutePath() + "\"\n'\"P\" +w'").getBytes(UTF_8)); + Files.write(nested, "\"ℕ +w\"\n\n \n\"@@O +w\"\n".getBytes(UTF_8)); + + CommandLineOptions options = CommandLineOptionsParser.parse(Arrays.asList(args)); + assertThat(options.files()).containsExactly("L +w", "'M' +w", "ℕ +w", "@O +w", "\"P\" +w", "Q +w"); + } + @Test public void assumeFilename() { assertThat(