From 1b2931a787b978597fcf7b78f841298190b594fb Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Fri, 7 Apr 2023 11:02:42 +0200 Subject: [PATCH 1/3] CommandLineOptions: disallow recursive embedding of @-files. And tests. --- .../java/CommandLineOptionsParser.java | 18 +++++++-- .../java/CommandLineOptionsParserTest.java | 39 +++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java index f7c3dec95..f8a6e5654 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java +++ b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java @@ -17,6 +17,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.CharMatcher; +import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableRangeSet; import com.google.common.collect.Range; @@ -25,7 +26,9 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Deque; import java.util.Iterator; import java.util.List; @@ -41,7 +44,7 @@ final class CommandLineOptionsParser { static CommandLineOptions parse(Iterable options) { CommandLineOptions.Builder optionsBuilder = CommandLineOptions.builder(); List expandedOptions = new ArrayList<>(); - expandParamsFiles(options, expandedOptions); + expandParamsFiles(options, expandedOptions, new ArrayDeque<>()); Iterator it = expandedOptions.iterator(); while (it.hasNext()) { String option = it.next(); @@ -186,7 +189,7 @@ private static Range parseRange(String arg) { * Pre-processes an argument list, expanding arguments of the form {@code @filename} by reading * the content of the file and appending whitespace-delimited options to {@code arguments}. */ - private static void expandParamsFiles(Iterable args, List expanded) { + private static void expandParamsFiles(Iterable args, List expanded, Deque paramFilesStack) { for (String arg : args) { if (arg.isEmpty()) { continue; @@ -196,13 +199,20 @@ private static void expandParamsFiles(Iterable args, List expand } else if (arg.startsWith("@@")) { expanded.add(arg.substring(1)); } else { - Path path = Paths.get(arg.substring(1)); + String filename = arg.substring(1); + if (paramFilesStack.contains(filename)) { + throw new IllegalArgumentException("parameter file was included recursively: " + filename); + } + paramFilesStack.push(filename); + Path path = Paths.get(filename); try { String sequence = new String(Files.readAllBytes(path), UTF_8); - expandParamsFiles(ARG_SPLITTER.split(sequence), expanded); + expandParamsFiles(ARG_SPLITTER.split(sequence), expanded, paramFilesStack); } catch (IOException e) { throw new UncheckedIOException(path + ": could not read file: " + e.getMessage(), e); } + String finishedFilename = paramFilesStack.pop(); + Preconditions.checkState(filename.equals(finishedFilename)); } } } diff --git a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java index 1a4ed09b4..612ca3a44 100644 --- a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java +++ b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java @@ -18,6 +18,7 @@ import static com.google.common.truth.Truth8.assertThat; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.fail; +import static org.junit.Assert.assertThrows; import com.google.common.collect.ImmutableList; import com.google.common.collect.Range; @@ -179,6 +180,44 @@ public void paramsFile() throws IOException { assertThat(options.files()).containsExactly("L", "M", "ℕ", "@O", "P", "Q"); } + @Test + public void paramsFileWithNesting() throws IOException { + Path outer = testFolder.newFile("outer").toPath(); + Path exit = testFolder.newFile("exit").toPath(); + Path nested1 = testFolder.newFile("nested1").toPath(); + Path nested2 = testFolder.newFile("nested2").toPath(); + Path nested3 = testFolder.newFile("nested3").toPath(); + + String[] args = {"--dry-run", "@" + exit, "L", "@" + outer, "U"}; + + Files.write(exit, "--set-exit-if-changed".getBytes(UTF_8)); + Files.write(outer, ("M\n@" + nested1.toAbsolutePath() + "\nT").getBytes(UTF_8)); + Files.write(nested1, ("ℕ\n@" + nested2.toAbsolutePath() + "\nS").getBytes(UTF_8)); + Files.write(nested2, ("O\n@" + nested3.toAbsolutePath() + "\nR").getBytes(UTF_8)); + Files.write(nested3, "P\n\n \n@@Q\n".getBytes(UTF_8)); + + CommandLineOptions options = CommandLineOptionsParser.parse(Arrays.asList(args)); + assertThat(options.files()).containsExactly("L", "M", "ℕ", "O", "P", "@Q", "R", "S", "T", "U"); + } + + @Test + public void paramsFileWithRecursion() throws IOException { + Path outer = testFolder.newFile("outer").toPath(); + Path exit = testFolder.newFile("exit").toPath(); + Path nested1 = testFolder.newFile("nested1").toPath(); + Path nested2 = testFolder.newFile("nested2").toPath(); + + String[] args = {"--dry-run", "@" + exit, "L", "@" + outer, "U"}; + + Files.write(exit, "--set-exit-if-changed".getBytes(UTF_8)); + Files.write(outer, ("M\n@" + nested1.toAbsolutePath() + "\nT").getBytes(UTF_8)); + Files.write(nested1, ("ℕ\n@" + nested2.toAbsolutePath() + "\nS").getBytes(UTF_8)); + Files.write(nested2, ("O\n@" + nested1.toAbsolutePath() + "\nR").getBytes(UTF_8)); + + Exception exception = assertThrows(IllegalArgumentException.class, () -> CommandLineOptionsParser.parse(Arrays.asList(args))); + assertThat(exception.getMessage().startsWith("parameter file was included recursively: ")).isTrue(); + } + @Test public void assumeFilename() { assertThat( From c6fab2f86bbe2a9428ab014f77f1f9094a8f799f Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Fri, 7 Apr 2023 15:24:30 +0200 Subject: [PATCH 2/3] CommandLineOptions: allow quotes in @-files, e.g., for parameters containing whitespace. Parameters from @-files are no longer simply split at whitespace, but now recognize quotes (single or double quotes allowed), such that parameters can contain whitespace that are kept unmodified. Each parameter can be written as either a quoted string (single or double quotes are allowed) or a plain unquoted string. Surrounding quotes are removed from parameters when parsing. It is possible to have double quotes within a single-quoted string and vice-versa. Such internal quotes remain untouched when parsing. For simplicity, we do not handle escaped quotes. --- .../java/CommandLineOptionsParser.java | 52 ++++++++++++++----- .../java/CommandLineOptionsParserTest.java | 16 ++++++ 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java index f8a6e5654..5faedd8ad 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java +++ b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java @@ -14,9 +14,6 @@ package com.google.googlejavaformat.java; -import static java.nio.charset.StandardCharsets.UTF_8; - -import com.google.common.base.CharMatcher; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableRangeSet; @@ -25,20 +22,35 @@ import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Deque; import java.util.Iterator; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** A parser for {@link CommandLineOptions}. */ final class CommandLineOptionsParser { private static final Splitter COMMA_SPLITTER = Splitter.on(','); private static final Splitter COLON_SPLITTER = Splitter.on(':'); - private static final Splitter ARG_SPLITTER = - Splitter.on(CharMatcher.breakingWhitespace()).omitEmptyStrings().trimResults(); + + /** + * Let's split arguments on whitespace (including tabulator and newline). Additionally allow quotes for arguments, + * such that they can contain whitespace that are kept in the argument without change. + * + * The regex matches either a quoted string (single or double quotes are allowed) or a plain unquoted string. + * It is possible to have double quotes within a single-quoted string and vice-versa. This is then kept 'as-is'. + * For simplicity, we do not handle escaped quotes. + */ + private static final Pattern ARG_MATCHER = Pattern.compile( + "\"([^\"]*)\"" + // group 1: string in double quotes, with whitespace allowed + "|" + // OR + "'([^']*)'" + // group 2: string in single quotes, with whitespace allowed + "|" + // OR + "([^\\s\"']+)" // group 3: unquoted string, without whitespace and without any quotes + ); /** Parses {@link CommandLineOptions}. */ static CommandLineOptions parse(Iterable options) { @@ -204,16 +216,30 @@ private static void expandParamsFiles(Iterable args, List expand throw new IllegalArgumentException("parameter file was included recursively: " + filename); } paramFilesStack.push(filename); - Path path = Paths.get(filename); - try { - String sequence = new String(Files.readAllBytes(path), UTF_8); - expandParamsFiles(ARG_SPLITTER.split(sequence), expanded, paramFilesStack); - } catch (IOException e) { - throw new UncheckedIOException(path + ": could not read file: " + e.getMessage(), e); - } + expandParamsFiles(getParamsFromFile(filename), expanded, paramFilesStack); String finishedFilename = paramFilesStack.pop(); Preconditions.checkState(filename.equals(finishedFilename)); } } } + + /** Read parameters from file and handle quoted parameters. */ + private static List getParamsFromFile(String filename) { + String fileContent; + try { + fileContent = Files.readString(Path.of(filename)); + } catch (IOException e) { + throw new UncheckedIOException(filename + ": could not read file: " + e.getMessage(), e); + } + List paramsFromFile = new ArrayList<>(); + Matcher m = ARG_MATCHER.matcher(fileContent); + while (m.find()) { + for (int i = 1; i <= m.groupCount(); i++) { + if (m.group(i) != null) { // only one group matches: double quote, single quotes or unquoted string. + paramsFromFile.add(m.group(i)); + } + } + } + return paramsFromFile; + } } diff --git a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java index 612ca3a44..1d241393c 100644 --- a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java +++ b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java @@ -218,6 +218,22 @@ public void paramsFileWithRecursion() throws IOException { assertThat(exception.getMessage().startsWith("parameter file was included recursively: ")).isTrue(); } + @Test + public void paramsFileWithQuotesAndWhitespaces() throws IOException { + Path outer = testFolder.newFile("outer with whitespace").toPath(); + Path exit = testFolder.newFile("exit with whitespace").toPath(); + Path nested = testFolder.newFile("nested with whitespace").toPath(); + + String[] args = {"--dry-run", "@" + exit, "L +w", "@" + outer, "Q +w"}; + + Files.write(exit, "--set-exit-if-changed".getBytes(UTF_8)); + Files.write(outer, ("\"'M' +w\"\n\"@" + nested.toAbsolutePath() + "\"\n'\"P\" +w'").getBytes(UTF_8)); + Files.write(nested, "\"ℕ +w\"\n\n \n\"@@O +w\"\n".getBytes(UTF_8)); + + CommandLineOptions options = CommandLineOptionsParser.parse(Arrays.asList(args)); + assertThat(options.files()).containsExactly("L +w", "'M' +w", "ℕ +w", "@O +w", "\"P\" +w", "Q +w"); + } + @Test public void assumeFilename() { assertThat( From ad09e3d29eb56dccadc628877cb333e4090393f6 Mon Sep 17 00:00:00 2001 From: Karlheinz Friedberger Date: Fri, 7 Apr 2023 20:34:25 +0200 Subject: [PATCH 3/3] CommandLineOptions: allow open-ended quotes in @-files. We do not enforce an ending quote in @-files and simply close the string on end-fo-file. This might be nicer for the user and does not harm. --- .../googlejavaformat/java/CommandLineOptionsParser.java | 4 ++-- .../googlejavaformat/java/CommandLineOptionsParserTest.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java index 5faedd8ad..5ecee8649 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java +++ b/core/src/main/java/com/google/googlejavaformat/java/CommandLineOptionsParser.java @@ -45,9 +45,9 @@ final class CommandLineOptionsParser { * For simplicity, we do not handle escaped quotes. */ private static final Pattern ARG_MATCHER = Pattern.compile( - "\"([^\"]*)\"" + // group 1: string in double quotes, with whitespace allowed + "\"([^\"]*)(?:\"|$)" + // group 1: string in double quotes (or until EOF), with whitespace allowed "|" + // OR - "'([^']*)'" + // group 2: string in single quotes, with whitespace allowed + "'([^']*)(?:'|$)" + // group 2: string in single quotes (or until EOF), with whitespace allowed "|" + // OR "([^\\s\"']+)" // group 3: unquoted string, without whitespace and without any quotes ); diff --git a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java index 1d241393c..b2c831f4f 100644 --- a/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java +++ b/core/src/test/java/com/google/googlejavaformat/java/CommandLineOptionsParserTest.java @@ -226,12 +226,12 @@ public void paramsFileWithQuotesAndWhitespaces() throws IOException { String[] args = {"--dry-run", "@" + exit, "L +w", "@" + outer, "Q +w"}; - Files.write(exit, "--set-exit-if-changed".getBytes(UTF_8)); + Files.write(exit, "--set-exit-if-changed 'K +w".getBytes(UTF_8)); Files.write(outer, ("\"'M' +w\"\n\"@" + nested.toAbsolutePath() + "\"\n'\"P\" +w'").getBytes(UTF_8)); - Files.write(nested, "\"ℕ +w\"\n\n \n\"@@O +w\"\n".getBytes(UTF_8)); + Files.write(nested, "\"ℕ +w\"\n\n \n\"@@O +w".getBytes(UTF_8)); CommandLineOptions options = CommandLineOptionsParser.parse(Arrays.asList(args)); - assertThat(options.files()).containsExactly("L +w", "'M' +w", "ℕ +w", "@O +w", "\"P\" +w", "Q +w"); + assertThat(options.files()).containsExactly("K +w", "L +w", "'M' +w", "ℕ +w", "@O +w", "\"P\" +w", "Q +w"); } @Test