Skip to content

Commit dbca717

Browse files
committed
simplify regexparser
1 parent 5f5a50d commit dbca717

File tree

2 files changed

+24
-20
lines changed

2 files changed

+24
-20
lines changed

dot-parse/src/main/java/com/google/common/labs/parse/Parser.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ public static Parser<String> string(String value) {
217217
* .map(digits -> Character.toString(Integer.parseInt(digits, 16)));
218218
* quotedStringWithEscapes('"', unicodeEscaped.or(chars(1))).parse("foo\\uD83D");
219219
* }</pre>
220+
*
221+
* @since 9.4
220222
*/
221223
public static Parser<String> quotedStringWithEscapes(
222224
char quoteChar, Parser<? extends CharSequence> escaped) {
@@ -564,7 +566,7 @@ public static <A, B, R> Parser<R>.OrEmpty zeroOrMoreDelimited(
564566
}
565567

566568
/**
567-
* Applies {@code first} and the optional {@code second} patterns in order, for zero or more
569+
* Applies {@code first} and the optional {@code second} pattern in order, zero or more
568570
* times, collecting the results using the provided {@link BiCollector}.
569571
*
570572
* <p>Typically used to parse key-value pairs:

dot-parse/src/main/java/com/google/common/labs/regex/RegexParsers.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,18 @@
1414
*****************************************************************************/
1515
package com.google.common.labs.regex;
1616

17+
import static com.google.common.labs.parse.Parser.consecutive;
1718
import static com.google.common.labs.parse.Parser.literally;
19+
import static com.google.common.labs.parse.Parser.string;
1820
import static com.google.common.labs.parse.Parser.word;
1921
import static com.google.mu.util.CharPredicate.ANY;
2022
import static com.google.mu.util.CharPredicate.is;
2123
import static com.google.mu.util.stream.BiStream.groupingByEach;
24+
import static com.google.mu.util.stream.MoreCollectors.onlyElement;
2225
import static java.util.Arrays.stream;
26+
import static java.util.function.UnaryOperator.identity;
2327

2428
import java.util.Map;
25-
import java.util.function.UnaryOperator;
2629
import java.util.stream.Collectors;
2730

2831
import com.google.common.labs.parse.Parser;
@@ -35,21 +38,20 @@
3538
import com.google.common.labs.regex.RegexPattern.PredefinedCharClass;
3639
import com.google.common.labs.regex.RegexPattern.Quantifier;
3740
import com.google.mu.util.CharPredicate;
38-
import com.google.mu.util.stream.MoreCollectors;
3941

4042
/** Parsers for {@link RegexPattern}. */
4143
final class RegexParsers {
4244
private static final Parser<Character> ESCAPED_CHAR =
43-
literally(Parser.string("\\").then(Parser.single(ANY, "escaped char")));
45+
literally(string("\\").then(Parser.single(ANY, "escaped char")));
4446
private static final Map<String, RegexPattern.CharacterProperty> POSIX_CHAR_CLASS_MAP =
4547
stream(RegexPattern.PosixCharClass.values())
46-
.collect(groupingByEach(charClass -> charClass.names().stream(), MoreCollectors.onlyElement(UnaryOperator.identity())))
48+
.collect(groupingByEach(charClass -> charClass.names().stream(), onlyElement(identity())))
4749
.collect(Collectors::toUnmodifiableMap);
4850
static final Parser<?> FREE_SPACES =
4951
Parser.anyOf(
50-
Parser.consecutive(Character::isWhitespace, "whitespace"),
51-
Parser.string("#")
52-
.then(Parser.consecutive(c -> c != '\n', "comment").optionallyFollowedBy("\n")));
52+
consecutive(Character::isWhitespace, "whitespace"),
53+
string("#")
54+
.then(consecutive(c -> c != '\n', "comment").followedByOrEof(string("\n"))));
5355

5456
static Parser<RegexPattern> pattern() {
5557
var lazy = new Parser.Rule<RegexPattern>();
@@ -61,9 +63,9 @@ static Parser<RegexPattern> pattern() {
6163
groupOrLookaround(lazy),
6264
anyOf(PredefinedCharClass.values()),
6365
anyOf(Anchor.values()),
64-
Parser.consecutive(CharPredicate.noneOf(".[]{}()*+?^$|\\ #"), "literal char")
66+
consecutive(CharPredicate.noneOf(".[]{}()*+?^$|\\ #"), "literal char")
6567
.map(Literal::new),
66-
Parser.consecutive(is('#').or(Character::isWhitespace), "whitespace or #").map(Literal::new),
68+
consecutive(is('#').or(Character::isWhitespace), "whitespace or #").map(Literal::new),
6769
ESCAPED_CHAR.map(c -> new Literal(Character.toString(c))));
6870
Parser<RegexPattern> sequence =
6971
atomic.postfix(quantifier()).atLeastOnce(RegexPattern.inSequence());
@@ -72,27 +74,27 @@ static Parser<RegexPattern> pattern() {
7274

7375
private static Parser<Quantifier> quantifier() {
7476
Parser<Integer> number = Parser.digits().map(Integer::parseInt);
75-
Parser<Quantifier> question = Parser.string("?").thenReturn(Quantifier.atMost(1));
76-
Parser<Quantifier> star = Parser.string("*").thenReturn(Quantifier.repeated());
77-
Parser<Quantifier> plus = Parser.string("+").thenReturn(Quantifier.atLeast(1));
77+
Parser<Quantifier> question = string("?").thenReturn(Quantifier.atMost(1));
78+
Parser<Quantifier> star = string("*").thenReturn(Quantifier.repeated());
79+
Parser<Quantifier> plus = string("+").thenReturn(Quantifier.atLeast(1));
7880
Parser<Quantifier> exact = number.between("{", "}").map(Quantifier::repeated);
7981
Parser<Quantifier> atLeast = number.followedBy(",").between("{", "}").map(Quantifier::atLeast);
8082
Parser<Quantifier> atMost =
81-
Parser.string(",").then(number).between("{", "}").map(Quantifier::atMost);
83+
string(",").then(number).between("{", "}").map(Quantifier::atMost);
8284
Parser<Quantifier> range =
83-
Parser.sequence(number, Parser.string(",").then(number), Quantifier::repeated)
85+
Parser.sequence(number, string(",").then(number), Quantifier::repeated)
8486
.between("{", "}");
8587
return Parser.anyOf(question, star, plus, exact, atLeast, atMost, range)
8688
.optionallyFollowedBy("?", Quantifier::reluctant)
8789
.optionallyFollowedBy("+", Quantifier::possessive);
8890
}
8991

9092
private static Parser<RegexPattern.CharacterProperty> positiveCharacterProperty() {
91-
return Parser.string("\\p").then(characterPropertySuffix());
93+
return string("\\p").then(characterPropertySuffix());
9294
}
9395

9496
private static Parser<RegexPattern.CharacterProperty.Negated> negativeCharacterProperty() {
95-
return Parser.string("\\P")
97+
return string("\\P")
9698
.then(characterPropertySuffix())
9799
.map(RegexPattern.CharacterProperty::negated);
98100
}
@@ -113,7 +115,7 @@ private static Parser<RegexPattern.CharacterSet> charClass() {
113115
ESCAPED_CHAR, Parser.single(CharPredicate.noneOf("]\\"), "literal character or dash"));
114116
Parser<CharRange> range =
115117
Parser.sequence(
116-
literalChar, Parser.string("-").then(literalChar), RegexPattern.CharRange::new);
118+
literalChar, string("-").then(literalChar), RegexPattern.CharRange::new);
117119
var element =
118120
Parser.anyOf(
119121
positiveCharacterProperty(),
@@ -129,7 +131,7 @@ private static Parser<RegexPattern.CharacterSet> charClass() {
129131
private static Parser<RegexPattern> groupOrLookaround(Parser<RegexPattern> content) {
130132
Parser<Group.Named> named =
131133
word()
132-
.between(Parser.string("?<").or(Parser.string("?P<")), Parser.string(">"))
134+
.between(string("?<").or(string("?P<")), string(">"))
133135
.flatMap(n -> content.map(c -> new Group.Named(n, c)))
134136
.between("(", ")");
135137
return Parser.anyOf(
@@ -143,6 +145,6 @@ private static Parser<RegexPattern> groupOrLookaround(Parser<RegexPattern> conte
143145
}
144146

145147
private static <E extends Enum<E>> Parser<E> anyOf(E... values) {
146-
return stream(values).map(e -> Parser.string(e.toString()).thenReturn(e)).collect(Parser.or());
148+
return stream(values).map(e -> string(e.toString()).thenReturn(e)).collect(Parser.or());
147149
}
148150
}

0 commit comments

Comments
 (0)