Skip to content

Commit 038583a

Browse files
committed
zeroOrMore(characterSet)
1 parent 1935595 commit 038583a

File tree

3 files changed

+79
-5
lines changed

3 files changed

+79
-5
lines changed

dot-parse/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ you can nest it between parentheses.
7979

8080
## Example — Parse Regex-like Character Set
8181

82-
The `Parser.anyCharIn()` and `Parser.oneOrMoreCharsIn()` accept a character set string.
83-
And you can call it with `anyCharIn("[0-9a-fA-F]")`, `oneOrMoreCharsIn("[^0-9]")` etc.
82+
The `Parser.anyCharIn()`, `Parser.oneOrMoreCharsIn()` and `Parser.zeroOrMoreCharsIn()` methods
83+
accept a character set string. You can call them with `anyCharIn("[0-9a-fA-F]")`,
84+
`oneOrMoreCharsIn("[^0-9]")` etc.
8485

8586
It makes it easier to create a primitive parser using a regex-like character set specification
8687
if you are already familiar with them.
@@ -124,7 +125,7 @@ Parser<CharPredicate> singleChar = supportedChar.map(CharPredicate::is);
124125
```
125126
Regex character set doesn't allow literal `']'`.
126127

127-
And the API decides not to support escaping because escaping rule is pretty complex
128+
The API decides not to support escaping because escaping rules are pretty complex
128129
and they hurt readability (particularly in Java where you can easily get lost on the
129130
number of backslashes you need). Instead, for use cases that need these special characters,
130131
there's always the `single(CharPredicate)` and `consecutive(CharPredicate)` to programmatically

dot-parse/src/main/java/com/google/common/labs/parse/Parser.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,22 @@ public final <A, R> Parser<R> atLeastOnceDelimitedBy(
463463
});
464464
}
465465

466+
/**
467+
* Starts a fluent chain for matching zero or more characters in the given {@code characterSet}. If no
468+
* such character is found, an empty string is the result.
469+
*
470+
* <p>For example if you need to parse a quoted literal that's allowed to be empty:
471+
*
472+
* <pre>{@code
473+
* zeroOrMoreCharsIn("[^']").between("'", "'")
474+
* }</pre>
475+
*
476+
* @since 9.4
477+
*/
478+
public static Parser<String>.OrEmpty zeroOrMoreCharsIn(String characterSet) {
479+
return oneOrMoreCharsIn(characterSet).orElse("");
480+
}
481+
466482
/**
467483
* Starts a fluent chain for matching consecutive {@code charsToMatch} zero or more times. If no
468484
* such character is found, empty string is the result.
@@ -1526,8 +1542,8 @@ private static CharPredicate compileCharacterSet(String characterSet) {
15261542
"Character set must be in square brackets. Use [%s] instead.", characterSet);
15271543
checkArgument(
15281544
!characterSet.contains("\\"),
1529-
"Escaping (%s) not supported. "
1530-
+ "Please use single(CharePredicate) or consecutive(CharPredicate) instead.",
1545+
"Escaping (%s) not supported in a character set. "
1546+
+ "Please use single(CharePredicate) or consecutive(CharPredicate) instead.",
15311547
characterSet);
15321548
Parser<Character> validChar = single(isNot(']'), "character");
15331549
Parser<CharPredicate> range =

dot-parse/src/test/java/com/google/common/labs/parse/ParserTest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import static com.google.common.labs.parse.Parser.string;
1010
import static com.google.common.labs.parse.Parser.word;
1111
import static com.google.common.labs.parse.Parser.zeroOrMore;
12+
import static com.google.common.labs.parse.Parser.zeroOrMoreCharsIn;
1213
import static com.google.common.truth.Truth.assertThat;
1314
import static com.google.common.truth.Truth8.assertThat;
1415
import static com.google.mu.util.CharPredicate.is;
@@ -3193,6 +3194,62 @@ public void zeroOrMore_charMatcher_matchesMultipleTimes_source() {
31933194
.containsExactly("[ 123 ]");
31943195
}
31953196

3197+
@Test
3198+
public void zeroOrMore_charSet_matchesZeroTimes() {
3199+
Parser<String> parser = zeroOrMoreCharsIn("[0-9]").between("[", "]");
3200+
assertThat(parser.parse("[]")).isEmpty();
3201+
assertThat(parser.parseToStream("[]")).containsExactly("");
3202+
assertThat(parser.parseSkipping(Character::isWhitespace, "[ ]")).isEmpty();
3203+
assertThat(parser.skipping(Character::isWhitespace).parseToStream("[ ]")).containsExactly("");
3204+
}
3205+
3206+
@Test
3207+
public void zeroOrMore_charSet_matchesZeroTimes_source() {
3208+
Parser<String> parser = zeroOrMoreCharsIn("[0-9]").between("[", "]");
3209+
assertThat(parser.source().parse("[]")).isEqualTo("[]");
3210+
assertThat(parser.source().parseToStream("[]")).containsExactly("[]");
3211+
assertThat(parser.source().parseSkipping(Character::isWhitespace, "[ ]")).isEqualTo("[ ]");
3212+
assertThat(parser.source().skipping(Character::isWhitespace).parseToStream("[ ]")).containsExactly("[ ]");
3213+
}
3214+
3215+
@Test
3216+
public void zeroOrMore_charSet_matchesOneTime() {
3217+
Parser<String> parser = zeroOrMoreCharsIn("[0-9]").between("[", "]");
3218+
assertThat(parser.parse("[1]")).isEqualTo("1");
3219+
assertThat(parser.parseToStream("[1]")).containsExactly("1");
3220+
assertThat(parser.parseSkipping(Character::isWhitespace, "[ 1 ]")).isEqualTo("1");
3221+
assertThat(parser.skipping(Character::isWhitespace).parseToStream("[ 1 ]")).containsExactly("1");
3222+
}
3223+
3224+
@Test
3225+
public void zeroOrMore_charSet_matchesOneTime_source() {
3226+
Parser<String> parser = zeroOrMoreCharsIn("[0-9]").between("[", "]");
3227+
assertThat(parser.source().parse("[1]")).isEqualTo("[1]");
3228+
assertThat(parser.source().parseToStream("[1]")).containsExactly("[1]");
3229+
assertThat(parser.source().parseSkipping(Character::isWhitespace, "[ 1 ]")).isEqualTo("[ 1 ]");
3230+
assertThat(parser.source().skipping(Character::isWhitespace).parseToStream("[ 1 ]"))
3231+
.containsExactly("[ 1 ]");
3232+
}
3233+
3234+
@Test
3235+
public void zeroOrMore_charSet_matchesMultipleTimes() {
3236+
Parser<String> parser = zeroOrMoreCharsIn("[0-9]").between("[", "]");
3237+
assertThat(parser.parse("[123]")).isEqualTo("123");
3238+
assertThat(parser.parseToStream("[123]")).containsExactly("123");
3239+
assertThat(parser.parseSkipping(Character::isWhitespace, "[ 123 ]")).isEqualTo("123");
3240+
assertThat(parser.skipping(Character::isWhitespace).parseToStream("[ 123 ]")).containsExactly("123");
3241+
}
3242+
3243+
@Test
3244+
public void zeroOrMore_charSet_matchesMultipleTimes_source() {
3245+
Parser<String> parser = zeroOrMoreCharsIn("[0-9]").between("[", "]");
3246+
assertThat(parser.source().parse("[123]")).isEqualTo("[123]");
3247+
assertThat(parser.source().parseToStream("[123]")).containsExactly("[123]");
3248+
assertThat(parser.source().parseSkipping(Character::isWhitespace, "[ 123 ]")).isEqualTo("[ 123 ]");
3249+
assertThat(parser.source().skipping(Character::isWhitespace).parseToStream("[ 123 ]"))
3250+
.containsExactly("[ 123 ]");
3251+
}
3252+
31963253
@Test
31973254
public void skipping_propagatesThroughOptional() {
31983255
Parser<String> foo = string("foo");

0 commit comments

Comments
 (0)