• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.base;
18 
19 import static com.google.common.base.Preconditions.checkArgument;
20 import static com.google.common.base.Preconditions.checkNotNull;
21 
22 import com.google.common.annotations.Beta;
23 import com.google.common.annotations.GwtCompatible;
24 
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.List;
28 
29 import javax.annotation.CheckReturnValue;
30 
31 /**
32  * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
33  * for any {@link Object}. Also offers basic text processing methods based on this function.
34  * Implementations are strongly encouraged to be side-effect-free and immutable.
35  *
36  * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
37  * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
38  *
39  * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
40  * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
41  * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
42  * treats these just as two separate characters.
43  *
44  * <p>Example usages: <pre>
45  *   String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput);
46  *   if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre>
47  *
48  * @author Kevin Bourrillion
49  * @since 1.0
50  */
51 @Beta // Possibly change from chars to code points; decide constants vs. methods
52 @GwtCompatible
53 public abstract class CharMatcher implements Predicate<Character> {
54   // Constants
55 
56   // Excludes 2000-2000a, which is handled as a range
57   private static final String BREAKING_WHITESPACE_CHARS =
58       "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
59 
60   // Excludes 2007, which is handled as a gap in a pair of ranges
61   private static final String NON_BREAKING_WHITESPACE_CHARS =
62       "\u00a0\u180e\u202f";
63 
64   /**
65    * Determines whether a character is whitespace according to the latest Unicode standard, as
66    * illustrated
67    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
68    * This is not the same definition used by other Java APIs. (See a
69    * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
70    * definitions of "whitespace"</a>.)
71    *
72    * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
73    * to date.
74    */
75   public static final CharMatcher WHITESPACE =
76       anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
77           .or(inRange('\u2000', '\u200a'))
78           .precomputed();
79 
80   /**
81    * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
82    * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
83    * discussion of that term.
84    *
85    * @since 2.0
86    */
87   public static final CharMatcher BREAKING_WHITESPACE =
88       anyOf(BREAKING_WHITESPACE_CHARS)
89           .or(inRange('\u2000', '\u2006'))
90           .or(inRange('\u2008', '\u200a'))
91           .precomputed();
92 
93   /**
94    * Determines whether a character is ASCII, meaning that its code point is less than 128.
95    */
96   public static final CharMatcher ASCII = inRange('\0', '\u007f');
97 
98   /**
99    * Determines whether a character is a digit according to
100    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
101    */
102   public static final CharMatcher DIGIT;
103 
104   static {
105     CharMatcher digit = inRange('0', '9');
106     String zeroes =
107         "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
108             + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
109             + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
110     for (char base : zeroes.toCharArray()) {
111       digit = digit.or(inRange(base, (char) (base + 9)));
112     }
113     DIGIT = digit.precomputed();
114   }
115 
116   /**
117    * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
118    * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
119    */
120   public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
121     @Override public boolean matches(char c) {
122       return Character.isDigit(c);
123     }
124   };
125 
126   /**
127    * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
128    * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
129    * inRange('a', 'z').or(inRange('A', 'Z'))}.
130    */
131   public static final CharMatcher JAVA_LETTER = new CharMatcher() {
132     @Override public boolean matches(char c) {
133       return Character.isLetter(c);
134     }
135   };
136 
137   /**
138    * Determines whether a character is a letter or digit according to {@link
139    * Character#isLetterOrDigit(char) Java's definition}.
140    */
141   public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
142     @Override public boolean matches(char c) {
143       return Character.isLetterOrDigit(c);
144     }
145   };
146 
147   /**
148    * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
149    * Java's definition}.
150    */
151   public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
152     @Override public boolean matches(char c) {
153       return Character.isUpperCase(c);
154     }
155   };
156 
157   /**
158    * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
159    * Java's definition}.
160    */
161   public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
162     @Override public boolean matches(char c) {
163       return Character.isLowerCase(c);
164     }
165   };
166 
167   /**
168    * Determines whether a character is an ISO control character as specified by {@link
169    * Character#isISOControl(char)}.
170    */
171   public static final CharMatcher JAVA_ISO_CONTROL =
172       inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
173 
174   /**
175    * Determines whether a character is invisible; that is, if its Unicode category is any of
176    * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
177    * PRIVATE_USE according to ICU4J.
178    */
179   public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
180       .or(inRange('\u007f', '\u00a0'))
181       .or(is('\u00ad'))
182       .or(inRange('\u0600', '\u0603'))
183       .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
184       .or(inRange('\u2000', '\u200f'))
185       .or(inRange('\u2028', '\u202f'))
186       .or(inRange('\u205f', '\u2064'))
187       .or(inRange('\u206a', '\u206f'))
188       .or(is('\u3000'))
189       .or(inRange('\ud800', '\uf8ff'))
190       .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
191       .precomputed();
192 
193   /**
194    * Determines whether a character is single-width (not double-width). When in doubt, this matcher
195    * errs on the side of returning {@code false} (that is, it tends to assume a character is
196    * double-width).
197    *
198    * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
199    * date.
200    */
201   public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
202       .or(is('\u05be'))
203       .or(inRange('\u05d0', '\u05ea'))
204       .or(is('\u05f3'))
205       .or(is('\u05f4'))
206       .or(inRange('\u0600', '\u06ff'))
207       .or(inRange('\u0750', '\u077f'))
208       .or(inRange('\u0e00', '\u0e7f'))
209       .or(inRange('\u1e00', '\u20af'))
210       .or(inRange('\u2100', '\u213a'))
211       .or(inRange('\ufb50', '\ufdff'))
212       .or(inRange('\ufe70', '\ufeff'))
213       .or(inRange('\uff61', '\uffdc'))
214       .precomputed();
215 
216   /** Matches any character. */
217   public static final CharMatcher ANY =
218       new CharMatcher() {
219         @Override public boolean matches(char c) {
220           return true;
221         }
222 
223         @Override public int indexIn(CharSequence sequence) {
224           return (sequence.length() == 0) ? -1 : 0;
225         }
226 
227         @Override public int indexIn(CharSequence sequence, int start) {
228           int length = sequence.length();
229           Preconditions.checkPositionIndex(start, length);
230           return (start == length) ? -1 : start;
231         }
232 
233         @Override public int lastIndexIn(CharSequence sequence) {
234           return sequence.length() - 1;
235         }
236 
237         @Override public boolean matchesAllOf(CharSequence sequence) {
238           checkNotNull(sequence);
239           return true;
240         }
241 
242         @Override public boolean matchesNoneOf(CharSequence sequence) {
243           return sequence.length() == 0;
244         }
245 
246         @Override public String removeFrom(CharSequence sequence) {
247           checkNotNull(sequence);
248           return "";
249         }
250 
251         @Override public String replaceFrom(CharSequence sequence, char replacement) {
252           char[] array = new char[sequence.length()];
253           Arrays.fill(array, replacement);
254           return new String(array);
255         }
256 
257         @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
258           StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
259           for (int i = 0; i < sequence.length(); i++) {
260             retval.append(replacement);
261           }
262           return retval.toString();
263         }
264 
265         @Override public String collapseFrom(CharSequence sequence, char replacement) {
266           return (sequence.length() == 0) ? "" : String.valueOf(replacement);
267         }
268 
269         @Override public String trimFrom(CharSequence sequence) {
270           checkNotNull(sequence);
271           return "";
272         }
273 
274         @Override public int countIn(CharSequence sequence) {
275           return sequence.length();
276         }
277 
278         @Override public CharMatcher and(CharMatcher other) {
279           return checkNotNull(other);
280         }
281 
282         @Override public CharMatcher or(CharMatcher other) {
283           checkNotNull(other);
284           return this;
285         }
286 
287         @Override public CharMatcher negate() {
288           return NONE;
289         }
290 
291         @Override public CharMatcher precomputed() {
292           return this;
293         }
294       };
295 
296   /** Matches no characters. */
297   public static final CharMatcher NONE =
298       new CharMatcher() {
299         @Override public boolean matches(char c) {
300           return false;
301         }
302 
303         @Override public int indexIn(CharSequence sequence) {
304           checkNotNull(sequence);
305           return -1;
306         }
307 
308         @Override public int indexIn(CharSequence sequence, int start) {
309           int length = sequence.length();
310           Preconditions.checkPositionIndex(start, length);
311           return -1;
312         }
313 
314         @Override public int lastIndexIn(CharSequence sequence) {
315           checkNotNull(sequence);
316           return -1;
317         }
318 
319         @Override public boolean matchesAllOf(CharSequence sequence) {
320           return sequence.length() == 0;
321         }
322 
323         @Override public boolean matchesNoneOf(CharSequence sequence) {
324           checkNotNull(sequence);
325           return true;
326         }
327 
328         @Override public String removeFrom(CharSequence sequence) {
329           return sequence.toString();
330         }
331 
332         @Override public String replaceFrom(CharSequence sequence, char replacement) {
333           return sequence.toString();
334         }
335 
336         @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
337           checkNotNull(replacement);
338           return sequence.toString();
339         }
340 
341         @Override public String collapseFrom(CharSequence sequence, char replacement) {
342           return sequence.toString();
343         }
344 
345         @Override public String trimFrom(CharSequence sequence) {
346           return sequence.toString();
347         }
348 
349         @Override public int countIn(CharSequence sequence) {
350           checkNotNull(sequence);
351           return 0;
352         }
353 
354         @Override public CharMatcher and(CharMatcher other) {
355           checkNotNull(other);
356           return this;
357         }
358 
359         @Override public CharMatcher or(CharMatcher other) {
360           return checkNotNull(other);
361         }
362 
363         @Override public CharMatcher negate() {
364           return ANY;
365         }
366 
367         @Override void setBits(LookupTable table) {}
368 
369         @Override public CharMatcher precomputed() {
370           return this;
371         }
372       };
373 
374   // Static factories
375 
376   /**
377    * Returns a {@code char} matcher that matches only one specified character.
378    */
is(final char match)379   public static CharMatcher is(final char match) {
380     return new CharMatcher() {
381       @Override public boolean matches(char c) {
382         return c == match;
383       }
384 
385       @Override public String replaceFrom(CharSequence sequence, char replacement) {
386         return sequence.toString().replace(match, replacement);
387       }
388 
389       @Override public CharMatcher and(CharMatcher other) {
390         return other.matches(match) ? this : NONE;
391       }
392 
393       @Override public CharMatcher or(CharMatcher other) {
394         return other.matches(match) ? other : super.or(other);
395       }
396 
397       @Override public CharMatcher negate() {
398         return isNot(match);
399       }
400 
401       @Override void setBits(LookupTable table) {
402         table.set(match);
403       }
404 
405       @Override public CharMatcher precomputed() {
406         return this;
407       }
408     };
409   }
410 
411   /**
412    * Returns a {@code char} matcher that matches any character except the one specified.
413    *
414    * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
415    */
416   public static CharMatcher isNot(final char match) {
417     return new CharMatcher() {
418       @Override public boolean matches(char c) {
419         return c != match;
420       }
421 
422       @Override public CharMatcher and(CharMatcher other) {
423         return other.matches(match) ? super.and(other) : other;
424       }
425 
426       @Override public CharMatcher or(CharMatcher other) {
427         return other.matches(match) ? ANY : this;
428       }
429 
430       @Override public CharMatcher negate() {
431         return is(match);
432       }
433     };
434   }
435 
436   /**
437    * Returns a {@code char} matcher that matches any character present in the given character
438    * sequence.
439    */
440   public static CharMatcher anyOf(final CharSequence sequence) {
441     switch (sequence.length()) {
442       case 0:
443         return NONE;
444       case 1:
445         return is(sequence.charAt(0));
446       case 2:
447         final char match1 = sequence.charAt(0);
448         final char match2 = sequence.charAt(1);
449         return new CharMatcher() {
450           @Override public boolean matches(char c) {
451             return c == match1 || c == match2;
452           }
453 
454           @Override void setBits(LookupTable table) {
455             table.set(match1);
456             table.set(match2);
457           }
458 
459           @Override public CharMatcher precomputed() {
460             return this;
461           }
462         };
463     }
464 
465     final char[] chars = sequence.toString().toCharArray();
466     Arrays.sort(chars); // not worth collapsing duplicates
467 
468     return new CharMatcher() {
469       @Override public boolean matches(char c) {
470         return Arrays.binarySearch(chars, c) >= 0;
471       }
472 
473       @Override void setBits(LookupTable table) {
474         for (char c : chars) {
475           table.set(c);
476         }
477       }
478     };
479   }
480 
481   /**
482    * Returns a {@code char} matcher that matches any character not present in the given character
483    * sequence.
484    */
485   public static CharMatcher noneOf(CharSequence sequence) {
486     return anyOf(sequence).negate();
487   }
488 
489   /**
490    * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
491    * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
492    * CharMatcher.inRange('a', 'z')}.
493    *
494    * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
495    */
496   public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
497     checkArgument(endInclusive >= startInclusive);
498     return new CharMatcher() {
499       @Override public boolean matches(char c) {
500         return startInclusive <= c && c <= endInclusive;
501       }
502 
503       @Override void setBits(LookupTable table) {
504         char c = startInclusive;
505         while (true) {
506           table.set(c);
507           if (c++ == endInclusive) {
508             break;
509           }
510         }
511       }
512 
513       @Override public CharMatcher precomputed() {
514         return this;
515       }
516     };
517   }
518 
519   /**
520    * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
521    * which operates on primitive {@code char} instances instead.
522    */
523   public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
524     checkNotNull(predicate);
525     if (predicate instanceof CharMatcher) {
526       return (CharMatcher) predicate;
527     }
528     return new CharMatcher() {
529       @Override public boolean matches(char c) {
530         return predicate.apply(c);
531       }
532 
533       @Override public boolean apply(Character character) {
534         return predicate.apply(checkNotNull(character));
535       }
536     };
537   }
538 
539   // Constructors
540 
541   /**
542    * Constructor for use by subclasses.
543    */
544   protected CharMatcher() {}
545 
546   // Abstract methods
547 
548   /** Determines a true or false value for the given character. */
549   public abstract boolean matches(char c);
550 
551   // Non-static factories
552 
553   /**
554    * Returns a matcher that matches any character not matched by this matcher.
555    */
556   public CharMatcher negate() {
557     final CharMatcher original = this;
558     return new CharMatcher() {
559       @Override public boolean matches(char c) {
560         return !original.matches(c);
561       }
562 
563       @Override public boolean matchesAllOf(CharSequence sequence) {
564         return original.matchesNoneOf(sequence);
565       }
566 
567       @Override public boolean matchesNoneOf(CharSequence sequence) {
568         return original.matchesAllOf(sequence);
569       }
570 
571       @Override public int countIn(CharSequence sequence) {
572         return sequence.length() - original.countIn(sequence);
573       }
574 
575       @Override public CharMatcher negate() {
576         return original;
577       }
578     };
579   }
580 
581   /**
582    * Returns a matcher that matches any character matched by both this matcher and {@code other}.
583    */
584   public CharMatcher and(CharMatcher other) {
585     return new And(Arrays.asList(this, checkNotNull(other)));
586   }
587 
588   private static class And extends CharMatcher {
589     List<CharMatcher> components;
590 
591     And(List<CharMatcher> components) {
592       this.components = components; // Skip defensive copy (private)
593     }
594 
595     @Override public boolean matches(char c) {
596       for (CharMatcher matcher : components) {
597         if (!matcher.matches(c)) {
598           return false;
599         }
600       }
601       return true;
602     }
603 
604     @Override public CharMatcher and(CharMatcher other) {
605       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
606       newComponents.add(checkNotNull(other));
607       return new And(newComponents);
608     }
609   }
610 
611   /**
612    * Returns a matcher that matches any character matched by either this matcher or {@code other}.
613    */
614   public CharMatcher or(CharMatcher other) {
615     return new Or(Arrays.asList(this, checkNotNull(other)));
616   }
617 
618   private static class Or extends CharMatcher {
619     List<CharMatcher> components;
620 
621     Or(List<CharMatcher> components) {
622       this.components = components; // Skip defensive copy (private)
623     }
624 
625     @Override public boolean matches(char c) {
626       for (CharMatcher matcher : components) {
627         if (matcher.matches(c)) {
628           return true;
629         }
630       }
631       return false;
632     }
633 
634     @Override public CharMatcher or(CharMatcher other) {
635       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
636       newComponents.add(checkNotNull(other));
637       return new Or(newComponents);
638     }
639 
640     @Override void setBits(LookupTable table) {
641       for (CharMatcher matcher : components) {
642         matcher.setBits(table);
643       }
644     }
645   }
646 
647   /**
648    * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
649    * query than the original; your mileage may vary. Precomputation takes time and is likely to be
650    * worthwhile only if the precomputed matcher is queried many thousands of times.
651    *
652    * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
653    * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
654    * worthwhile tradeoff in a browser.
655    */
656   public CharMatcher precomputed() {
657     return Platform.precomputeCharMatcher(this);
658   }
659 
660   /**
661    * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
662    * on {@link Platform} so that we can have different behavior in GWT.
663    *
664    * <p>The default precomputation is to cache the configuration of the original matcher in an
665    * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
666    * than the original.
667    *
668    * <p>The default implementation creates a new bit array and passes it to {@link
669    * #setBits(LookupTable)}.
670    */
671   CharMatcher precomputedInternal() {
672     final LookupTable table = new LookupTable();
673     setBits(table);
674 
675     return new CharMatcher() {
676       @Override public boolean matches(char c) {
677         return table.get(c);
678       }
679 
680       // TODO(kevinb): make methods like negate() smart?
681 
682       @Override public CharMatcher precomputed() {
683         return this;
684       }
685     };
686   }
687 
688   /**
689    * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
690    * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
691    *
692    * <p>The default implementation loops over every possible character value, invoking {@link
693    * #matches} for each one.
694    */
695   void setBits(LookupTable table) {
696     char c = Character.MIN_VALUE;
697     while (true) {
698       if (matches(c)) {
699         table.set(c);
700       }
701       if (c++ == Character.MAX_VALUE) {
702         break;
703       }
704     }
705   }
706 
707   /**
708    * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
709    *
710    * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
711    * simpler java.util.BitSet.
712    */
713   private static final class LookupTable {
714     int[] data = new int[2048];
715 
716     void set(char index) {
717       data[index >> 5] |= (1 << index);
718     }
719 
720     boolean get(char index) {
721       return (data[index >> 5] & (1 << index)) != 0;
722     }
723   }
724 
725   // Text processing routines
726 
727   /**
728    * Returns {@code true} if a character sequence contains at least one matching character.
729    * Equivalent to {@code !matchesNoneOf(sequence)}.
730    *
731    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
732    * character, until this returns {@code true} or the end is reached.
733    *
734    * @param sequence the character sequence to examine, possibly empty
735    * @return {@code true} if this matcher matches at least one character in the sequence
736    * @since 8.0
737    */
738   public boolean matchesAnyOf(CharSequence sequence) {
739     return !matchesNoneOf(sequence);
740   }
741 
742   /**
743    * Returns {@code true} if a character sequence contains only matching characters.
744    *
745    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
746    * character, until this returns {@code false} or the end is reached.
747    *
748    * @param sequence the character sequence to examine, possibly empty
749    * @return {@code true} if this matcher matches every character in the sequence, including when
750    *         the sequence is empty
751    */
752   public boolean matchesAllOf(CharSequence sequence) {
753     for (int i = sequence.length() - 1; i >= 0; i--) {
754       if (!matches(sequence.charAt(i))) {
755         return false;
756       }
757     }
758     return true;
759   }
760 
761   /**
762    * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
763    * {@code !matchesAnyOf(sequence)}.
764    *
765    * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
766    * character, until this returns {@code false} or the end is reached.
767    *
768    * @param sequence the character sequence to examine, possibly empty
769    * @return {@code true} if this matcher matches every character in the sequence, including when
770    *         the sequence is empty
771    */
772   public boolean matchesNoneOf(CharSequence sequence) {
773     return indexIn(sequence) == -1;
774   }
775 
776   // TODO(kevinb): add matchesAnyOf()
777 
778   /**
779    * Returns the index of the first matching character in a character sequence, or {@code -1} if no
780    * matching character is present.
781    *
782    * <p>The default implementation iterates over the sequence in forward order calling {@link
783    * #matches} for each character.
784    *
785    * @param sequence the character sequence to examine from the beginning
786    * @return an index, or {@code -1} if no character matches
787    */
788   public int indexIn(CharSequence sequence) {
789     int length = sequence.length();
790     for (int i = 0; i < length; i++) {
791       if (matches(sequence.charAt(i))) {
792         return i;
793       }
794     }
795     return -1;
796   }
797 
798   /**
799    * Returns the index of the first matching character in a character sequence, starting from a
800    * given position, or {@code -1} if no character matches after that position.
801    *
802    * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
803    * start}, calling {@link #matches} for each character.
804    *
805    * @param sequence the character sequence to examine
806    * @param start the first index to examine; must be nonnegative and no greater than {@code
807    *        sequence.length()}
808    * @return the index of the first matching character, guaranteed to be no less than {@code start},
809    *         or {@code -1} if no character matches
810    * @throws IndexOutOfBoundsException if start is negative or greater than {@code
811    *         sequence.length()}
812    */
813   public int indexIn(CharSequence sequence, int start) {
814     int length = sequence.length();
815     Preconditions.checkPositionIndex(start, length);
816     for (int i = start; i < length; i++) {
817       if (matches(sequence.charAt(i))) {
818         return i;
819       }
820     }
821     return -1;
822   }
823 
824   /**
825    * Returns the index of the last matching character in a character sequence, or {@code -1} if no
826    * matching character is present.
827    *
828    * <p>The default implementation iterates over the sequence in reverse order calling {@link
829    * #matches} for each character.
830    *
831    * @param sequence the character sequence to examine from the end
832    * @return an index, or {@code -1} if no character matches
833    */
834   public int lastIndexIn(CharSequence sequence) {
835     for (int i = sequence.length() - 1; i >= 0; i--) {
836       if (matches(sequence.charAt(i))) {
837         return i;
838       }
839     }
840     return -1;
841   }
842 
843   /**
844    * Returns the number of matching characters found in a character sequence.
845    */
846   public int countIn(CharSequence sequence) {
847     int count = 0;
848     for (int i = 0; i < sequence.length(); i++) {
849       if (matches(sequence.charAt(i))) {
850         count++;
851       }
852     }
853     return count;
854   }
855 
856   /**
857    * Returns a string containing all non-matching characters of a character sequence, in order. For
858    * example: <pre>   {@code
859    *
860    *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
861    *
862    * ... returns {@code "bzr"}.
863    */
864   @CheckReturnValue
865   public String removeFrom(CharSequence sequence) {
866     String string = sequence.toString();
867     int pos = indexIn(string);
868     if (pos == -1) {
869       return string;
870     }
871 
872     char[] chars = string.toCharArray();
873     int spread = 1;
874 
875     // This unusual loop comes from extensive benchmarking
876     OUT: while (true) {
877       pos++;
878       while (true) {
879         if (pos == chars.length) {
880           break OUT;
881         }
882         if (matches(chars[pos])) {
883           break;
884         }
885         chars[pos - spread] = chars[pos];
886         pos++;
887       }
888       spread++;
889     }
890     return new String(chars, 0, pos - spread);
891   }
892 
893   /**
894    * Returns a string containing all matching characters of a character sequence, in order. For
895    * example: <pre>   {@code
896    *
897    *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
898    *
899    * ... returns {@code "aaa"}.
900    */
901   @CheckReturnValue
902   public String retainFrom(CharSequence sequence) {
903     return negate().removeFrom(sequence);
904   }
905 
906   /**
907    * Returns a string copy of the input character sequence, with each character that matches this
908    * matcher replaced by a given replacement character. For example: <pre>   {@code
909    *
910    *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
911    *
912    * ... returns {@code "rodor"}.
913    *
914    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
915    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
916    * character.
917    *
918    * @param sequence the character sequence to replace matching characters in
919    * @param replacement the character to append to the result string in place of each matching
920    *        character in {@code sequence}
921    * @return the new string
922    */
923   @CheckReturnValue
924   public String replaceFrom(CharSequence sequence, char replacement) {
925     String string = sequence.toString();
926     int pos = indexIn(string);
927     if (pos == -1) {
928       return string;
929     }
930     char[] chars = string.toCharArray();
931     chars[pos] = replacement;
932     for (int i = pos + 1; i < chars.length; i++) {
933       if (matches(chars[i])) {
934         chars[i] = replacement;
935       }
936     }
937     return new String(chars);
938   }
939 
940   /**
941    * Returns a string copy of the input character sequence, with each character that matches this
942    * matcher replaced by a given replacement sequence. For example: <pre>   {@code
943    *
944    *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
945    *
946    * ... returns {@code "yoohoo"}.
947    *
948    * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
949    * off calling {@link #replaceFrom(CharSequence, char)} directly.
950    *
951    * @param sequence the character sequence to replace matching characters in
952    * @param replacement the characters to append to the result string in place of each matching
953    *        character in {@code sequence}
954    * @return the new string
955    */
956   @CheckReturnValue
957   public String replaceFrom(CharSequence sequence, CharSequence replacement) {
958     int replacementLen = replacement.length();
959     if (replacementLen == 0) {
960       return removeFrom(sequence);
961     }
962     if (replacementLen == 1) {
963       return replaceFrom(sequence, replacement.charAt(0));
964     }
965 
966     String string = sequence.toString();
967     int pos = indexIn(string);
968     if (pos == -1) {
969       return string;
970     }
971 
972     int len = string.length();
973     StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
974 
975     int oldpos = 0;
976     do {
977       buf.append(string, oldpos, pos);
978       buf.append(replacement);
979       oldpos = pos + 1;
980       pos = indexIn(string, oldpos);
981     } while (pos != -1);
982 
983     buf.append(string, oldpos, len);
984     return buf.toString();
985   }
986 
987   /**
988    * Returns a substring of the input character sequence that omits all characters this matcher
989    * matches from the beginning and from the end of the string. For example: <pre>   {@code
990    *
991    *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
992    *
993    * ... returns {@code "cat"}.
994    *
995    * <p>Note that: <pre>   {@code
996    *
997    *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
998    *
999    * ... is equivalent to {@link String#trim()}.
1000    */
1001   @CheckReturnValue
1002   public String trimFrom(CharSequence sequence) {
1003     int len = sequence.length();
1004     int first;
1005     int last;
1006 
1007     for (first = 0; first < len; first++) {
1008       if (!matches(sequence.charAt(first))) {
1009         break;
1010       }
1011     }
1012     for (last = len - 1; last > first; last--) {
1013       if (!matches(sequence.charAt(last))) {
1014         break;
1015       }
1016     }
1017 
1018     return sequence.subSequence(first, last + 1).toString();
1019   }
1020 
1021   /**
1022    * Returns a substring of the input character sequence that omits all characters this matcher
1023    * matches from the beginning of the string. For example: <pre> {@code
1024    *
1025    *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1026    *
1027    * ... returns {@code "catbab"}.
1028    */
1029   @CheckReturnValue
1030   public String trimLeadingFrom(CharSequence sequence) {
1031     int len = sequence.length();
1032     int first;
1033 
1034     for (first = 0; first < len; first++) {
1035       if (!matches(sequence.charAt(first))) {
1036         break;
1037       }
1038     }
1039 
1040     return sequence.subSequence(first, len).toString();
1041   }
1042 
1043   /**
1044    * Returns a substring of the input character sequence that omits all characters this matcher
1045    * matches from the end of the string. For example: <pre> {@code
1046    *
1047    *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1048    *
1049    * ... returns {@code "abacat"}.
1050    */
1051   @CheckReturnValue
1052   public String trimTrailingFrom(CharSequence sequence) {
1053     int len = sequence.length();
1054     int last;
1055 
1056     for (last = len - 1; last >= 0; last--) {
1057       if (!matches(sequence.charAt(last))) {
1058         break;
1059       }
1060     }
1061 
1062     return sequence.subSequence(0, last + 1).toString();
1063   }
1064 
1065   /**
1066    * Returns a string copy of the input character sequence, with each group of consecutive
1067    * characters that match this matcher replaced by a single replacement character. For example:
1068    * <pre>   {@code
1069    *
1070    *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1071    *
1072    * ... returns {@code "b-p-r"}.
1073    *
1074    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1075    * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1076    * character.
1077    *
1078    * @param sequence the character sequence to replace matching groups of characters in
1079    * @param replacement the character to append to the result string in place of each group of
1080    *        matching characters in {@code sequence}
1081    * @return the new string
1082    */
1083   @CheckReturnValue
1084   public String collapseFrom(CharSequence sequence, char replacement) {
1085     int first = indexIn(sequence);
1086     if (first == -1) {
1087       return sequence.toString();
1088     }
1089 
1090     // TODO(kevinb): see if this implementation can be made faster
1091     StringBuilder builder = new StringBuilder(sequence.length())
1092         .append(sequence.subSequence(0, first))
1093         .append(replacement);
1094     boolean in = true;
1095     for (int i = first + 1; i < sequence.length(); i++) {
1096       char c = sequence.charAt(i);
1097       if (apply(c)) {
1098         if (!in) {
1099           builder.append(replacement);
1100           in = true;
1101         }
1102       } else {
1103         builder.append(c);
1104         in = false;
1105       }
1106     }
1107     return builder.toString();
1108   }
1109 
1110   /**
1111    * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1112    * groups of matching characters at the start or end of the sequence are removed without
1113    * replacement.
1114    */
1115   @CheckReturnValue
1116   public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1117     int first = negate().indexIn(sequence);
1118     if (first == -1) {
1119       return ""; // everything matches. nothing's left.
1120     }
1121     StringBuilder builder = new StringBuilder(sequence.length());
1122     boolean inMatchingGroup = false;
1123     for (int i = first; i < sequence.length(); i++) {
1124       char c = sequence.charAt(i);
1125       if (apply(c)) {
1126         inMatchingGroup = true;
1127       } else {
1128         if (inMatchingGroup) {
1129           builder.append(replacement);
1130           inMatchingGroup = false;
1131         }
1132         builder.append(c);
1133       }
1134     }
1135     return builder.toString();
1136   }
1137 
1138   // Predicate interface
1139 
1140   /**
1141    * Returns {@code true} if this matcher matches the given character.
1142    *
1143    * @throws NullPointerException if {@code character} is null
1144    */
1145   @Override public boolean apply(Character character) {
1146     return matches(character);
1147   }
1148 }
1149