• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.android.mail.common.base;
18 
19 import static com.google.android.mail.common.base.Preconditions.checkArgument;
20 import static com.google.android.mail.common.base.Preconditions.checkNotNull;
21 
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.List;
25 
26 /**
27  * Determines a true or false value for any Java {@code char} value, just as
28  * {@link Predicate} does for any {@link Object}. Also offers basic text
29  * processing methods based on this function. Implementations are strongly
30  * encouraged to be side-effect-free and immutable.
31  *
32  * <p>Throughout the documentation of this class, the phrase "matching
33  * character" is used to mean "any character {@code c} for which {@code
34  * this.matches(c)} returns {@code true}".
35  *
36  * <p><b>Note:</b> This class deals only with {@code char} values; it does not
37  * understand supplementary Unicode code points in the range {@code 0x10000} to
38  * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String}
39  * using surrogate pairs, and a {@code CharMatcher} treats these just as two
40  * separate characters.
41  *
42  * @author Kevin Bourrillion
43  * @since 2009.09.15 <b>tentative</b>
44  */
45 public abstract class CharMatcher implements Predicate<Character> {
46 
47   // Constants
48 
  // Breaking whitespace characters that are listed individually. The octal
  // escape \013 is the vertical tab.
  // Excludes 2000-200a, which is handled as a range
  private static final String BREAKING_WHITESPACE_CHARS =
      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";

  // Non-breaking whitespace characters that are listed individually.
  // Excludes 2007, which is handled as a gap in a pair of ranges
  private static final String NON_BREAKING_WHITESPACE_CHARS =
      "\u00a0\u180e\u202f";
56 
  /**
   * Determines whether a character is whitespace according to the latest
   * Unicode standard, as illustrated
   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
   * This is not the same definition used by other Java APIs. See a comparison
   * of several definitions of "whitespace" at
   * <a href="TODO">(TODO)</a>.
   *
   * <p>The matcher accepts every character listed in
   * {@code BREAKING_WHITESPACE_CHARS} and
   * {@code NON_BREAKING_WHITESPACE_CHARS}, plus the whole range U+2000 through
   * U+200A inclusive (so U+2007 is matched here).
   *
   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this
   * constant to keep it up to date.
   */
  public static final CharMatcher WHITESPACE =
      anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
          .or(inRange('\u2000', '\u200a'));
71 
  /**
   * Determines whether a character is a breaking whitespace (that is,
   * a whitespace which can be interpreted as a break between words
   * for formatting purposes).  See {@link #WHITESPACE} for a discussion
   * of that term.
   *
   * <p>The matcher accepts every character listed in
   * {@code BREAKING_WHITESPACE_CHARS} plus the ranges U+2000 through U+2006
   * and U+2008 through U+200A; the gap between the two ranges leaves out
   * U+2007.
   *
   * @since 2010.01.04 <b>tentative</b>
   */
  public static final CharMatcher BREAKING_WHITESPACE =
      anyOf(BREAKING_WHITESPACE_CHARS)
          .or(inRange('\u2000', '\u2006'))
          .or(inRange('\u2008', '\u200a'));
84 
85   /**
86    * Determines whether a character is ASCII, meaning that its code point is
87    * less than 128.
88    */
89   public static final CharMatcher ASCII = inRange('\0', '\u007f');
90 
91   /**
92    * Determines whether a character is a digit according to
93    * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
94    */
95   public static final CharMatcher DIGIT;
96 
97   static {
98     CharMatcher digit = inRange('0', '9');
99     String zeroes =
100         "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
101             + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
102             + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
103     for (char base : zeroes.toCharArray()) {
104       digit = digit.or(inRange(base, (char) (base + 9)));
105     }
106     DIGIT = digit;
107   }
108 
109   /**
110    * Determines whether a character is whitespace according to {@link
111    * Character#isWhitespace(char) Java's definition}; it is usually preferable
112    * to use {@link #WHITESPACE}. See a comparison of several definitions of
113    * "whitespace" at <a href="http://go/white+space">go/white+space</a>.
114    */
115   public static final CharMatcher JAVA_WHITESPACE
116       = inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
117       .or(inRange('\u001c', '\u0020'))
118       .or(is('\u1680'))
119       .or(is('\u180e'))
120       .or(inRange('\u2000', '\u2006'))
121       .or(inRange('\u2008', '\u200b'))
122       .or(inRange('\u2028', '\u2029'))
123       .or(is('\u205f'))
124       .or(is('\u3000'));
125 
126   /**
127    * Determines whether a character is a digit according to {@link
128    * Character#isDigit(char) Java's definition}. If you only care to match
129    * ASCII digits, you can use {@code inRange('0', '9')}.
130    */
131   public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
132     @Override public boolean matches(char c) {
133       return Character.isDigit(c);
134     }
135   };
136 
137   /**
138    * Determines whether a character is a letter according to {@link
139    * Character#isLetter(char) Java's definition}. If you only care to match
140    * letters of the Latin alphabet, you can use {@code
141    * inRange('a', 'z').or(inRange('A', 'Z'))}.
142    */
143   public static final CharMatcher JAVA_LETTER = new CharMatcher() {
144     @Override public boolean matches(char c) {
145       return Character.isLetter(c);
146     }
147   };
148 
149   /**
150    * Determines whether a character is a letter or digit according to {@link
151    * Character#isLetterOrDigit(char) Java's definition}.
152    */
153   public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
154     @Override public boolean matches(char c) {
155       return Character.isLetterOrDigit(c);
156     }
157   };
158 
159   /**
160    * Determines whether a character is upper case according to {@link
161    * Character#isUpperCase(char) Java's definition}.
162    */
163   public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
164     @Override public boolean matches(char c) {
165       return Character.isUpperCase(c);
166     }
167   };
168 
169   /**
170    * Determines whether a character is lower case according to {@link
171    * Character#isLowerCase(char) Java's definition}.
172    */
173   public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
174     @Override public boolean matches(char c) {
175       return Character.isLowerCase(c);
176     }
177   };
178 
  /**
   * Determines whether a character is an ISO control character according to
   * {@link Character#isISOControl(char)}.
   *
   * <p>Implemented as the union of two fixed ranges: U+0000 through U+001F
   * and U+007F through U+009F (both inclusive).
   */
  public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f')
      .or(inRange('\u007f', '\u009f'));
185 
  /**
   * Determines whether a character is invisible; that is, if its Unicode
   * category is any of SPACE_SEPARATOR, LINE_SEPARATOR,
   * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according
   * to ICU4J.
   *
   * <p>The matcher is a hand-maintained union of fixed ranges and single
   * characters; it does not consult ICU4J or {@link Character} at run time.
   */
  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
      .or(inRange('\u007f', '\u00a0'))
      .or(is('\u00ad'))
      .or(inRange('\u0600', '\u0603'))
      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
      .or(inRange('\u2000', '\u200f'))
      .or(inRange('\u2028', '\u202f'))
      .or(inRange('\u205f', '\u2064'))
      .or(inRange('\u206a', '\u206f'))
      .or(is('\u3000'))
      // One contiguous range, U+D800 through U+F8FF; presumably the
      // SURROGATE and PRIVATE_USE categories named above - verify.
      .or(inRange('\ud800', '\uf8ff'))
      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"));
204 
  /**
   * Determines whether a character is single-width (not double-width).  When
   * in doubt, this matcher errs on the side of returning {@code false} (that
   * is, it tends to assume a character is double-width).
   *
   * <p>The matcher is a hand-maintained union of fixed ranges and single
   * characters; any character outside the ranges listed below is reported as
   * not single-width.
   *
   * <p><b>Note:</b> as the reference file evolves, we will modify this constant
   * to keep it up to date.
   */
  public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
      .or(is('\u05be'))
      .or(inRange('\u05d0', '\u05ea'))
      .or(is('\u05f3'))
      .or(is('\u05f4'))
      .or(inRange('\u0600', '\u06ff'))
      .or(inRange('\u0750', '\u077f'))
      .or(inRange('\u0e00', '\u0e7f'))
      .or(inRange('\u1e00', '\u20af'))
      .or(inRange('\u2100', '\u213a'))
      .or(inRange('\ufb50', '\ufdff'))
      .or(inRange('\ufe70', '\ufeff'))
      .or(inRange('\uff61', '\uffdc'));
226 
  /**
   * Determines whether a character is whitespace according to an arbitrary definition used by
   * {@link StringUtil} for years. Most likely you don't want to use this. See a comparison of
   * several definitions of "whitespace" at <a href="http://goto/white space">goto/white space</a>.
   *
   * <p>The matcher accepts exactly the eight characters of the string literal below and is
   * precomputed once when this class is initialized.
   *
   * @deprecated use {@link #WHITESPACE} to switch to the Unicode definition, or create a matcher
   *     for the specific characters you want. This constant exists as a stepping stone for
   *     getting off of many deprecated {@link StringUtil} methods.
   */
  @Deprecated
  public static final CharMatcher LEGACY_WHITESPACE =
      anyOf(" \r\n\t\u3000\u00A0\u2007\u202F").precomputed();
239 
240 
241   /** Matches any character. */
242   public static final CharMatcher ANY = new CharMatcher() {
243     @Override public boolean matches(char c) {
244       return true;
245     }
246 
247     @Override public int indexIn(CharSequence sequence) {
248       return (sequence.length() == 0) ? -1 : 0;
249     }
250     @Override public int indexIn(CharSequence sequence, int start) {
251       int length = sequence.length();
252       Preconditions.checkPositionIndex(start, length);
253       return (start == length) ? -1 : start;
254     }
255     @Override public int lastIndexIn(CharSequence sequence) {
256       return sequence.length() - 1;
257     }
258     @Override public boolean matchesAllOf(CharSequence sequence) {
259       checkNotNull(sequence);
260       return true;
261     }
262     @Override public boolean matchesNoneOf(CharSequence sequence) {
263       return sequence.length() == 0;
264     }
265     @Override public String removeFrom(CharSequence sequence) {
266       checkNotNull(sequence);
267       return "";
268     }
269     @Override public String replaceFrom(
270         CharSequence sequence, char replacement) {
271       char[] array = new char[sequence.length()];
272       Arrays.fill(array, replacement);
273       return new String(array);
274     }
275     @Override public String replaceFrom(
276         CharSequence sequence, CharSequence replacement) {
277       StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
278       for (int i = 0; i < sequence.length(); i++) {
279         retval.append(replacement);
280       }
281       return retval.toString();
282     }
283     @Override public String collapseFrom(CharSequence sequence, char replacement) {
284       return (sequence.length() == 0) ? "" : String.valueOf(replacement);
285     }
286     @Override public String trimFrom(CharSequence sequence) {
287       checkNotNull(sequence);
288       return "";
289     }
290     @Override public int countIn(CharSequence sequence) {
291       return sequence.length();
292     }
293     @Override public CharMatcher and(CharMatcher other) {
294       return checkNotNull(other);
295     }
296     @Override public CharMatcher or(CharMatcher other) {
297       checkNotNull(other);
298       return this;
299     }
300     @Override public CharMatcher negate() {
301       return NONE;
302     }
303     @Override public CharMatcher precomputed() {
304       return this;
305     }
306   };
307 
308   /** Matches no characters. */
309   public static final CharMatcher NONE = new CharMatcher() {
310     @Override public boolean matches(char c) {
311       return false;
312     }
313 
314     @Override public int indexIn(CharSequence sequence) {
315       checkNotNull(sequence);
316       return -1;
317     }
318     @Override public int indexIn(CharSequence sequence, int start) {
319       int length = sequence.length();
320       Preconditions.checkPositionIndex(start, length);
321       return -1;
322     }
323     @Override public int lastIndexIn(CharSequence sequence) {
324       checkNotNull(sequence);
325       return -1;
326     }
327     @Override public boolean matchesAllOf(CharSequence sequence) {
328       return sequence.length() == 0;
329     }
330     @Override public boolean matchesNoneOf(CharSequence sequence) {
331       checkNotNull(sequence);
332       return true;
333     }
334     @Override public String removeFrom(CharSequence sequence) {
335       return sequence.toString();
336     }
337     @Override public String replaceFrom(
338         CharSequence sequence, char replacement) {
339       return sequence.toString();
340     }
341     @Override public String replaceFrom(
342         CharSequence sequence, CharSequence replacement) {
343       checkNotNull(replacement);
344       return sequence.toString();
345     }
346     @Override public String collapseFrom(
347         CharSequence sequence, char replacement) {
348       return sequence.toString();
349     }
350     @Override public String trimFrom(CharSequence sequence) {
351       return sequence.toString();
352     }
353     @Override public int countIn(CharSequence sequence) {
354       checkNotNull(sequence);
355       return 0;
356     }
357     @Override public CharMatcher and(CharMatcher other) {
358       checkNotNull(other);
359       return this;
360     }
361     @Override public CharMatcher or(CharMatcher other) {
362       return checkNotNull(other);
363     }
364     @Override public CharMatcher negate() {
365       return ANY;
366     }
367     @Override protected void setBits(LookupTable table) {
368     }
369     @Override public CharMatcher precomputed() {
370       return this;
371     }
372   };
373 
374   // Static factories
375 
376   /**
377    * Returns a {@code char} matcher that matches only one specified character.
378    */
is(final char match)379   public static CharMatcher is(final char match) {
380     return new CharMatcher() {
381       @Override public boolean matches(char c) {
382         return c == match;
383       }
384 
385       @Override public String replaceFrom(
386           CharSequence sequence, char replacement) {
387         return sequence.toString().replace(match, replacement);
388       }
389       @Override public CharMatcher and(CharMatcher other) {
390         return other.matches(match) ? this : NONE;
391       }
392       @Override public CharMatcher or(CharMatcher other) {
393         return other.matches(match) ? other : super.or(other);
394       }
395       @Override public CharMatcher negate() {
396         return isNot(match);
397       }
398       @Override protected void setBits(LookupTable table) {
399         table.set(match);
400       }
401       @Override public CharMatcher precomputed() {
402         return this;
403       }
404     };
405   }
406 
407   /**
408    * Returns a {@code char} matcher that matches any character except the one
409    * specified.
410    *
411    * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
412    */
413   public static CharMatcher isNot(final char match) {
414     return new CharMatcher() {
415       @Override public boolean matches(char c) {
416         return c != match;
417       }
418 
419       @Override public CharMatcher and(CharMatcher other) {
420         return other.matches(match) ? super.and(other) : other;
421       }
422       @Override public CharMatcher or(CharMatcher other) {
423         return other.matches(match) ? ANY : this;
424       }
425       @Override public CharMatcher negate() {
426         return is(match);
427       }
428     };
429   }
430 
431   /**
432    * Returns a {@code char} matcher that matches any character present in the
433    * given character sequence.
434    */
435   public static CharMatcher anyOf(final CharSequence sequence) {
436     switch (sequence.length()) {
437       case 0:
438         return NONE;
439       case 1:
440         return is(sequence.charAt(0));
441       case 2:
442         final char match1 = sequence.charAt(0);
443         final char match2 = sequence.charAt(1);
444         return new CharMatcher() {
445           @Override public boolean matches(char c) {
446             return c == match1 || c == match2;
447           }
448           @Override protected void setBits(LookupTable table) {
449             table.set(match1);
450             table.set(match2);
451           }
452           @Override public CharMatcher precomputed() {
453             return this;
454           }
455         };
456     }
457 
458     final char[] chars = sequence.toString().toCharArray();
459     Arrays.sort(chars); // not worth collapsing duplicates
460 
461     return new CharMatcher() {
462       @Override public boolean matches(char c) {
463         return Arrays.binarySearch(chars, c) >= 0;
464       }
465       @Override protected void setBits(LookupTable table) {
466         for (char c : chars) {
467           table.set(c);
468         }
469       }
470     };
471   }
472 
473   /**
474    * Returns a {@code char} matcher that matches any character not present in
475    * the given character sequence.
476    */
477   public static CharMatcher noneOf(CharSequence sequence) {
478     return anyOf(sequence).negate();
479   }
480 
481   /**
482    * Returns a {@code char} matcher that matches any character in a given range
483    * (both endpoints are inclusive). For example, to match any lowercase letter
484    * of the English alphabet, use {@code CharMatcher.inRange('a', 'z')}.
485    *
486    * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
487    */
488   public static CharMatcher inRange(
489       final char startInclusive, final char endInclusive) {
490     checkArgument(endInclusive >= startInclusive);
491     return new CharMatcher() {
492       @Override public boolean matches(char c) {
493         return startInclusive <= c && c <= endInclusive;
494       }
495       @Override protected void setBits(LookupTable table) {
496         char c = startInclusive;
497         while (true) {
498           table.set(c);
499           if (c++ == endInclusive) {
500             break;
501           }
502         }
503       }
504       @Override public CharMatcher precomputed() {
505         return this;
506       }
507     };
508   }
509 
510   /**
511    * Returns a matcher with identical behavior to the given {@link
512    * Character}-based predicate, but which operates on primitive {@code char}
513    * instances instead.
514    */
515   public static CharMatcher forPredicate(
516       final Predicate<? super Character> predicate) {
517     checkNotNull(predicate);
518     if (predicate instanceof CharMatcher) {
519       return (CharMatcher) predicate;
520     }
521     return new CharMatcher() {
522       @Override public boolean matches(char c) {
523         return predicate.apply(c);
524       }
525       @Override public boolean apply(Character character) {
526         return predicate.apply(checkNotNull(character));
527       }
528     };
529   }
530 
531   // Abstract methods
532 
  /**
   * Determines a true or false value for the given character. This is the one
   * abstract method of the class; the default implementations of the other
   * methods are built on it.
   *
   * @param c the character to test
   * @return {@code true} if this matcher matches {@code c}
   */
  public abstract boolean matches(char c);
535 
536   // Non-static factories
537 
538   /**
539    * Returns a matcher that matches any character not matched by this matcher.
540    */
541   public CharMatcher negate() {
542     final CharMatcher original = this;
543     return new CharMatcher() {
544       @Override public boolean matches(char c) {
545         return !original.matches(c);
546       }
547 
548       @Override public boolean matchesAllOf(CharSequence sequence) {
549         return original.matchesNoneOf(sequence);
550       }
551       @Override public boolean matchesNoneOf(CharSequence sequence) {
552         return original.matchesAllOf(sequence);
553       }
554       @Override public int countIn(CharSequence sequence) {
555         return sequence.length() - original.countIn(sequence);
556       }
557       @Override public CharMatcher negate() {
558         return original;
559       }
560     };
561   }
562 
563   /**
564    * Returns a matcher that matches any character matched by both this matcher
565    * and {@code other}.
566    */
567   public CharMatcher and(CharMatcher other) {
568     return new And(Arrays.asList(this, checkNotNull(other)));
569   }
570 
571   private static class And extends CharMatcher {
572     List<CharMatcher> components;
573 
574     And(List<CharMatcher> components) {
575       this.components = components; // Skip defensive copy (private)
576     }
577 
578     @Override public boolean matches(char c) {
579       for (CharMatcher matcher : components) {
580         if (!matcher.matches(c)) {
581           return false;
582         }
583       }
584       return true;
585     }
586 
587     @Override public CharMatcher and(CharMatcher other) {
588       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
589       newComponents.add(checkNotNull(other));
590       return new And(newComponents);
591     }
592   }
593 
594   /**
595    * Returns a matcher that matches any character matched by either this matcher
596    * or {@code other}.
597    */
598   public CharMatcher or(CharMatcher other) {
599     return new Or(Arrays.asList(this, checkNotNull(other)));
600   }
601 
602   private static class Or extends CharMatcher {
603     List<CharMatcher> components;
604 
605     Or(List<CharMatcher> components) {
606       this.components = components; // Skip defensive copy (private)
607     }
608 
609     @Override public boolean matches(char c) {
610       for (CharMatcher matcher : components) {
611         if (matcher.matches(c)) {
612           return true;
613         }
614       }
615       return false;
616     }
617 
618     @Override public CharMatcher or(CharMatcher other) {
619       List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
620       newComponents.add(checkNotNull(other));
621       return new Or(newComponents);
622     }
623 
624     @Override protected void setBits(LookupTable table) {
625       for (CharMatcher matcher : components) {
626         matcher.setBits(table);
627       }
628     }
629   }
630 
631   /**
632    * Returns a {@code char} matcher functionally equivalent to this one, but
633    * which may be faster to query than the original; your mileage may vary.
634    * Precomputation takes time and is likely to be worthwhile only if the
635    * precomputed matcher is queried many thousands of times.
636    *
637    * <p>This method has no effect (returns {@code this}) when called in GWT:
638    * it's unclear whether a precomputed matcher is faster, but it certainly
639    * consumes more memory, which doesn't seem like a worthwhile tradeoff in a
640    * browser.
641    */
642   public CharMatcher precomputed() {
643     return Platform.precomputeCharMatcher(this);
644   }
645 
646   /**
647    * This is the actual implementation of {@link #precomputed}, but we bounce
648    * calls through a method on {@link Platform} so that we can have different
649    * behavior in GWT.
650    *
651    * <p>The default precomputation is to cache the configuration of the original
652    * matcher in an eight-kilobyte bit array. In some situations this produces a
653    * matcher which is faster to query than the original.
654    *
655    * <p>The default implementation creates a new bit array and passes it to
656    * {@link #setBits(LookupTable)}.
657    */
658   CharMatcher precomputedInternal() {
659     final LookupTable table = new LookupTable();
660     setBits(table);
661 
662     return new CharMatcher() {
663       @Override public boolean matches(char c) {
664         return table.get(c);
665       }
666 
667       // TODO: make methods like negate() smart
668 
669       @Override public CharMatcher precomputed() {
670         return this;
671       }
672     };
673   }
674 
675   /**
676    * For use by implementors; sets the bit corresponding to each character ('\0'
677    * to '{@literal \}uFFFF') that matches this matcher in the given bit array,
678    * leaving all other bits untouched.
679    *
680    * <p>The default implementation loops over every possible character value,
681    * invoking {@link #matches} for each one.
682    */
683   protected void setBits(LookupTable table) {
684     char c = Character.MIN_VALUE;
685     while (true) {
686       if (matches(c)) {
687         table.set(c);
688       }
689       if (c++ == Character.MAX_VALUE) {
690         break;
691       }
692     }
693   }
694 
695   /**
696    * A bit array with one bit per {@code char} value, used by {@link
697    * CharMatcher#precomputed}.
698    *
699    * <p>TODO: possibly share a common BitArray class with BloomFilter
700    * and others... a simpler java.util.BitSet.
701    */
702   protected static class LookupTable {
703     int[] data = new int[2048];
704 
705     void set(char index) {
706       data[index >> 5] |= (1 << index);
707     }
708     boolean get(char index) {
709       return (data[index >> 5] & (1 << index)) != 0;
710     }
711   }
712 
713   // Text processing routines
714 
715   /**
716    * Returns {@code true} if a character sequence contains only matching
717    * characters.
718    *
719    * <p>The default implementation iterates over the sequence, invoking {@link
720    * #matches} for each character, until this returns {@code false} or the end
721    * is reached.
722    *
723    * @param sequence the character sequence to examine, possibly empty
724    * @return {@code true} if this matcher matches every character in the
725    *     sequence, including when the sequence is empty
726    */
727   public boolean matchesAllOf(CharSequence sequence) {
728     for (int i = sequence.length() - 1; i >= 0; i--) {
729       if (!matches(sequence.charAt(i))) {
730         return false;
731       }
732     }
733     return true;
734   }
735 
736   /**
737    * Returns {@code true} if a character sequence contains no matching
738    * characters.
739    *
740    * <p>The default implementation iterates over the sequence, invoking {@link
741    * #matches} for each character, until this returns {@code false} or the end is
742    * reached.
743    *
744    * @param sequence the character sequence to examine, possibly empty
745    * @return {@code true} if this matcher matches every character in the
746    *     sequence, including when the sequence is empty
747    */
748   public boolean matchesNoneOf(CharSequence sequence) {
749     return indexIn(sequence) == -1;
750   }
751 
752   // TODO: perhaps add matchesAnyOf()
753 
754   /**
755    * Returns the index of the first matching character in a character sequence,
756    * or {@code -1} if no matching character is present.
757    *
758    * <p>The default implementation iterates over the sequence in forward order
759    * calling {@link #matches} for each character.
760    *
761    * @param sequence the character sequence to examine from the beginning
762    * @return an index, or {@code -1} if no character matches
763    */
764   public int indexIn(CharSequence sequence) {
765     int length = sequence.length();
766     for (int i = 0; i < length; i++) {
767       if (matches(sequence.charAt(i))) {
768         return i;
769       }
770     }
771     return -1;
772   }
773 
774   /**
775    * Returns the index of the first matching character in a character sequence,
776    * starting from a given position, or {@code -1} if no character matches after
777    * that position.
778    *
779    * <p>The default implementation iterates over the sequence in forward order,
780    * beginning at {@code start}, calling {@link #matches} for each character.
781    *
782    * @param sequence the character sequence to examine
783    * @param start the first index to examine; must be nonnegative and no
784    *     greater than {@code sequence.length()}
785    * @return the index of the first matching character, guaranteed to be no less
786    *     than {@code start}, or {@code -1} if no character matches
787    * @throws IndexOutOfBoundsException if start is negative or greater than
788    *     {@code sequence.length()}
789    */
790   public int indexIn(CharSequence sequence, int start) {
791     int length = sequence.length();
792     Preconditions.checkPositionIndex(start, length);
793     for (int i = start; i < length; i++) {
794       if (matches(sequence.charAt(i))) {
795         return i;
796       }
797     }
798     return -1;
799   }
800 
801   /**
802    * Returns the index of the last matching character in a character sequence,
803    * or {@code -1} if no matching character is present.
804    *
805    * <p>The default implementation iterates over the sequence in reverse order
806    * calling {@link #matches} for each character.
807    *
808    * @param sequence the character sequence to examine from the end
809    * @return an index, or {@code -1} if no character matches
810    */
811   public int lastIndexIn(CharSequence sequence) {
812     for (int i = sequence.length() - 1; i >= 0; i--) {
813       if (matches(sequence.charAt(i))) {
814         return i;
815       }
816     }
817     return -1;
818   }
819 
820   /**
821    * Returns the number of matching characters found in a character sequence.
822    */
823   public int countIn(CharSequence sequence) {
824     int count = 0;
825     for (int i = 0; i < sequence.length(); i++) {
826       if (matches(sequence.charAt(i))) {
827         count++;
828       }
829     }
830     return count;
831   }
832 
  /**
   * Returns a string containing all non-matching characters of a character
   * sequence, in order. For example: <pre>   {@code
   *
   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
   *
   * ... returns {@code "bzr"}.
   *
   * <p>When the sequence contains no matching character, the result of
   * {@code sequence.toString()} is returned as is. Otherwise the characters
   * are copied into an array and compacted in place.
   *
   * @param sequence the character sequence to remove matching characters from
   * @return the characters of {@code sequence} that do not match, in their
   *     original order
   */
  public String removeFrom(CharSequence sequence) {
    String string = sequence.toString();
    int pos = indexIn(string);
    if (pos == -1) {
      // Nothing to remove.
      return string;
    }

    char[] chars = string.toCharArray();
    // Number of matching characters seen so far; also the distance by which
    // each kept character is shifted towards the front.
    int spread = 1;

    // This unusual loop comes from extensive benchmarking
    OUT:
    while (true) {
      // Step over the matching character at pos.
      pos++;
      while (true) {
        if (pos == chars.length) {
          break OUT;
        }
        if (matches(chars[pos])) {
          // Another character to drop; widen the gap in the outer loop.
          break;
        }
        // Keep this character by moving it left over the gap.
        chars[pos - spread] = chars[pos];
        pos++;
      }
      spread++;
    }
    // Here pos equals chars.length, so pos - spread is the number of
    // characters kept.
    return new String(chars, 0, pos - spread);
  }
869 
870   /**
871    * Returns a string containing all matching characters of a character
872    * sequence, in order. For example: <pre>   {@code
873    *
874    *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
875    *
876    * ... returns {@code "aaa"}.
877    */
878   public String retainFrom(CharSequence sequence) {
879     return negate().removeFrom(sequence);
880   }
881 
882   /**
883    * Returns a string copy of the input character sequence, with each character
884    * that matches this matcher replaced by a given replacement character. For
885    * example: <pre>   {@code
886    *
887    *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
888    *
889    * ... returns {@code "rodor"}.
890    *
891    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
892    * the first matching character, then iterates the remainder of the sequence
893    * calling {@link #matches(char)} for each character.
894    *
895    * @param sequence the character sequence to replace matching characters in
896    * @param replacement the character to append to the result string in place of
897    *     each matching character in {@code sequence}
898    * @return the new string
899    */
900   public String replaceFrom(CharSequence sequence, char replacement) {
901     String string = sequence.toString();
902     int pos = indexIn(string);
903     if (pos == -1) {
904       return string;
905     }
906     char[] chars = string.toCharArray();
907     chars[pos] = replacement;
908     for (int i = pos + 1; i < chars.length; i++) {
909       if (matches(chars[i])) {
910         chars[i] = replacement;
911       }
912     }
913     return new String(chars);
914   }
915 
916   /**
917    * Returns a string copy of the input character sequence, with each character
918    * that matches this matcher replaced by a given replacement sequence. For
919    * example: <pre>   {@code
920    *
921    *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
922    *
923    * ... returns {@code "yoohoo"}.
924    *
925    * <p><b>Note:</b> If the replacement is a fixed string with only one character,
926    * you are better off calling {@link #replaceFrom(CharSequence, char)} directly.
927    *
928    * @param sequence the character sequence to replace matching characters in
929    * @param replacement the characters to append to the result string in place
930    *     of each matching character in {@code sequence}
931    * @return the new string
932    */
933   public String replaceFrom(CharSequence sequence, CharSequence replacement) {
934     int replacementLen = replacement.length();
935     if (replacementLen == 0) {
936       return removeFrom(sequence);
937     }
938     if (replacementLen == 1) {
939       return replaceFrom(sequence, replacement.charAt(0));
940     }
941 
942     String string = sequence.toString();
943     int pos = indexIn(string);
944     if (pos == -1) {
945       return string;
946     }
947 
948     int len = string.length();
949     StringBuilder buf = new StringBuilder((int) (len * 1.5) + 16);
950 
951     int oldpos = 0;
952     do {
953       buf.append(string, oldpos, pos);
954       buf.append(replacement);
955       oldpos = pos + 1;
956       pos = indexIn(string, oldpos);
957     } while (pos != -1);
958 
959     buf.append(string, oldpos, len);
960     return buf.toString();
961   }
962 
963   /**
964    * Returns a substring of the input character sequence that omits all
965    * characters this matcher matches from the beginning and from the end of the
966    * string. For example: <pre> {@code
967    *
968    *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
969    *
970    * ... returns {@code "cat"}.
971    *
972    * <p>Note that<pre>   {@code
973    *
974    *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
975    *
976    * ... is equivalent to {@link String#trim()}.
977    */
978   public String trimFrom(CharSequence sequence) {
979     int len = sequence.length();
980     int first;
981     int last;
982 
983     for (first = 0; first < len; first++) {
984       if (!matches(sequence.charAt(first))) {
985         break;
986       }
987     }
988     for (last = len - 1; last > first; last--) {
989       if (!matches(sequence.charAt(last))) {
990         break;
991       }
992     }
993 
994     return sequence.subSequence(first, last + 1).toString();
995   }
996 
997   /**
998    * Returns a substring of the input character sequence that omits all
999    * characters this matcher matches from the beginning of the
1000    * string. For example: <pre> {@code
1001    *
1002    *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1003    *
1004    * ... returns {@code "catbab"}.
1005    */
1006   public String trimLeadingFrom(CharSequence sequence) {
1007     int len = sequence.length();
1008     int first;
1009 
1010     for (first = 0; first < len; first++) {
1011       if (!matches(sequence.charAt(first))) {
1012         break;
1013       }
1014     }
1015 
1016     return sequence.subSequence(first, len).toString();
1017   }
1018 
1019   /**
1020    * Returns a substring of the input character sequence that omits all
1021    * characters this matcher matches from the end of the
1022    * string. For example: <pre> {@code
1023    *
1024    *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1025    *
1026    * ... returns {@code "abacat"}.
1027    */
1028   public String trimTrailingFrom(CharSequence sequence) {
1029     int len = sequence.length();
1030     int last;
1031 
1032     for (last = len - 1; last >= 0; last--) {
1033       if (!matches(sequence.charAt(last))) {
1034         break;
1035       }
1036     }
1037 
1038     return sequence.subSequence(0, last + 1).toString();
1039   }
1040 
1041   /**
1042    * Returns a string copy of the input character sequence, with each group of
1043    * consecutive characters that match this matcher replaced by a single
1044    * replacement character. For example: <pre>   {@code
1045    *
1046    *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1047    *
1048    * ... returns {@code "b-p-r"}.
1049    *
1050    * <p>The default implementation uses {@link #indexIn(CharSequence)} to find
1051    * the first matching character, then iterates the remainder of the sequence
1052    * calling {@link #matches(char)} for each character.
1053    *
1054    * @param sequence the character sequence to replace matching groups of
1055    *     characters in
1056    * @param replacement the character to append to the result string in place of
1057    *     each group of matching characters in {@code sequence}
1058    * @return the new string
1059    */
1060   public String collapseFrom(CharSequence sequence, char replacement) {
1061     int first = indexIn(sequence);
1062     if (first == -1) {
1063       return sequence.toString();
1064     }
1065 
1066     // TODO: this implementation can probably be made faster.
1067 
1068     StringBuilder builder = new StringBuilder(sequence.length())
1069         .append(sequence.subSequence(0, first))
1070         .append(replacement);
1071     boolean in = true;
1072     for (int i = first + 1; i < sequence.length(); i++) {
1073       char c = sequence.charAt(i);
1074       if (apply(c)) {
1075         if (!in) {
1076           builder.append(replacement);
1077           in = true;
1078         }
1079       } else {
1080         builder.append(c);
1081         in = false;
1082       }
1083     }
1084     return builder.toString();
1085   }
1086 
1087   /**
1088    * Collapses groups of matching characters exactly as {@link #collapseFrom}
1089    * does, except that groups of matching characters at the start or end of the
1090    * sequence are removed without replacement.
1091    */
1092   public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1093     int first = negate().indexIn(sequence);
1094     if (first == -1) {
1095       return ""; // everything matches. nothing's left.
1096     }
1097     StringBuilder builder = new StringBuilder(sequence.length());
1098     boolean inMatchingGroup = false;
1099     for (int i = first; i < sequence.length(); i++) {
1100       char c = sequence.charAt(i);
1101       if (apply(c)) {
1102         inMatchingGroup = true;
1103       } else {
1104         if (inMatchingGroup) {
1105           builder.append(replacement);
1106           inMatchingGroup = false;
1107         }
1108         builder.append(c);
1109       }
1110     }
1111     return builder.toString();
1112   }
1113 
1114   // Predicate interface
1115 
1116   /**
1117    * Returns {@code true} if this matcher matches the given character.
1118    *
1119    * @throws NullPointerException if {@code character} is null
1120    */
1121   /*@Override*/ public boolean apply(Character character) {
1122     return matches(character);
1123   }
1124 }