• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.android.mail.common.base;
18 
19 import static com.google.android.mail.common.base.Preconditions.checkArgument;
20 import static com.google.android.mail.common.base.Preconditions.checkNotNull;
21 import static com.google.android.mail.common.base.Preconditions.checkState;
22 
23 import com.google.common.base.Joiner;
24 
25 import java.util.Iterator;
26 import java.util.NoSuchElementException;
27 import java.util.StringTokenizer;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 import java.util.regex.PatternSyntaxException;
31 
32 /**
33  * An object that divides strings (or other instances of {@code CharSequence})
34  * into substrings, by recognizing a <i>separator</i> (a.k.a. "delimiter")
35  * which can be expressed as a single character, literal string, regular
36  * expression, {@code CharMatcher}, or by using a fixed substring length. This
37  * class provides the complementary functionality to {@link Joiner}.
38  *
39  * <p>Here is the most basic example of {@code Splitter} usage: <pre>   {@code
40  *
41  *   Splitter.on(',').split("foo,bar")}</pre>
42  *
43  * This invocation returns an {@code Iterable<String>} containing {@code "foo"}
44  * and {@code "bar"}, in that order.
45  *
46  * <p>By default {@code Splitter}'s behavior is very simplistic: <pre>   {@code
47  *
48  *   Splitter.on(',').split("foo,,bar,  quux")}</pre>
49  *
50  * This returns an iterable containing {@code ["foo", "", "bar", "  quux"]}.
51  * Notice that the splitter does not assume that you want empty strings removed,
52  * or that you wish to trim whitespace. If you want features like these, simply
53  * ask for them: <pre> {@code
54  *
55  *   private static final Splitter MY_SPLITTER = Splitter.on(',')
56  *       .trimResults()
57  *       .omitEmptyStrings();}</pre>
58  *
59  * Now {@code MY_SPLITTER.split("foo, ,bar,  quux,")} returns an iterable
60  * containing just {@code ["foo", "bar", "quux"]}. Note that the order in which
61  * the configuration methods are called is never significant; for instance,
62  * trimming is always applied first before checking for an empty result,
63  * regardless of the order in which the {@link #trimResults()} and
64  * {@link #omitEmptyStrings()} methods were invoked.
65  *
66  * <p><b>Warning: splitter instances are always immutable</b>; a configuration
67  * method such as {@code omitEmptyStrings} has no effect on the instance it
68  * is invoked on! You must store and use the new splitter instance returned by
69  * the method. This makes splitters thread-safe, and safe to store as {@code
70  * static final} constants (as illustrated above). <pre>   {@code
71  *
72  *   // Bad! Do not do this!
73  *   Splitter splitter = Splitter.on('/');
74  *   splitter.trimResults(); // does nothing!
75  *   return splitter.split("wrong / wrong / wrong");}</pre>
76  *
77  * The separator recognized by the splitter does not have to be a single
78  * literal character as in the examples above. See the methods {@link
79  * #on(String)}, {@link #on(Pattern)} and {@link #on(CharMatcher)} for examples
80  * of other ways to specify separators.
81  *
82  * <p><b>Note:</b> this class does not mimic any of the quirky behaviors of
83  * similar JDK methods; for instance, it does not silently discard trailing
84  * separators, as does {@link String#split(String)}, nor does it have a default
85  * behavior of using five particular whitespace characters as separators, like
86  * {@link StringTokenizer}.
87  *
88  * @author Julien Silland
89  * @author Jesse Wilson
90  * @author Kevin Bourrillion
91  * @since 2009.09.15 <b>tentative</b>
92  */
93 public final class Splitter {
94   private final CharMatcher trimmer;
95   private final boolean omitEmptyStrings;
96   private final Strategy strategy;
97 
Splitter(Strategy strategy)98   private Splitter(Strategy strategy) {
99     this(strategy, false, CharMatcher.NONE);
100   }
101 
Splitter(Strategy strategy, boolean omitEmptyStrings, CharMatcher trimmer)102   private Splitter(Strategy strategy, boolean omitEmptyStrings,
103       CharMatcher trimmer) {
104     this.strategy = strategy;
105     this.omitEmptyStrings = omitEmptyStrings;
106     this.trimmer = trimmer;
107   }
108 
109   /**
110    * Returns a splitter that uses the given single-character separator. For
111    * example, {@code Splitter.on(',').split("foo,,bar")} returns an iterable
112    * containing {@code ["foo", "", "bar"]}.
113    *
114    * @param separator the character to recognize as a separator
115    * @return a splitter, with default settings, that recognizes that separator
116    */
on(char separator)117   public static Splitter on(char separator) {
118     return on(CharMatcher.is(separator));
119   }
120 
121   /**
122    * Returns a splitter that considers any single character matched by the
123    * given {@code CharMatcher} to be a separator. For example, {@code
124    * Splitter.on(CharMatcher.anyOf(";,")).split("foo,;bar,quux")} returns an
125    * iterable containing {@code ["foo", "", "bar", "quux"]}.
126    *
127    * @param separatorMatcher a {@link CharMatcher} that determines whether a
128    *     character is a separator
129    * @return a splitter, with default settings, that uses this matcher
130    */
on(final CharMatcher separatorMatcher)131   public static Splitter on(final CharMatcher separatorMatcher) {
132     checkNotNull(separatorMatcher);
133 
134     return new Splitter(new Strategy() {
135       /*@Override*/ public SplittingIterator iterator(
136           Splitter splitter, final CharSequence toSplit) {
137         return new SplittingIterator(splitter, toSplit) {
138           @Override int separatorStart(int start) {
139             return separatorMatcher.indexIn(toSplit, start);
140           }
141 
142           @Override int separatorEnd(int separatorPosition) {
143             return separatorPosition + 1;
144           }
145         };
146       }
147     });
148   }
149 
150   /**
151    * Returns a splitter that uses the given fixed string as a separator. For
152    * example, {@code Splitter.on(", ").split("foo, bar, baz,qux")} returns an
153    * iterable containing {@code ["foo", "bar", "baz,qux"]}.
154    *
155    * @param separator the literal, nonempty string to recognize as a separator
156    * @return a splitter, with default settings, that recognizes that separator
157    */
158   public static Splitter on(final String separator) {
159     checkArgument(separator.length() != 0,
160         "The separator may not be the empty string.");
161 
162     return new Splitter(new Strategy() {
163       /*@Override*/ public SplittingIterator iterator(
164           Splitter splitter, CharSequence toSplit) {
165         return new SplittingIterator(splitter, toSplit) {
166           @Override public int separatorStart(int start) {
167             int delimeterLength = separator.length();
168 
169             positions:
170             for (int p = start, last = toSplit.length() - delimeterLength;
171                 p <= last; p++) {
172               for (int i = 0; i < delimeterLength; i++) {
173                 if (toSplit.charAt(i + p) != separator.charAt(i)) {
174                   continue positions;
175                 }
176               }
177               return p;
178             }
179             return -1;
180           }
181 
182           @Override public int separatorEnd(int separatorPosition) {
183             return separatorPosition + separator.length();
184           }
185         };
186       }
187     });
188   }
189 
190   /**
191    * Returns a splitter that considers any subsequence matching {@code
192    * pattern} to be a separator. For example, {@code
193    * Splitter.on(Pattern.compile("\r?\n")).split(entireFile)} splits a string
194    * into lines whether it uses DOS-style or UNIX-style line terminators.
195    *
196    * @param separatorPattern the pattern that determines whether a subsequence
197    *     is a separator. This pattern may not match the empty string.
198    * @return a splitter, with default settings, that uses this pattern
199    * @throws IllegalArgumentException if {@code separatorPattern} matches the
200    *     empty string
201    */
202   public static Splitter on(final Pattern separatorPattern) {
203     checkNotNull(separatorPattern);
204     checkArgument(!separatorPattern.matcher("").matches(),
205         "The pattern may not match the empty string: %s", separatorPattern);
206 
207     return new Splitter(new Strategy() {
208       /*@Override*/ public SplittingIterator iterator(
209           final Splitter splitter, CharSequence toSplit) {
210         final Matcher matcher = separatorPattern.matcher(toSplit);
211         return new SplittingIterator(splitter, toSplit) {
212           @Override public int separatorStart(int start) {
213             return matcher.find(start) ? matcher.start() : -1;
214           }
215 
216           @Override public int separatorEnd(int separatorPosition) {
217             return matcher.end();
218           }
219         };
220       }
221     });
222   }
223 
224   /**
225    * Returns a splitter that considers any subsequence matching a given
226    * pattern (regular expression) to be a separator. For example, {@code
227    * Splitter.onPattern("\r?\n").split(entireFile)} splits a string into lines
228    * whether it uses DOS-style or UNIX-style line terminators. This is
229    * equivalent to {@code Splitter.on(Pattern.compile(pattern))}.
230    *
231    * @param separatorPattern the pattern that determines whether a subsequence
232    *     is a separator. This pattern may not match the empty string.
233    * @return a splitter, with default settings, that uses this pattern
234    * @throws PatternSyntaxException if {@code separatorPattern} is a malformed
235    *     expression
236    * @throws IllegalArgumentException if {@code separatorPattern} matches the
237    *     empty string
238    */
239   public static Splitter onPattern(String separatorPattern) {
240     return on(Pattern.compile(separatorPattern));
241   }
242 
243   /**
244    * Returns a splitter that divides strings into pieces of the given length.
245    * For example, {@code Splitter.atEach(2).split("abcde")} returns an
246    * iterable containing {@code ["ab", "cd", "e"]}. The last piece can be
247    * smaller than {@code length} but will never be empty.
248    *
249    * @param length the desired length of pieces after splitting
250    * @return a splitter, with default settings, that can split into fixed sized
251    *     pieces
252    */
253   public static Splitter fixedLength(final int length) {
254     checkArgument(length > 0, "The length may not be less than 1");
255 
256     return new Splitter(new Strategy() {
257       /*@Override*/ public SplittingIterator iterator(
258           final Splitter splitter, CharSequence toSplit) {
259         return new SplittingIterator(splitter, toSplit) {
260           @Override public int separatorStart(int start) {
261             int nextChunkStart = start + length;
262             return (nextChunkStart < toSplit.length() ? nextChunkStart : -1);
263           }
264 
265           @Override public int separatorEnd(int separatorPosition) {
266             return separatorPosition;
267           }
268         };
269       }
270     });
271   }
272 
273   /**
274    * Returns a splitter that behaves equivalently to {@code this} splitter, but
275    * automatically omits empty strings from the results. For example, {@code
276    * Splitter.on(',').omitEmptyStrings().split(",a,,,b,c,,")} returns an
277    * iterable containing only {@code ["a", "b", "c"]}.
278    *
279    * <p>If either {@code trimResults} option is also specified when creating a
280    * splitter, that splitter always trims results first before checking for
281    * emptiness. So, for example, {@code
282    * Splitter.on(':').omitEmptyStrings().trimResults().split(": : : ")} returns
283    * an empty iterable.
284    *
285    * <p>Note that it is ordinarily not possible for {@link #split(CharSequence)}
286    * to return an empty iterable, but when using this option, it can (if the
287    * input sequence consists of nothing but separators).
288    *
289    * @return a splitter with the desired configuration
290    */
291   public Splitter omitEmptyStrings() {
292     return new Splitter(strategy, true, trimmer);
293   }
294 
295   /**
296    * Returns a splitter that behaves equivalently to {@code this} splitter, but
297    * automatically removes leading and trailing {@linkplain
298    * CharMatcher#WHITESPACE whitespace} from each returned substring; equivalent
299    * to {@code trimResults(CharMatcher.WHITESPACE)}. For example, {@code
300    * Splitter.on(',').trimResults().split(" a, b  ,c  ")} returns an iterable
301    * containing {@code ["a", "b", "c"]}.
302    *
303    * @return a splitter with the desired configuration
304    */
305   public Splitter trimResults() {
306     return trimResults(CharMatcher.WHITESPACE);
307   }
308 
309   /**
310    * Returns a splitter that behaves equivalently to {@code this} splitter, but
311    * removes all leading or trailing characters matching the given {@code
312    * CharMatcher} from each returned substring. For example, {@code
313    * Splitter.on(',').trimResults(CharMatcher.is('_')).split("_a ,_b_ ,c__")}
314    * returns an iterable containing {@code ["a ", "b_ ", "c"]}.
315    *
316    * @param trimmer a {@link CharMatcher} that determines whether a character
317    *     should be removed from the beginning/end of a subsequence
318    * @return a splitter with the desired configuration
319    */
320   public Splitter trimResults(CharMatcher trimmer) {
321     checkNotNull(trimmer);
322     return new Splitter(strategy, omitEmptyStrings, trimmer);
323   }
324 
325   /**
326    * Splits the {@link CharSequence} passed in parameter.
327    *
328    * @param sequence the sequence of characters to split
329    * @return an iteration over the segments split from the parameter.
330    */
331   public Iterable<String> split(final CharSequence sequence) {
332     checkNotNull(sequence);
333 
334     return new Iterable<String>() {
335       /*@Override*/ public Iterator<String> iterator() {
336         return strategy.iterator(Splitter.this, sequence);
337       }
338     };
339   }
340 
341   private interface Strategy {
342     Iterator<String> iterator(Splitter splitter, CharSequence toSplit);
343   }
344 
345   private abstract static class SplittingIterator
346       extends AbstractIterator<String> {
347     final CharSequence toSplit;
348     final CharMatcher trimmer;
349     final boolean omitEmptyStrings;
350 
351     /**
352      * Returns the first index in {@code toSplit} at or after {@code start}
353      * that contains the separator.
354      */
355     abstract int separatorStart(int start);
356 
357     /**
358      * Returns the first index in {@code toSplit} after {@code
359      * separatorPosition} that does not contain a separator. This method is only
360      * invoked after a call to {@code separatorStart}.
361      */
362     abstract int separatorEnd(int separatorPosition);
363 
364     int offset = 0;
365 
366     protected SplittingIterator(Splitter splitter, CharSequence toSplit) {
367       this.trimmer = splitter.trimmer;
368       this.omitEmptyStrings = splitter.omitEmptyStrings;
369       this.toSplit = toSplit;
370     }
371 
372     @Override protected String computeNext() {
373       while (offset != -1) {
374         int start = offset;
375         int end;
376 
377         int separatorPosition = separatorStart(offset);
378         if (separatorPosition == -1) {
379           end = toSplit.length();
380           offset = -1;
381         } else {
382           end = separatorPosition;
383           offset = separatorEnd(separatorPosition);
384         }
385 
386         while (start < end && trimmer.matches(toSplit.charAt(start))) {
387           start++;
388         }
389         while (end > start && trimmer.matches(toSplit.charAt(end - 1))) {
390           end--;
391         }
392 
393         if (omitEmptyStrings && start == end) {
394           continue;
395         }
396 
397         return toSplit.subSequence(start, end).toString();
398       }
399       return endOfData();
400     }
401   }
402 
403   /*
404    * Copied from common.collect.AbstractIterator. TODO: un-fork once these
405    * packages have been combined into a single library.
406    */
407   private static abstract class AbstractIterator<T> implements Iterator<T> {
408     State state = State.NOT_READY;
409 
410     enum State {
411       READY, NOT_READY, DONE, FAILED,
412     }
413 
414     T next;
415 
416     protected abstract T computeNext();
417 
418     protected final T endOfData() {
419       state = State.DONE;
420       return null;
421     }
422 
423     public final boolean hasNext() {
424       checkState(state != State.FAILED);
425       switch (state) {
426         case DONE:
427           return false;
428         case READY:
429           return true;
430         default:
431       }
432       return tryToComputeNext();
433     }
434 
435     boolean tryToComputeNext() {
436       state = State.FAILED; // temporary pessimism
437       next = computeNext();
438       if (state != State.DONE) {
439         state = State.READY;
440         return true;
441       }
442       return false;
443     }
444 
445     public final T next() {
446       if (!hasNext()) {
447         throw new NoSuchElementException();
448       }
449       state = State.NOT_READY;
450       return next;
451     }
452 
453     /*@Override*/ public void remove() {
454       throw new UnsupportedOperationException();
455     }
456   }
457 }
458