• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10 
11 package android.icu.util;
12 
13 import java.util.Enumeration;
14 import java.util.NoSuchElementException;
15 
16 import android.icu.text.UTF16;
17 import android.icu.text.UnicodeSet;
18 
19 /**
20  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.util.StringTokenizer}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
21  *
22  * <p>The string tokenizer class allows an application to break a string
23  * into tokens by performing code point comparison.
24  * The <code>StringTokenizer</code> methods do not distinguish
25  * among identifiers, numbers, and quoted strings, nor do they recognize
26  * and skip comments.
27  * <p>
28  * The set of delimiters (the codepoints that separate tokens) may be
29  * specified either at creation time or on a per-token basis.
30  * <p>
31  * An instance of <code>StringTokenizer</code> behaves in one of three ways,
32  * depending on whether it was created with the <code>returnDelims</code>
33  * and <code>coalesceDelims</code>
34  * flags having the value <code>true</code> or <code>false</code>:
35  * <ul>
36  * <li>If returnDelims is <code>false</code>, delimiter code points serve to
37  * separate tokens. A token is a maximal sequence of consecutive
38  * code points that are not delimiters.
39  * <li>If returnDelims is <code>true</code>, delimiter code points are
40  * themselves considered to be tokens. In this case, if coalesceDelims is
41  * <code>true</code>, such tokens will be the maximal sequence of consecutive
42  * code points that <em>are</em> delimiters.  If coalesceDelims is false,
43  * a token will be received for each delimiter code point.
44  * </ul>
45  * <p>A token is thus either one
46  * delimiter code point, a maximal sequence of consecutive code points that
47  * are delimiters, or a maximal sequence of consecutive code
48  * points that are not delimiters.
49  * <p>
50  * A <tt>StringTokenizer</tt> object internally maintains a current
51  * position within the string to be tokenized. Some operations advance this
52  * current position past the code point processed.
53  * <p>
54  * A token is returned by taking a substring of the string that was used to
55  * create the <tt>StringTokenizer</tt> object.
56  * <p>
57  * Example of the use of the default delimiter tokenizer.
58  * <blockquote><pre>
59  * StringTokenizer st = new StringTokenizer("this is a test");
60  * while (st.hasMoreTokens()) {
61  *     println(st.nextToken());
62  *     }
63  * </pre></blockquote>
64  * <p>
65  * prints the following output:
66  * <blockquote><pre>
67  *     this
68  *     is
69  *     a
70  *     test
71  * </pre></blockquote>
72  * <p>
73  * Example of the use of the tokenizer with user specified delimiter.
74  * <blockquote><pre>
75  *     StringTokenizer st = new StringTokenizer(
76  *     "this is a test with supplementary characters &#92;ud800&#92;ud800&#92;udc00&#92;udc00",
77  *         " &#92;ud800&#92;udc00");
78  *     while (st.hasMoreTokens()) {
79  *         println(st.nextToken());
80  *     }
81  * </pre></blockquote>
82  * <p>
83  * prints the following output:
84  * <blockquote><pre>
85  *     this
86  *     is
87  *     a
88  *     test
89  *     with
90  *     supplementary
91  *     characters
92  *     &#92;ud800
93  *     &#92;udc00
94  * </pre></blockquote>
95  *
96  * @author syn wee
97  * @hide Only a subset of ICU is exposed in Android
98  */
99 public final class StringTokenizer implements Enumeration<Object>
100 {
101     // public constructors ---------------------------------------------
102 
103     /**
104      * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All
105      * characters in the delim argument are the delimiters for separating
106      * tokens.
107      * <p>If the returnDelims flag is false, the delimiter characters are
108      * skipped and only serve as separators between tokens.
109      * <p>If the returnDelims flag is true, then the delimiter characters
110      * are also returned as tokens, one per delimiter.
111      * @param str a string to be parsed.
112      * @param delim the delimiters.
113      * @param returndelims flag indicating whether to return the delimiters
114      *        as tokens.
115      * @exception NullPointerException if str is null
116      */
StringTokenizer(String str, UnicodeSet delim, boolean returndelims)117     public StringTokenizer(String str, UnicodeSet delim, boolean returndelims)
118     {
119         this(str, delim, returndelims, false);
120     }
121 
122     /**
123      * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. All
124      * characters in the delim argument are the delimiters for separating
125      * tokens.
126      * <p>If the returnDelims flag is false, the delimiter characters are
127      * skipped and only serve as separators between tokens.
128      * <p>If the returnDelims flag is true, then the delimiter characters
129      * are also returned as tokens.  If coalescedelims is true, one token
130      * is returned for each run of delimiter characters, otherwise one
131      * token is returned per delimiter.  Since surrogate pairs can be
132      * delimiters, the returned token might be two chars in length.
133      * @param str a string to be parsed.
134      * @param delim the delimiters.
135      * @param returndelims flag indicating whether to return the delimiters
136      *        as tokens.
137      * @param coalescedelims flag indicating whether to return a run of
138      *        delimiters as a single token or as one token per delimiter.
139      *        This only takes effect if returndelims is true.
140      * @exception NullPointerException if str is null
141      * @deprecated This API is ICU internal only.
142      * @hide draft / provisional / internal are hidden on Android
143      */
144     @Deprecated
StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims)145     public StringTokenizer(String str, UnicodeSet delim, boolean returndelims, boolean coalescedelims)
146     {
147         m_source_ = str;
148         m_length_ = str.length();
149         if (delim == null) {
150             m_delimiters_ = EMPTY_DELIMITER_;
151         }
152         else {
153             m_delimiters_ = delim;
154         }
155         m_returnDelimiters_ = returndelims;
156         m_coalesceDelimiters_ = coalescedelims;
157         m_tokenOffset_ = -1;
158         m_tokenSize_ = -1;
159         if (m_length_ == 0) {
160             // string length 0, no tokens
161             m_nextOffset_ = -1;
162         }
163         else {
164             m_nextOffset_ = 0;
165             if (!returndelims) {
166                 m_nextOffset_ = getNextNonDelimiter(0);
167             }
168         }
169     }
170 
171     /**
172      * <strong>[icu]</strong> Constructs a string tokenizer for the specified string. The
173      * characters in the delim argument are the delimiters for separating
174      * tokens.
175      * <p>Delimiter characters themselves will not be treated as tokens.
176      * @param str a string to be parsed.
177      * @param delim the delimiters.
178      * @exception NullPointerException if str is null
179      */
StringTokenizer(String str, UnicodeSet delim)180     public StringTokenizer(String str, UnicodeSet delim)
181     {
182         this(str, delim, false, false);
183     }
184 
185     /**
186      * <p>Constructs a string tokenizer for the specified string. All
187      * characters in the delim argument are the delimiters for separating
188      * tokens.
189      * <p>If the returnDelims flag is false, the delimiter characters are
190      * skipped and only serve as separators between tokens.
191      * <p>If the returnDelims flag is true, then the delimiter characters
192      * are also returned as tokens, one per delimiter.
193      * @param str a string to be parsed.
194      * @param delim the delimiters.
195      * @param returndelims flag indicating whether to return the delimiters
196      *        as tokens.
197      * @exception NullPointerException if str is null
198      */
StringTokenizer(String str, String delim, boolean returndelims)199     public StringTokenizer(String str, String delim, boolean returndelims)
200     {
201         this(str, delim, returndelims, false); // java default behavior
202     }
203 
204     /**
205      * <p>Constructs a string tokenizer for the specified string. All
206      * characters in the delim argument are the delimiters for separating
207      * tokens.
208      * <p>If the returnDelims flag is false, the delimiter characters are
209      * skipped and only serve as separators between tokens.
210      * <p>If the returnDelims flag is true, then the delimiter characters
211      * are also returned as tokens.  If coalescedelims is true, one token
212      * is returned for each run of delimiter characters, otherwise one
213      * token is returned per delimiter.  Since surrogate pairs can be
214      * delimiters, the returned token might be two chars in length.
215      * @param str a string to be parsed.
216      * @param delim the delimiters.
217      * @param returndelims flag indicating whether to return the delimiters
218      *        as tokens.
219      * @param coalescedelims flag indicating whether to return a run of
220      *        delimiters as a single token or as one token per delimiter.
221      *        This only takes effect if returndelims is true.
222      * @exception NullPointerException if str is null
223      * @deprecated This API is ICU internal only.
224      * @hide draft / provisional / internal are hidden on Android
225      */
226     @Deprecated
StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims)227     public StringTokenizer(String str, String delim, boolean returndelims, boolean coalescedelims)
228     {
229         // don't ignore whitespace
230         m_delimiters_ = EMPTY_DELIMITER_;
231         if (delim != null && delim.length() > 0) {
232             m_delimiters_ = new UnicodeSet();
233             m_delimiters_.addAll(delim);
234             checkDelimiters();
235         }
236         m_coalesceDelimiters_ = coalescedelims;
237         m_source_ = str;
238         m_length_ = str.length();
239         m_returnDelimiters_ = returndelims;
240         m_tokenOffset_ = -1;
241         m_tokenSize_ = -1;
242         if (m_length_ == 0) {
243             // string length 0, no tokens
244             m_nextOffset_ = -1;
245         }
246         else {
247             m_nextOffset_ = 0;
248             if (!returndelims) {
249                 m_nextOffset_ = getNextNonDelimiter(0);
250             }
251         }
252     }
253 
254     /**
255      * <p>Constructs a string tokenizer for the specified string. The
256      * characters in the delim argument are the delimiters for separating
257      * tokens.
258      * <p>Delimiter characters themselves will not be treated as tokens.
259      * @param str a string to be parsed.
260      * @param delim the delimiters.
261      * @exception NullPointerException if str is null
262      */
StringTokenizer(String str, String delim)263     public StringTokenizer(String str, String delim)
264     {
265         // don't ignore whitespace
266         this(str, delim, false, false);
267     }
268 
269     /**
270      * <p>Constructs a string tokenizer for the specified string.
271      * The tokenizer uses the default delimiter set, which is
272      * " &#92;t&#92;n&#92;r&#92;f":
273      * the space character, the tab character, the newline character, the
274      * carriage-return character, and the form-feed character.
275      * <p>Delimiter characters themselves will not be treated as tokens.
276      * @param str a string to be parsed
277      * @exception NullPointerException if str is null
278      */
StringTokenizer(String str)279     public StringTokenizer(String str)
280     {
281         this(str, DEFAULT_DELIMITERS_, false, false);
282     }
283 
284     // public methods --------------------------------------------------
285 
286     /**
287      * Tests if there are more tokens available from this tokenizer's
288      * string.
289      * If this method returns <tt>true</tt>, then a subsequent call to
290      * <tt>nextToken</tt> with no argument will successfully return a token.
291      * @return <code>true</code> if and only if there is at least one token
292      *         in the string after the current position; <code>false</code>
293      *         otherwise.
294      */
hasMoreTokens()295     public boolean hasMoreTokens()
296     {
297         return m_nextOffset_ >= 0;
298     }
299 
300     /**
301      * Returns the next token from this string tokenizer.
302      * @return the next token from this string tokenizer.
303      * @exception NoSuchElementException if there are no more tokens in
304      *            this tokenizer's string.
305      */
nextToken()306     public String nextToken()
307     {
308         if (m_tokenOffset_ < 0) {
309             if (m_nextOffset_ < 0) {
310                 throw new NoSuchElementException("No more tokens in String");
311             }
312             // pre-calculations of tokens not done
313             if (m_returnDelimiters_) {
314                 int tokenlimit = 0;
315                 int c = UTF16.charAt(m_source_, m_nextOffset_);
316                 boolean contains = delims == null
317                     ? m_delimiters_.contains(c)
318                     : c < delims.length && delims[c];
319                 if (contains) {
320                      if (m_coalesceDelimiters_) {
321                         tokenlimit = getNextNonDelimiter(m_nextOffset_);
322                      } else {
323                         tokenlimit = m_nextOffset_ + UTF16.getCharCount(c);
324                         if (tokenlimit == m_length_) {
325                             tokenlimit = -1;
326                         }
327                      }
328                 }
329                 else {
330                     tokenlimit = getNextDelimiter(m_nextOffset_);
331                 }
332                 String result;
333                 if (tokenlimit < 0) {
334                     result = m_source_.substring(m_nextOffset_);
335                 }
336                 else {
337                     result = m_source_.substring(m_nextOffset_, tokenlimit);
338                 }
339                 m_nextOffset_ = tokenlimit;
340                 return result;
341             }
342             else {
343                 int tokenlimit = getNextDelimiter(m_nextOffset_);
344                 String result;
345                 if (tokenlimit < 0) {
346                     result = m_source_.substring(m_nextOffset_);
347                     m_nextOffset_ = tokenlimit;
348                 }
349                 else {
350                     result = m_source_.substring(m_nextOffset_, tokenlimit);
351                     m_nextOffset_ = getNextNonDelimiter(tokenlimit);
352                 }
353 
354                 return result;
355             }
356         }
357         // count was called before and we have all the tokens
358         if (m_tokenOffset_ >= m_tokenSize_) {
359             throw new NoSuchElementException("No more tokens in String");
360         }
361         String result;
362         if (m_tokenLimit_[m_tokenOffset_] >= 0) {
363             result = m_source_.substring(m_tokenStart_[m_tokenOffset_],
364                                          m_tokenLimit_[m_tokenOffset_]);
365         }
366         else {
367             result = m_source_.substring(m_tokenStart_[m_tokenOffset_]);
368         }
369         m_tokenOffset_ ++;
370         m_nextOffset_ = -1;
371         if (m_tokenOffset_ < m_tokenSize_) {
372             m_nextOffset_ = m_tokenStart_[m_tokenOffset_];
373         }
374         return result;
375     }
376 
377     /**
378      * Returns the next token in this string tokenizer's string. First,
379      * the set of characters considered to be delimiters by this
380      * <tt>StringTokenizer</tt> object is changed to be the characters in
381      * the string <tt>delim</tt>. Then the next token in the string
382      * after the current position is returned. The current position is
383      * advanced beyond the recognized token.  The new delimiter set
384      * remains the default after this call.
385      * @param delim the new delimiters.
386      * @return the next token, after switching to the new delimiter set.
387      * @exception NoSuchElementException if there are no more tokens in
388      *            this tokenizer's string.
389      */
390     public String nextToken(String delim)
391     {
392         m_delimiters_ = EMPTY_DELIMITER_;
393         if (delim != null && delim.length() > 0) {
394             m_delimiters_ = new UnicodeSet();
395             m_delimiters_.addAll(delim);
396         }
397         return nextToken(m_delimiters_);
398     }
399 
400     /**
401      * <strong>[icu]</strong> Returns the next token in this string tokenizer's string. First,
402      * the set of characters considered to be delimiters by this
403      * <tt>StringTokenizer</tt> object is changed to be the characters in
404      * the string <tt>delim</tt>. Then the next token in the string
405      * after the current position is returned. The current position is
406      * advanced beyond the recognized token.  The new delimiter set
407      * remains the default after this call.
408      * @param delim the new delimiters.
409      * @return the next token, after switching to the new delimiter set.
410      * @exception NoSuchElementException if there are no more tokens in
411      *            this tokenizer's string.
412      */
413     public String nextToken(UnicodeSet delim)
414     {
415         m_delimiters_ = delim;
416         checkDelimiters();
417         m_tokenOffset_ = -1;
418         m_tokenSize_ = -1;
419         if (!m_returnDelimiters_) {
420             m_nextOffset_ = getNextNonDelimiter(m_nextOffset_);
421         }
422         return nextToken();
423     }
424 
425     /**
426      * Returns the same value as the <code>hasMoreTokens</code> method.
427      * It exists so that this class can implement the
428      * <code>Enumeration</code> interface.
429      * @return <code>true</code> if there are more tokens;
430      *         <code>false</code> otherwise.
431      * @see #hasMoreTokens()
432      */
433     public boolean hasMoreElements()
434     {
435         return hasMoreTokens();
436     }
437 
438     /**
439      * Returns the same value as the <code>nextToken</code> method, except
440      * that its declared return value is <code>Object</code> rather than
441      * <code>String</code>. It exists so that this class can implement the
442      * <code>Enumeration</code> interface.
443      * @return the next token in the string.
444      * @exception NoSuchElementException if there are no more tokens in
445      *            this tokenizer's string.
446      * @see #nextToken()
447      */
448     public Object nextElement()
449     {
450         return nextToken();
451     }
452 
453     /**
454      * Calculates the number of times that this tokenizer's
455      * <code>nextToken</code> method can be called before it generates an
456      * exception. The current position is not advanced.
457      * @return the number of tokens remaining in the string using the
458      *         current delimiter set.
459      * @see #nextToken()
460      */
461     public int countTokens()
462     {
463         int result = 0;
464         if (hasMoreTokens()) {
465             if (m_tokenOffset_ >= 0) {
466                 return m_tokenSize_ - m_tokenOffset_;
467             }
468             if (m_tokenStart_ == null) {
469                 m_tokenStart_ = new int[TOKEN_SIZE_];
470                 m_tokenLimit_ = new int[TOKEN_SIZE_];
471             }
472             do {
473                 if (m_tokenStart_.length == result) {
474                     int temptokenindex[] = m_tokenStart_;
475                     int temptokensize[] = m_tokenLimit_;
476                     int originalsize = temptokenindex.length;
477                     int newsize = originalsize + TOKEN_SIZE_;
478                     m_tokenStart_ = new int[newsize];
479                     m_tokenLimit_ = new int[newsize];
480                     System.arraycopy(temptokenindex, 0, m_tokenStart_, 0,
481                                      originalsize);
482                     System.arraycopy(temptokensize, 0, m_tokenLimit_, 0,
483                                      originalsize);
484                 }
485                 m_tokenStart_[result] = m_nextOffset_;
486                 if (m_returnDelimiters_) {
487                     int c = UTF16.charAt(m_source_, m_nextOffset_);
488                     boolean contains = delims == null
489                         ? m_delimiters_.contains(c)
490                         : c < delims.length && delims[c];
491                     if (contains) {
492                         if (m_coalesceDelimiters_) {
493                             m_tokenLimit_[result] = getNextNonDelimiter(
494                                                                 m_nextOffset_);
495                         } else {
496                             int p = m_nextOffset_ + 1;
497                             if (p == m_length_) {
498                                 p = -1;
499                             }
500                             m_tokenLimit_[result] = p;
501 
502                         }
503                     }
504                     else {
505                         m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);
506                     }
507                     m_nextOffset_ = m_tokenLimit_[result];
508                 }
509                 else {
510                     m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);
511                     m_nextOffset_ = getNextNonDelimiter(m_tokenLimit_[result]);
512                 }
513                 result ++;
514             } while (m_nextOffset_ >= 0);
515             m_tokenOffset_ = 0;
516             m_tokenSize_ = result;
517             m_nextOffset_ = m_tokenStart_[0];
518         }
519         return result;
520     }
521 
522     // private data members -------------------------------------------------
523 
524     /**
525      * Current offset to the token array. If the array token is not set up yet,
526      * this value is a -1
527      */
528     private int m_tokenOffset_;
529     /**
530      * Size of the token array. If the array token is not set up yet,
531      * this value is a -1
532      */
533     private int m_tokenSize_;
534     /**
535      * Array of pre-calculated tokens start indexes in source string terminated
536      * by -1.
537      * This is only set up during countTokens() and only stores the remaining
538      * tokens, not all tokens including parsed ones
539      */
540     private int m_tokenStart_[];
541     /**
542      * Array of pre-calculated tokens limit indexes in source string.
543      * This is only set up during countTokens() and only stores the remaining
544      * tokens, not all tokens including parsed ones
545      */
546     private int m_tokenLimit_[];
547     /**
548      * UnicodeSet containing delimiters
549      */
550     private UnicodeSet m_delimiters_;
551     /**
552      * String to parse for tokens
553      */
554     private String m_source_;
555     /**
556      * Length of m_source_
557      */
558     private int m_length_;
559     /**
560      * Current position in string to parse for tokens
561      */
562     private int m_nextOffset_;
563     /**
564      * Flag indicator if delimiters are to be treated as tokens too
565      */
566     private boolean m_returnDelimiters_;
567 
568     /**
569      * Flag indicating whether to coalesce runs of delimiters into single tokens
570      */
571     private boolean m_coalesceDelimiters_;
572 
573     /**
574      * Default set of delimiters &#92;t&#92;n&#92;r&#92;f
575      */
576     private static final UnicodeSet DEFAULT_DELIMITERS_
577         = new UnicodeSet(0x09, 0x0a, 0x0c, 0x0d, 0x20, 0x20);   // UnicodeSet("[ \t\n\r\f]", false)
578     /**
579      * Array size increments
580      */
581     private static final int TOKEN_SIZE_ = 100;
582     /**
583      * A empty delimiter UnicodeSet, used when user specified null delimiters
584      */
585     private static final UnicodeSet EMPTY_DELIMITER_ = UnicodeSet.EMPTY;
586 
587     // private methods ------------------------------------------------------
588 
589     /**
590      * Gets the index of the next delimiter after offset
591      * @param offset to the source string
592      * @return offset of the immediate next delimiter, otherwise
593      *         (- source string length - 1) if there
594      *         are no more delimiters after m_nextOffset
595      */
getNextDelimiter(int offset)596     private int getNextDelimiter(int offset)
597     {
598         if (offset >= 0) {
599             int result = offset;
600             int c = 0;
601             if (delims == null) {
602                 do {
603                     c = UTF16.charAt(m_source_, result);
604                     if (m_delimiters_.contains(c)) {
605                         break;
606                     }
607                     result ++;
608                 } while (result < m_length_);
609             } else {
610                 do {
611                     c = UTF16.charAt(m_source_, result);
612                     if (c < delims.length && delims[c]) {
613                         break;
614                     }
615                     result ++;
616                 } while (result < m_length_);
617             }
618             if (result < m_length_) {
619                 return result;
620             }
621         }
622         return -1 - m_length_;
623     }
624 
625     /**
626      * Gets the index of the next non-delimiter after m_nextOffset_
627      * @param offset to the source string
628      * @return offset of the immediate next non-delimiter, otherwise
629      *         (- source string length - 1) if there
630      *         are no more delimiters after m_nextOffset
631      */
getNextNonDelimiter(int offset)632     private int getNextNonDelimiter(int offset)
633     {
634         if (offset >= 0) {
635             int result = offset;
636             int c = 0;
637             if (delims == null) {
638                 do {
639                     c = UTF16.charAt(m_source_, result);
640                     if (!m_delimiters_.contains(c)) {
641                         break;
642                     }
643                     result ++;
644                 } while (result < m_length_);
645             } else {
646                 do {
647                     c = UTF16.charAt(m_source_, result);
648                     if (!(c < delims.length && delims[c])) {
649                         break;
650                     }
651                     result ++;
652                 } while (result < m_length_);
653             }
654             if (result < m_length_) {
655                 return result;
656             }
657         }
658         return -1 - m_length_;
659     }
660 
checkDelimiters()661     void checkDelimiters() {
662         if (m_delimiters_ == null || m_delimiters_.size() == 0) {
663             delims = new boolean[0];
664         } else {
665             int maxChar = m_delimiters_.getRangeEnd(m_delimiters_.getRangeCount()-1);
666             if (maxChar < 0x7f) {
667                 delims = new boolean[maxChar+1];
668                 for (int i = 0, ch; -1 != (ch = m_delimiters_.charAt(i)); ++i) {
669                     delims[ch] = true;
670                 }
671             } else {
672                 delims = null;
673             }
674         }
675     }
676     private boolean[] delims;
677 }
678