• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package ohos.global.icu.text;
11 
12 import java.util.Iterator;
13 
14 /**
15  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
16  * iterates over either code points or code point ranges.  After all
17  * code points or ranges have been returned, it returns the
18  * multicharacter strings of the UnicodeSet, if any.
19  *
20  * <p>To iterate over code points and multicharacter strings,
21  * use a loop like this:
22  * <pre>
23  * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
24  *   processString(it.getString());
25  * }
26  * </pre>
27  *
28  * <p>To iterate over code point ranges, use a loop like this:
29  * <pre>
30  * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) {
31  *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
32  *     processCodepointRange(it.codepoint, it.codepointEnd);
33  *   } else {
34  *     processString(it.getString());
35  *   }
36  * }
37  * </pre>
38  * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
39  * Do not alter the UnicodeSet while iterating.
40  * @author M. Davis
41  */
42 public class UnicodeSetIterator {
43 
44     /**
45      * Value of <tt>codepoint</tt> if the iterator points to a string.
46      * If <tt>codepoint == IS_STRING</tt>, then examine
47      * <tt>string</tt> for the current iteration result.
48      */
49     public static int IS_STRING = -1;
50 
51     /**
52      * Current code point, or the special value <tt>IS_STRING</tt>, if
53      * the iterator points to a string.
54      */
55     public int codepoint;
56 
57     /**
58      * When iterating over ranges using <tt>nextRange()</tt>,
59      * <tt>codepointEnd</tt> contains the inclusive end of the
60      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
61      * iterating over code points using <tt>next()</tt>, or if
62      * <tt>codepoint == IS_STRING</tt>, then the value of
63      * <tt>codepointEnd</tt> is undefined.
64      */
65     public int codepointEnd;
66 
67     /**
68      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
69      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
70      * value of <tt>string</tt> is undefined.
71      */
72     public String string;
73 
74     /**
75      * Create an iterator over the given set.
76      * @param set set to iterate over
77      */
UnicodeSetIterator(UnicodeSet set)78     public UnicodeSetIterator(UnicodeSet set) {
79         reset(set);
80     }
81 
82     /**
83      * Create an iterator over nothing.  <tt>next()</tt> and
84      * <tt>nextRange()</tt> return false. This is a convenience
85      * constructor allowing the target to be set later.
86      */
UnicodeSetIterator()87     public UnicodeSetIterator() {
88         reset(new UnicodeSet());
89     }
90 
91     /**
92      * Returns the next element in the set, either a single code point
93      * or a string.  If there are no more elements in the set, return
94      * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
95      * string in the <tt>string</tt> field.  Otherwise the value is a
96      * single code point in the <tt>codepoint</tt> field.
97      *
98      * <p>The order of iteration is all code points in sorted order,
99      * followed by all strings sorted order.  <tt>codepointEnd</tt> is
100      * undefined after calling this method.  <tt>string</tt> is
101      * undefined unless <tt>codepoint == IS_STRING</tt>.  Do not mix
102      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
103      * calling <tt>reset()</tt> between them.  The results of doing so
104      * are undefined.
105      * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
106      * Do not alter the UnicodeSet while iterating.
107      * @return true if there was another element in the set and this
108      * object contains the element.
109      */
next()110     public boolean next() {
111         if (nextElement <= endElement) {
112             codepoint = codepointEnd = nextElement++;
113             return true;
114         }
115         if (range < endRange) {
116             loadRange(++range);
117             codepoint = codepointEnd = nextElement++;
118             return true;
119         }
120 
121         // stringIterator == null iff there are no string elements remaining
122 
123         if (stringIterator == null) {
124             return false;
125         }
126         codepoint = IS_STRING; // signal that value is actually a string
127         string = stringIterator.next();
128         if (!stringIterator.hasNext()) {
129             stringIterator = null;
130         }
131         return true;
132     }
133 
134     /**
135      * Returns the next element in the set, either a code point range
136      * or a string.  If there are no more elements in the set, return
137      * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
138      * string in the <tt>string</tt> field.  Otherwise the value is a
139      * range of one or more code points from <tt>codepoint</tt> to
140      * <tt>codepointeEnd</tt> inclusive.
141      *
142      * <p>The order of iteration is all code points ranges in sorted
143      * order, followed by all strings sorted order.  Ranges are
144      * disjoint and non-contiguous.  <tt>string</tt> is undefined
145      * unless <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
146      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
147      * <tt>reset()</tt> between them.  The results of doing so are
148      * undefined.
149      *
150      * @return true if there was another element in the set and this
151      * object contains the element.
152      */
nextRange()153     public boolean nextRange() {
154         if (nextElement <= endElement) {
155             codepointEnd = endElement;
156             codepoint = nextElement;
157             nextElement = endElement+1;
158             return true;
159         }
160         if (range < endRange) {
161             loadRange(++range);
162             codepointEnd = endElement;
163             codepoint = nextElement;
164             nextElement = endElement+1;
165             return true;
166         }
167 
168         // stringIterator == null iff there are no string elements remaining
169 
170         if (stringIterator == null) {
171             return false;
172         }
173         codepoint = IS_STRING; // signal that value is actually a string
174         string = stringIterator.next();
175         if (!stringIterator.hasNext()) {
176             stringIterator = null;
177         }
178         return true;
179     }
180 
181     /**
182      * Sets this iterator to visit the elements of the given set and
183      * resets it to the start of that set.  The iterator is valid only
184      * so long as <tt>set</tt> is valid.
185      * @param uset the set to iterate over.
186      */
reset(UnicodeSet uset)187     public void reset(UnicodeSet uset) {
188         set = uset;
189         reset();
190     }
191 
192     /**
193      * Resets this iterator to the start of the set.
194      */
reset()195     public void reset() {
196         endRange = set.getRangeCount() - 1;
197         range = 0;
198         endElement = -1;
199         nextElement = 0;
200         if (endRange >= 0) {
201             loadRange(range);
202         }
203         if (set.hasStrings()) {
204             stringIterator = set.strings.iterator();
205         } else {
206             stringIterator = null;
207         }
208     }
209 
210     /**
211      * Gets the current string from the iterator. Only use after calling next(), not nextRange().
212      */
getString()213     public String getString() {
214         if (codepoint != IS_STRING) {
215             return UTF16.valueOf(codepoint);
216         }
217         return string;
218     }
219 
220     // ======================= PRIVATES ===========================
221 
222     private UnicodeSet set;
223     private int endRange = 0;
224     private int range = 0;
225 
226     /**
227      * @deprecated This API is ICU internal only.
228      * @hide deprecated on icu4j-org
229      * @hide draft / provisional / internal are hidden on OHOS
230      */
231     @Deprecated
getSet()232     public UnicodeSet getSet() {
233         return set;
234     }
235 
236     /**
237      * @deprecated This API is ICU internal only.
238      * @hide deprecated on icu4j-org
239      * @hide draft / provisional / internal are hidden on OHOS
240      */
241     @Deprecated
242     protected int endElement;
243     /**
244      * @deprecated This API is ICU internal only.
245      * @hide deprecated on icu4j-org
246      * @hide draft / provisional / internal are hidden on OHOS
247      */
248     @Deprecated
249     protected int nextElement;
250     private Iterator<String> stringIterator = null;
251 
252     /**
253      * Invariant: stringIterator is null when there are no (more) strings remaining
254      */
255 
256     /**
257      * @deprecated This API is ICU internal only.
258      * @hide deprecated on icu4j-org
259      * @hide draft / provisional / internal are hidden on OHOS
260      */
261     @Deprecated
loadRange(int aRange)262     protected void loadRange(int aRange) {
263         nextElement = set.getRangeStart(aRange);
264         endElement = set.getRangeEnd(aRange);
265     }
266 }
267