• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
7 package com.ibm.icu.text;
8 
9 
10 import java.text.CharacterIterator;
11 
12 import com.ibm.icu.impl.CharacterIteratorWrapper;
13 import com.ibm.icu.impl.ReplaceableUCharacterIterator;
14 import com.ibm.icu.impl.UCharArrayIterator;
15 import com.ibm.icu.impl.UCharacterIteratorWrapper;
16 
17 
18 /**
19  * Abstract class that defines an API for iteration on text objects.This is an
20  * interface for forward and backward iteration and random access into a text
21  * object. Forward iteration is done with post-increment and backward iteration
22  * is done with pre-decrement semantics, while the
23  * <code>java.text.CharacterIterator</code> interface methods provided forward
24  * iteration with "pre-increment" and backward iteration with pre-decrement
25  * semantics. This API is more efficient for forward iteration over code points.
26  * The other major difference is that this API can do both code unit and code point
27  * iteration, <code>java.text.CharacterIterator</code> can only iterate over
28  * code units and is limited to BMP (0 - 0xFFFF)
29  * @author Ram
30  * @stable ICU 2.4
31  */
32 public abstract class UCharacterIterator
33                       implements Cloneable,UForwardCharacterIterator {
34 
35     /**
36      * Protected default constructor for the subclasses
37      * @stable ICU 2.4
38      */
UCharacterIterator()39     protected UCharacterIterator(){
40     }
41 
42     // static final methods ----------------------------------------------------
43 
44     /**
45      * Returns a <code>UCharacterIterator</code> object given a
46      * <code>Replaceable</code> object.
47      * @param source a valid source as a <code>Replaceable</code> object
48      * @return UCharacterIterator object
49      * @exception IllegalArgumentException if the argument is null
50      * @stable ICU 2.4
51      */
getInstance(Replaceable source)52     public static final UCharacterIterator getInstance(Replaceable source){
53         return new ReplaceableUCharacterIterator(source);
54     }
55 
56     /**
57      * Returns a <code>UCharacterIterator</code> object given a
58      * source string.
59      * @param source a string
60      * @return UCharacterIterator object
61      * @exception IllegalArgumentException if the argument is null
62      * @stable ICU 2.4
63      */
getInstance(String source)64     public static final UCharacterIterator getInstance(String source){
65         return new ReplaceableUCharacterIterator(source);
66     }
67 
68     /**
69      * Returns a <code>UCharacterIterator</code> object given a
70      * source character array.
71      * @param source an array of UTF-16 code units
72      * @return UCharacterIterator object
73      * @exception IllegalArgumentException if the argument is null
74      * @stable ICU 2.4
75      */
getInstance(char[] source)76     public static final UCharacterIterator getInstance(char[] source){
77         return getInstance(source,0,source.length);
78     }
79 
80     /**
81      * Returns a <code>UCharacterIterator</code> object given a
82      * source character array.
83      * @param source an array of UTF-16 code units
84      * @return UCharacterIterator object
85      * @exception IllegalArgumentException if the argument is null
86      * @stable ICU 2.4
87      */
getInstance(char[] source, int start, int limit)88     public static final UCharacterIterator getInstance(char[] source, int start, int limit){
89         return new UCharArrayIterator(source,start,limit);
90     }
91     /**
92      * Returns a <code>UCharacterIterator</code> object given a
93      * source StringBuffer.
94      * @param source an string buffer of UTF-16 code units
95      * @return UCharacterIterator object
96      * @exception IllegalArgumentException if the argument is null
97      * @stable ICU 2.4
98      */
getInstance(StringBuffer source)99     public static final UCharacterIterator getInstance(StringBuffer source){
100         return new ReplaceableUCharacterIterator(source);
101     }
102 
103     /**
104      * Returns a <code>UCharacterIterator</code> object given a
105      * CharacterIterator.
106      * @param source a valid CharacterIterator object.
107      * @return UCharacterIterator object
108      * @exception IllegalArgumentException if the argument is null
109      * @stable ICU 2.4
110      */
getInstance(CharacterIterator source)111     public static final UCharacterIterator getInstance(CharacterIterator source){
112         return new CharacterIteratorWrapper(source);
113     }
114 
115     // public methods ----------------------------------------------------------
116     /**
117      * Returns a <code>java.text.CharacterIterator</code> object for
118      * the underlying text of this iterator.  The returned iterator is
119      * independent of this iterator.
120      * @return java.text.CharacterIterator object
121      * @stable ICU 2.4
122      */
getCharacterIterator()123     public CharacterIterator getCharacterIterator(){
124         return new UCharacterIteratorWrapper(this);
125     }
126 
127     /**
128      * Returns the code unit at the current index.  If index is out
129      * of range, returns DONE.  Index is not changed.
130      * @return current code unit
131      * @stable ICU 2.4
132      */
current()133     public abstract int current();
134 
135     /**
136      * Returns the codepoint at the current index.
137      * If the current index is invalid, DONE is returned.
138      * If the current index points to a lead surrogate, and there is a following
139      * trail surrogate, then the code point is returned.  Otherwise, the code
140      * unit at index is returned.  Index is not changed.
141      * @return current codepoint
142      * @stable ICU 2.4
143      */
currentCodePoint()144     public int currentCodePoint(){
145         int ch = current();
146         if(UTF16.isLeadSurrogate((char)ch)){
147             // advance the index to get the
148             // next code point
149             next();
150             // due to post increment semantics
151             // current() after next() actually
152             // returns the char we want
153             int ch2 = current();
154             // current should never change
155             // the current index so back off
156             previous();
157 
158             if(UTF16.isTrailSurrogate((char)ch2)){
159                 // we found a surrogate pair
160                 // return the codepoint
161                 return Character.toCodePoint((char)ch, (char)ch2);
162             }
163         }
164         return ch;
165     }
166 
167     /**
168      * Returns the length of the text
169      * @return length of the text
170      * @stable ICU 2.4
171      */
getLength()172     public abstract int getLength();
173 
174 
175     /**
176      * Gets the current index in text.
177      * @return current index in text.
178      * @stable ICU 2.4
179      */
getIndex()180     public abstract int getIndex();
181 
182 
183     /**
184      * Returns the UTF16 code unit at index, and increments to the next
185      * code unit (post-increment semantics).  If index is out of
186      * range, DONE is returned, and the iterator is reset to the limit
187      * of the text.
188      * @return the next UTF16 code unit, or DONE if the index is at the limit
189      *         of the text.
190      * @stable ICU 2.4
191      */
next()192     public abstract int next();
193 
194     /**
195      * Returns the code point at index, and increments to the next code
196      * point (post-increment semantics).  If index does not point to a
197      * valid surrogate pair, the behavior is the same as
198      * <code>next()</code>.  Otherwise the iterator is incremented past
199      * the surrogate pair, and the code point represented by the pair
200      * is returned.
201      * @return the next codepoint in text, or DONE if the index is at
202      *         the limit of the text.
203      * @stable ICU 2.4
204      */
nextCodePoint()205     public int nextCodePoint(){
206         int ch1 = next();
207         if(UTF16.isLeadSurrogate((char)ch1)){
208             int ch2 = next();
209             if(UTF16.isTrailSurrogate((char)ch2)){
210                 return Character.toCodePoint((char)ch1, (char)ch2);
211             }else if (ch2 != DONE) {
212                 // unmatched surrogate so back out
213                 previous();
214             }
215         }
216         return ch1;
217     }
218 
219     /**
220      * Decrement to the position of the previous code unit in the
221      * text, and return it (pre-decrement semantics).  If the
222      * resulting index is less than 0, the index is reset to 0 and
223      * DONE is returned.
224      * @return the previous code unit in the text, or DONE if the new
225      *         index is before the start of the text.
226      * @stable ICU 2.4
227      */
previous()228     public abstract int previous();
229 
230 
231     /**
232      * Retreat to the start of the previous code point in the text,
233      * and return it (pre-decrement semantics).  If the index is not
234      * preceeded by a valid surrogate pair, the behavior is the same
235      * as <code>previous()</code>.  Otherwise the iterator is
236      * decremented to the start of the surrogate pair, and the code
237      * point represented by the pair is returned.
238      * @return the previous code point in the text, or DONE if the new
239      *         index is before the start of the text.
240      * @stable ICU 2.4
241      */
previousCodePoint()242     public int previousCodePoint(){
243         int ch1 = previous();
244         if(UTF16.isTrailSurrogate((char)ch1)){
245             int ch2 = previous();
246             if(UTF16.isLeadSurrogate((char)ch2)){
247                 return Character.toCodePoint((char)ch2, (char)ch1);
248             }else if (ch2 != DONE) {
249                 //unmatched trail surrogate so back out
250                 next();
251             }
252         }
253         return ch1;
254     }
255 
256     /**
257      * Sets the index to the specified index in the text.
258      * @param index the index within the text.
259      * @exception IndexOutOfBoundsException is thrown if an invalid index is
260      *            supplied
261      * @stable ICU 2.4
262      */
setIndex(int index)263     public abstract void setIndex(int index);
264 
265     /**
266      * Sets the current index to the limit.
267      * @stable ICU 2.4
268      */
setToLimit()269     public void setToLimit() {
270         setIndex(getLength());
271     }
272 
273     /**
274      * Sets the current index to the start.
275      * @stable ICU 2.4
276      */
setToStart()277     public void setToStart() {
278         setIndex(0);
279     }
280 
281     /**
282      * Fills the buffer with the underlying text storage of the iterator
283      * If the buffer capacity is not enough a exception is thrown. The capacity
284      * of the fill in buffer should at least be equal to length of text in the
285      * iterator obtained by calling <code>getLength()</code>).
286      * <b>Usage:</b>
287      *
288      * <pre>
289      *         UChacterIterator iter = new UCharacterIterator.getInstance(text);
290      *         char[] buf = new char[iter.getLength()];
291      *         iter.getText(buf);
292      *
293      *         OR
294      *         char[] buf= new char[1];
295      *         int len = 0;
296      *         for(;;){
297      *             try{
298      *                 len = iter.getText(buf);
299      *                 break;
300      *             }catch(IndexOutOfBoundsException e){
301      *                 buf = new char[iter.getLength()];
302      *             }
303      *         }
304      * </pre>
305      *
306      * @param fillIn an array of chars to fill with the underlying UTF-16 code
307      *         units.
308      * @param offset the position within the array to start putting the data.
309      * @return the number of code units added to fillIn, as a convenience
310      * @exception IndexOutOfBoundsException exception if there is not enough
311      *            room after offset in the array, or if offset &lt; 0.
312      * @stable ICU 2.4
313      */
getText(char[] fillIn, int offset)314     public abstract int getText(char[] fillIn, int offset);
315 
316     /**
317      * Convenience override for <code>getText(char[], int)</code> that provides
318      * an offset of 0.
319      * @param fillIn an array of chars to fill with the underlying UTF-16 code
320      *         units.
321      * @return the number of code units added to fillIn, as a convenience
322      * @exception IndexOutOfBoundsException exception if there is not enough
323      *            room in the array.
324      * @stable ICU 2.4
325      */
getText(char[] fillIn)326     public final int getText(char[] fillIn) {
327         return getText(fillIn, 0);
328     }
329 
330     /**
331      * Convenience method for returning the underlying text storage as as string
332      * @return the underlying text storage in the iterator as a string
333      * @stable ICU 2.4
334      */
getText()335     public String getText() {
336         char[] text = new char[getLength()];
337         getText(text);
338         return new String(text);
339     }
340 
341     /**
342      * Moves the current position by the number of code units
343      * specified, either forward or backward depending on the sign
344      * of delta (positive or negative respectively).  If the resulting
345      * index would be less than zero, the index is set to zero, and if
346      * the resulting index would be greater than limit, the index is
347      * set to limit.
348      *
349      * @param delta the number of code units to move the current
350      *              index.
351      * @return the new index.
352      * @exception IndexOutOfBoundsException is thrown if an invalid index is
353      *            supplied
354      * @stable ICU 2.4
355      *
356      */
moveIndex(int delta)357     public int moveIndex(int delta) {
358         int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
359         setIndex(x);
360         return x;
361     }
362 
363     /**
364      * Moves the current position by the number of code points
365      * specified, either forward or backward depending on the sign of
366      * delta (positive or negative respectively). If the current index
367      * is at a trail surrogate then the first adjustment is by code
368      * unit, and the remaining adjustments are by code points.  If the
369      * resulting index would be less than zero, the index is set to
370      * zero, and if the resulting index would be greater than limit,
371      * the index is set to limit.
372      * @param delta the number of code units to move the current index.
373      * @return the new index
374      * @exception IndexOutOfBoundsException is thrown if an invalid delta is
375      *            supplied
376      * @stable ICU 2.4
377      */
moveCodePointIndex(int delta)378     public int moveCodePointIndex(int delta){
379         if(delta>0){
380             while(delta>0 && nextCodePoint() != DONE){delta--;}
381         }else{
382             while(delta<0 && previousCodePoint() != DONE){delta++;}
383         }
384         if(delta!=0){
385             throw new IndexOutOfBoundsException();
386         }
387 
388         return getIndex();
389     }
390 
391     /**
392      * Creates a copy of this iterator, independent from other iterators.
393      * If it is not possible to clone the iterator, returns null.
394      * @return copy of this iterator
395      * @stable ICU 2.4
396      */
clone()397     public Object clone() throws CloneNotSupportedException{
398         return super.clone();
399     }
400 
401 }
402 
403