1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2016, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.text; 8 9 10 import java.text.CharacterIterator; 11 12 import com.ibm.icu.impl.CharacterIteratorWrapper; 13 import com.ibm.icu.impl.ReplaceableUCharacterIterator; 14 import com.ibm.icu.impl.UCharArrayIterator; 15 import com.ibm.icu.impl.UCharacterIteratorWrapper; 16 17 18 /** 19 * Abstract class that defines an API for iteration on text objects.This is an 20 * interface for forward and backward iteration and random access into a text 21 * object. Forward iteration is done with post-increment and backward iteration 22 * is done with pre-decrement semantics, while the 23 * <code>java.text.CharacterIterator</code> interface methods provided forward 24 * iteration with "pre-increment" and backward iteration with pre-decrement 25 * semantics. This API is more efficient for forward iteration over code points. 26 * The other major difference is that this API can do both code unit and code point 27 * iteration, <code>java.text.CharacterIterator</code> can only iterate over 28 * code units and is limited to BMP (0 - 0xFFFF) 29 * @author Ram 30 * @stable ICU 2.4 31 */ 32 public abstract class UCharacterIterator 33 implements Cloneable,UForwardCharacterIterator { 34 35 /** 36 * Protected default constructor for the subclasses 37 * @stable ICU 2.4 38 */ UCharacterIterator()39 protected UCharacterIterator(){ 40 } 41 42 // static final methods ---------------------------------------------------- 43 44 /** 45 * Returns a <code>UCharacterIterator</code> object given a 46 * <code>Replaceable</code> object. 47 * @param source a valid source as a <code>Replaceable</code> object 48 * @return UCharacterIterator object 49 * @exception IllegalArgumentException if the argument is null 50 * @stable ICU 2.4 51 */ getInstance(Replaceable source)52 public static final UCharacterIterator getInstance(Replaceable source){ 53 return new ReplaceableUCharacterIterator(source); 54 } 55 56 /** 57 * Returns a <code>UCharacterIterator</code> object given a 58 * source string. 59 * @param source a string 60 * @return UCharacterIterator object 61 * @exception IllegalArgumentException if the argument is null 62 * @stable ICU 2.4 63 */ getInstance(String source)64 public static final UCharacterIterator getInstance(String source){ 65 return new ReplaceableUCharacterIterator(source); 66 } 67 68 /** 69 * Returns a <code>UCharacterIterator</code> object given a 70 * source character array. 71 * @param source an array of UTF-16 code units 72 * @return UCharacterIterator object 73 * @exception IllegalArgumentException if the argument is null 74 * @stable ICU 2.4 75 */ getInstance(char[] source)76 public static final UCharacterIterator getInstance(char[] source){ 77 return getInstance(source,0,source.length); 78 } 79 80 /** 81 * Returns a <code>UCharacterIterator</code> object given a 82 * source character array. 83 * @param source an array of UTF-16 code units 84 * @return UCharacterIterator object 85 * @exception IllegalArgumentException if the argument is null 86 * @stable ICU 2.4 87 */ getInstance(char[] source, int start, int limit)88 public static final UCharacterIterator getInstance(char[] source, int start, int limit){ 89 return new UCharArrayIterator(source,start,limit); 90 } 91 /** 92 * Returns a <code>UCharacterIterator</code> object given a 93 * source StringBuffer. 94 * @param source an string buffer of UTF-16 code units 95 * @return UCharacterIterator object 96 * @exception IllegalArgumentException if the argument is null 97 * @stable ICU 2.4 98 */ getInstance(StringBuffer source)99 public static final UCharacterIterator getInstance(StringBuffer source){ 100 return new ReplaceableUCharacterIterator(source); 101 } 102 103 /** 104 * Returns a <code>UCharacterIterator</code> object given a 105 * CharacterIterator. 106 * @param source a valid CharacterIterator object. 107 * @return UCharacterIterator object 108 * @exception IllegalArgumentException if the argument is null 109 * @stable ICU 2.4 110 */ getInstance(CharacterIterator source)111 public static final UCharacterIterator getInstance(CharacterIterator source){ 112 return new CharacterIteratorWrapper(source); 113 } 114 115 // public methods ---------------------------------------------------------- 116 /** 117 * Returns a <code>java.text.CharacterIterator</code> object for 118 * the underlying text of this iterator. The returned iterator is 119 * independent of this iterator. 120 * @return java.text.CharacterIterator object 121 * @stable ICU 2.4 122 */ getCharacterIterator()123 public CharacterIterator getCharacterIterator(){ 124 return new UCharacterIteratorWrapper(this); 125 } 126 127 /** 128 * Returns the code unit at the current index. If index is out 129 * of range, returns DONE. Index is not changed. 130 * @return current code unit 131 * @stable ICU 2.4 132 */ current()133 public abstract int current(); 134 135 /** 136 * Returns the codepoint at the current index. 137 * If the current index is invalid, DONE is returned. 138 * If the current index points to a lead surrogate, and there is a following 139 * trail surrogate, then the code point is returned. Otherwise, the code 140 * unit at index is returned. Index is not changed. 141 * @return current codepoint 142 * @stable ICU 2.4 143 */ currentCodePoint()144 public int currentCodePoint(){ 145 int ch = current(); 146 if(UTF16.isLeadSurrogate((char)ch)){ 147 // advance the index to get the 148 // next code point 149 next(); 150 // due to post increment semantics 151 // current() after next() actually 152 // returns the char we want 153 int ch2 = current(); 154 // current should never change 155 // the current index so back off 156 previous(); 157 158 if(UTF16.isTrailSurrogate((char)ch2)){ 159 // we found a surrogate pair 160 // return the codepoint 161 return Character.toCodePoint((char)ch, (char)ch2); 162 } 163 } 164 return ch; 165 } 166 167 /** 168 * Returns the length of the text 169 * @return length of the text 170 * @stable ICU 2.4 171 */ getLength()172 public abstract int getLength(); 173 174 175 /** 176 * Gets the current index in text. 177 * @return current index in text. 178 * @stable ICU 2.4 179 */ getIndex()180 public abstract int getIndex(); 181 182 183 /** 184 * Returns the UTF16 code unit at index, and increments to the next 185 * code unit (post-increment semantics). If index is out of 186 * range, DONE is returned, and the iterator is reset to the limit 187 * of the text. 188 * @return the next UTF16 code unit, or DONE if the index is at the limit 189 * of the text. 190 * @stable ICU 2.4 191 */ next()192 public abstract int next(); 193 194 /** 195 * Returns the code point at index, and increments to the next code 196 * point (post-increment semantics). If index does not point to a 197 * valid surrogate pair, the behavior is the same as 198 * <code>next()</code>. Otherwise the iterator is incremented past 199 * the surrogate pair, and the code point represented by the pair 200 * is returned. 201 * @return the next codepoint in text, or DONE if the index is at 202 * the limit of the text. 203 * @stable ICU 2.4 204 */ nextCodePoint()205 public int nextCodePoint(){ 206 int ch1 = next(); 207 if(UTF16.isLeadSurrogate((char)ch1)){ 208 int ch2 = next(); 209 if(UTF16.isTrailSurrogate((char)ch2)){ 210 return Character.toCodePoint((char)ch1, (char)ch2); 211 }else if (ch2 != DONE) { 212 // unmatched surrogate so back out 213 previous(); 214 } 215 } 216 return ch1; 217 } 218 219 /** 220 * Decrement to the position of the previous code unit in the 221 * text, and return it (pre-decrement semantics). If the 222 * resulting index is less than 0, the index is reset to 0 and 223 * DONE is returned. 224 * @return the previous code unit in the text, or DONE if the new 225 * index is before the start of the text. 226 * @stable ICU 2.4 227 */ previous()228 public abstract int previous(); 229 230 231 /** 232 * Retreat to the start of the previous code point in the text, 233 * and return it (pre-decrement semantics). If the index is not 234 * preceeded by a valid surrogate pair, the behavior is the same 235 * as <code>previous()</code>. Otherwise the iterator is 236 * decremented to the start of the surrogate pair, and the code 237 * point represented by the pair is returned. 238 * @return the previous code point in the text, or DONE if the new 239 * index is before the start of the text. 240 * @stable ICU 2.4 241 */ previousCodePoint()242 public int previousCodePoint(){ 243 int ch1 = previous(); 244 if(UTF16.isTrailSurrogate((char)ch1)){ 245 int ch2 = previous(); 246 if(UTF16.isLeadSurrogate((char)ch2)){ 247 return Character.toCodePoint((char)ch2, (char)ch1); 248 }else if (ch2 != DONE) { 249 //unmatched trail surrogate so back out 250 next(); 251 } 252 } 253 return ch1; 254 } 255 256 /** 257 * Sets the index to the specified index in the text. 258 * @param index the index within the text. 259 * @exception IndexOutOfBoundsException is thrown if an invalid index is 260 * supplied 261 * @stable ICU 2.4 262 */ setIndex(int index)263 public abstract void setIndex(int index); 264 265 /** 266 * Sets the current index to the limit. 267 * @stable ICU 2.4 268 */ setToLimit()269 public void setToLimit() { 270 setIndex(getLength()); 271 } 272 273 /** 274 * Sets the current index to the start. 275 * @stable ICU 2.4 276 */ setToStart()277 public void setToStart() { 278 setIndex(0); 279 } 280 281 /** 282 * Fills the buffer with the underlying text storage of the iterator 283 * If the buffer capacity is not enough a exception is thrown. The capacity 284 * of the fill in buffer should at least be equal to length of text in the 285 * iterator obtained by calling <code>getLength()</code>). 286 * <b>Usage:</b> 287 * 288 * <pre> 289 * UChacterIterator iter = new UCharacterIterator.getInstance(text); 290 * char[] buf = new char[iter.getLength()]; 291 * iter.getText(buf); 292 * 293 * OR 294 * char[] buf= new char[1]; 295 * int len = 0; 296 * for(;;){ 297 * try{ 298 * len = iter.getText(buf); 299 * break; 300 * }catch(IndexOutOfBoundsException e){ 301 * buf = new char[iter.getLength()]; 302 * } 303 * } 304 * </pre> 305 * 306 * @param fillIn an array of chars to fill with the underlying UTF-16 code 307 * units. 308 * @param offset the position within the array to start putting the data. 309 * @return the number of code units added to fillIn, as a convenience 310 * @exception IndexOutOfBoundsException exception if there is not enough 311 * room after offset in the array, or if offset < 0. 312 * @stable ICU 2.4 313 */ getText(char[] fillIn, int offset)314 public abstract int getText(char[] fillIn, int offset); 315 316 /** 317 * Convenience override for <code>getText(char[], int)</code> that provides 318 * an offset of 0. 319 * @param fillIn an array of chars to fill with the underlying UTF-16 code 320 * units. 321 * @return the number of code units added to fillIn, as a convenience 322 * @exception IndexOutOfBoundsException exception if there is not enough 323 * room in the array. 324 * @stable ICU 2.4 325 */ getText(char[] fillIn)326 public final int getText(char[] fillIn) { 327 return getText(fillIn, 0); 328 } 329 330 /** 331 * Convenience method for returning the underlying text storage as as string 332 * @return the underlying text storage in the iterator as a string 333 * @stable ICU 2.4 334 */ getText()335 public String getText() { 336 char[] text = new char[getLength()]; 337 getText(text); 338 return new String(text); 339 } 340 341 /** 342 * Moves the current position by the number of code units 343 * specified, either forward or backward depending on the sign 344 * of delta (positive or negative respectively). If the resulting 345 * index would be less than zero, the index is set to zero, and if 346 * the resulting index would be greater than limit, the index is 347 * set to limit. 348 * 349 * @param delta the number of code units to move the current 350 * index. 351 * @return the new index. 352 * @exception IndexOutOfBoundsException is thrown if an invalid index is 353 * supplied 354 * @stable ICU 2.4 355 * 356 */ moveIndex(int delta)357 public int moveIndex(int delta) { 358 int x = Math.max(0, Math.min(getIndex() + delta, getLength())); 359 setIndex(x); 360 return x; 361 } 362 363 /** 364 * Moves the current position by the number of code points 365 * specified, either forward or backward depending on the sign of 366 * delta (positive or negative respectively). If the current index 367 * is at a trail surrogate then the first adjustment is by code 368 * unit, and the remaining adjustments are by code points. If the 369 * resulting index would be less than zero, the index is set to 370 * zero, and if the resulting index would be greater than limit, 371 * the index is set to limit. 372 * @param delta the number of code units to move the current index. 373 * @return the new index 374 * @exception IndexOutOfBoundsException is thrown if an invalid delta is 375 * supplied 376 * @stable ICU 2.4 377 */ moveCodePointIndex(int delta)378 public int moveCodePointIndex(int delta){ 379 if(delta>0){ 380 while(delta>0 && nextCodePoint() != DONE){delta--;} 381 }else{ 382 while(delta<0 && previousCodePoint() != DONE){delta++;} 383 } 384 if(delta!=0){ 385 throw new IndexOutOfBoundsException(); 386 } 387 388 return getIndex(); 389 } 390 391 /** 392 * Creates a copy of this iterator, independent from other iterators. 393 * If it is not possible to clone the iterator, returns null. 394 * @return copy of this iterator 395 * @stable ICU 2.4 396 */ clone()397 public Object clone() throws CloneNotSupportedException{ 398 return super.clone(); 399 } 400 401 } 402 403