1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ****************************************************************************** 6 * Copyright (C) 1996-2015, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ****************************************************************************** 9 */ 10 11 package ohos.global.icu.impl; 12 13 import java.util.NoSuchElementException; 14 15 import ohos.global.icu.lang.UCharacter; 16 import ohos.global.icu.text.UTF16; 17 import ohos.global.icu.util.RangeValueIterator; 18 19 /** 20 * <p>Class enabling iteration of the values in a Trie.</p> 21 * 22 * <p>2015-sep-03 TODO: Only used in test code, move there. 23 * 24 * <p>Result of each iteration contains the interval of codepoints that have 25 * the same value type and the value type itself.</p> 26 * <p>The comparison of each codepoint value is done via extract(), which the 27 * default implementation is to return the value as it is.</p> 28 * <p>Method extract() can be overwritten to perform manipulations on 29 * codepoint values in order to perform specialized comparison.</p> 30 * <p>TrieIterator is designed to be a generic iterator for the CharTrie 31 * and the IntTrie, hence to accommodate both types of data, the return 32 * result will be in terms of int (32 bit) values.</p> 33 * <p>See ohos.global.icu.text.UCharacterTypeIterator for examples of use.</p> 34 * <p>Notes for porting utrie_enum from icu4c to icu4j:<br> 35 * Internally, icu4c's utrie_enum performs all iterations in its body. In Java 36 * sense, the caller will have to pass a object with a callback function 37 * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, 38 * uint32_t value) into utrie_enum. utrie_enum will then find ranges of 39 * codepoints with the same value as determined by 40 * UTrieEnumValue(const void *context, uint32_t value). for each range, 41 * utrie_enum calls the callback function to perform a task. In this way, 42 * icu4c performs the iteration within utrie_enum. 43 * To follow the JDK model, icu4j is slightly different from icu4c. 44 * Instead of requesting the caller to implement an object for a callback. 45 * The caller will have to implement a subclass of TrieIterator, fleshing out 46 * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, 47 * the caller will have to code his own iteration and flesh out the task 48 * (equivalent to UTrieEnumRange) to be performed in the iteration loop. 49 * </p> 50 * <p>There are basically 3 usage scenarios for porting:</p> 51 * <p>1) UTrieEnumValue is the only implemented callback then just implement a 52 * subclass of TrieIterator and override the extract(int) method. The 53 * extract(int) method is analogus to UTrieEnumValue callback. 54 * </p> 55 * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement 56 * a subclass of TrieIterator, override the extract method and iterate, e.g 57 * </p> 58 * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, 59 * set);<br> 60 * In Java :<br> 61 * <pre> 62 * class TrieIteratorImpl extends TrieIterator{ 63 * public TrieIteratorImpl(Trie data){ 64 * super(data); 65 * } 66 * public int extract(int value){ 67 * // port the implementation of _enumPropertyStartsValue here 68 * } 69 * } 70 * .... 71 * TrieIterator fcdIter = new TrieIteratorImpl(fcdTrieImpl.fcdTrie); 72 * while(fcdIter.next(result)) { 73 * // port the implementation of _enumPropertyStartsRange 74 * } 75 * </pre> 76 * </p> 77 * <p>3) UTrieEnumRange is the only implemented callback then just implement 78 * the while loop, when utrie_enum is called 79 * <pre> 80 * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set); 81 * TrieIterator fcdIter = new TrieIterator(fcdTrieImpl.fcdTrie); 82 * while(fcdIter.next(result)){ 83 * set.add(result.start); 84 * } 85 * </p> 86 * @author synwee 87 * @see ohos.global.icu.impl.Trie 88 * @hide exposed on OHOS 89 */ 90 public class TrieIterator implements RangeValueIterator 91 92 { 93 // public constructor --------------------------------------------- 94 95 /** 96 * TrieEnumeration constructor 97 * @param trie to be used 98 * @exception IllegalArgumentException throw when argument is null. 99 */ TrieIterator(Trie trie)100 public TrieIterator(Trie trie) 101 { 102 if (trie == null) { 103 throw new IllegalArgumentException( 104 "Argument trie cannot be null"); 105 } 106 m_trie_ = trie; 107 // synwee: check that extract belongs to the child class 108 m_initialValue_ = extract(m_trie_.getInitialValue()); 109 reset(); 110 } 111 112 // public methods ------------------------------------------------- 113 114 /** 115 * <p>Returns true if we are not at the end of the iteration, false 116 * otherwise.</p> 117 * <p>The next set of codepoints with the same value type will be 118 * calculated during this call and returned in the arguement element.</p> 119 * @param element return result 120 * @return true if we are not at the end of the iteration, false otherwise. 121 * @exception NoSuchElementException - if no more elements exist. 122 * @see ohos.global.icu.util.RangeValueIterator.Element 123 */ 124 @Override next(Element element)125 public final boolean next(Element element) 126 { 127 if (m_nextCodepoint_ > UCharacter.MAX_VALUE) { 128 return false; 129 } 130 if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE && 131 calculateNextBMPElement(element)) { 132 return true; 133 } 134 calculateNextSupplementaryElement(element); 135 return true; 136 } 137 138 /** 139 * Resets the iterator to the beginning of the iteration 140 */ 141 @Override reset()142 public final void reset() 143 { 144 m_currentCodepoint_ = 0; 145 m_nextCodepoint_ = 0; 146 m_nextIndex_ = 0; 147 m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_; 148 if (m_nextBlock_ == m_trie_.m_dataOffset_) { 149 m_nextValue_ = m_initialValue_; 150 } 151 else { 152 m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_)); 153 } 154 m_nextBlockIndex_ = 0; 155 m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_; 156 } 157 158 // protected methods ---------------------------------------------- 159 160 /** 161 * Called by next() to extracts a 32 bit value from a trie value 162 * used for comparison. 163 * This method is to be overwritten if special manipulation is to be done 164 * to retrieve a relevant comparison. 165 * The default function is to return the value as it is. 166 * @param value a value from the trie 167 * @return extracted value 168 */ extract(int value)169 protected int extract(int value) 170 { 171 return value; 172 } 173 174 // private methods ------------------------------------------------ 175 176 /** 177 * Set the result values 178 * @param element return result object 179 * @param start codepoint of range 180 * @param limit (end + 1) codepoint of range 181 * @param value common value of range 182 */ setResult(Element element, int start, int limit, int value)183 private final void setResult(Element element, int start, int limit, 184 int value) 185 { 186 element.start = start; 187 element.limit = limit; 188 element.value = value; 189 } 190 191 /** 192 * Finding the next element. 193 * This method is called just before returning the result of 194 * next(). 195 * We always store the next element before it is requested. 196 * In the case that we have to continue calculations into the 197 * supplementary planes, a false will be returned. 198 * @param element return result object 199 * @return true if the next range is found, false if we have to proceed to 200 * the supplementary range. 201 */ calculateNextBMPElement(Element element)202 private final boolean calculateNextBMPElement(Element element) 203 { 204 int currentValue = m_nextValue_; 205 m_currentCodepoint_ = m_nextCodepoint_; 206 m_nextCodepoint_ ++; 207 m_nextBlockIndex_ ++; 208 if (!checkBlockDetail(currentValue)) { 209 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 210 currentValue); 211 return true; 212 } 213 // synwee check that next block index == 0 here 214 // enumerate BMP - the main loop enumerates data blocks 215 while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) { 216 // because of the way the character is split to form the index 217 // the lead surrogate and trail surrogate can not be in the 218 // mid of a block 219 if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) { 220 // skip lead surrogate code units, 221 // go to lead surrogate codepoints 222 m_nextIndex_ = BMP_INDEX_LENGTH_; 223 } 224 else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) { 225 // go back to regular BMP code points 226 m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_; 227 } else { 228 m_nextIndex_ ++; 229 } 230 231 m_nextBlockIndex_ = 0; 232 if (!checkBlock(currentValue)) { 233 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 234 currentValue); 235 return true; 236 } 237 } 238 m_nextCodepoint_ --; // step one back since this value has not been 239 m_nextBlockIndex_ --; // retrieved yet. 240 return false; 241 } 242 243 /** 244 * Finds the next supplementary element. 245 * For each entry in the trie, the value to be delivered is passed through 246 * extract(). 247 * We always store the next element before it is requested. 248 * Called after calculateNextBMP() completes its round of BMP characters. 249 * There is a slight difference in the usage of m_currentCodepoint_ 250 * here as compared to calculateNextBMP(). Though both represents the 251 * lower bound of the next element, in calculateNextBMP() it gets set 252 * at the start of any loop, where-else, in calculateNextSupplementary() 253 * since m_currentCodepoint_ already contains the lower bound of the 254 * next element (passed down from calculateNextBMP()), we keep it till 255 * the end before resetting it to the new value. 256 * Note, if there are no more iterations, it will never get to here. 257 * Blocked out by next(). 258 * @param element return result object 259 */ calculateNextSupplementaryElement(Element element)260 private final void calculateNextSupplementaryElement(Element element) 261 { 262 int currentValue = m_nextValue_; 263 m_nextCodepoint_ ++; 264 m_nextBlockIndex_ ++; 265 266 if (UTF16.getTrailSurrogate(m_nextCodepoint_) 267 != UTF16.TRAIL_SURROGATE_MIN_VALUE) { 268 // this piece is only called when we are in the middle of a lead 269 // surrogate block 270 if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) { 271 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 272 currentValue); 273 m_currentCodepoint_ = m_nextCodepoint_; 274 return; 275 } 276 // we have cleared one block 277 m_nextIndex_ ++; 278 m_nextTrailIndexOffset_ ++; 279 if (!checkTrailBlock(currentValue)) { 280 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 281 currentValue); 282 m_currentCodepoint_ = m_nextCodepoint_; 283 return; 284 } 285 } 286 int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); 287 // enumerate supplementary code points 288 while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) { 289 // lead surrogate access 290 final int leadBlock = 291 m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << 292 Trie.INDEX_STAGE_2_SHIFT_; 293 if (leadBlock == m_trie_.m_dataOffset_) { 294 // no entries for a whole block of lead surrogates 295 if (currentValue != m_initialValue_) { 296 m_nextValue_ = m_initialValue_; 297 m_nextBlock_ = leadBlock; // == m_trie_.m_dataOffset_ 298 m_nextBlockIndex_ = 0; 299 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 300 currentValue); 301 m_currentCodepoint_ = m_nextCodepoint_; 302 return; 303 } 304 305 nextLead += DATA_BLOCK_LENGTH_; 306 // number of total affected supplementary codepoints in one 307 // block 308 // this is not a simple addition of 309 // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider 310 // that we might have moved some of the codepoints 311 m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE); 312 continue; 313 } 314 if (m_trie_.m_dataManipulate_ == null) { 315 throw new NullPointerException( 316 "The field DataManipulate in this Trie is null"); 317 } 318 // enumerate trail surrogates for this lead surrogate 319 m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( 320 m_trie_.getValue(leadBlock + 321 (nextLead & Trie.INDEX_STAGE_3_MASK_))); 322 if (m_nextIndex_ <= 0) { 323 // no data for this lead surrogate 324 if (currentValue != m_initialValue_) { 325 m_nextValue_ = m_initialValue_; 326 m_nextBlock_ = m_trie_.m_dataOffset_; 327 m_nextBlockIndex_ = 0; 328 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 329 currentValue); 330 m_currentCodepoint_ = m_nextCodepoint_; 331 return; 332 } 333 m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_; 334 } else { 335 m_nextTrailIndexOffset_ = 0; 336 if (!checkTrailBlock(currentValue)) { 337 setResult(element, m_currentCodepoint_, m_nextCodepoint_, 338 currentValue); 339 m_currentCodepoint_ = m_nextCodepoint_; 340 return; 341 } 342 } 343 nextLead ++; 344 } 345 346 // deliver last range 347 setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, 348 currentValue); 349 } 350 351 /** 352 * Internal block value calculations 353 * Performs calculations on a data block to find codepoints in m_nextBlock_ 354 * after the index m_nextBlockIndex_ that has the same value. 355 * Note m_*_ variables at this point is the next codepoint whose value 356 * has not been calculated. 357 * But when returned with false, it will be the last codepoint whose 358 * value has been calculated. 359 * @param currentValue the value which other codepoints are tested against 360 * @return true if the whole block has the same value as currentValue or if 361 * the whole block has been calculated, false otherwise. 362 */ checkBlockDetail(int currentValue)363 private final boolean checkBlockDetail(int currentValue) 364 { 365 while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) { 366 m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ + 367 m_nextBlockIndex_)); 368 if (m_nextValue_ != currentValue) { 369 return false; 370 } 371 ++ m_nextBlockIndex_; 372 ++ m_nextCodepoint_; 373 } 374 return true; 375 } 376 377 /** 378 * Internal block value calculations 379 * Performs calculations on a data block to find codepoints in m_nextBlock_ 380 * that has the same value. 381 * Will call checkBlockDetail() if highlevel check fails. 382 * Note m_*_ variables at this point is the next codepoint whose value 383 * has not been calculated. 384 * @param currentBlock the initial block containing all currentValue 385 * @param currentValue the value which other codepoints are tested against 386 * @return true if the whole block has the same value as currentValue or if 387 * the whole block has been calculated, false otherwise. 388 */ checkBlock(int currentValue)389 private final boolean checkBlock(int currentValue) 390 { 391 int currentBlock = m_nextBlock_; 392 m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << 393 Trie.INDEX_STAGE_2_SHIFT_; 394 if (m_nextBlock_ == currentBlock && 395 (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) { 396 // the block is the same as the previous one, filled with 397 // currentValue 398 m_nextCodepoint_ += DATA_BLOCK_LENGTH_; 399 } 400 else if (m_nextBlock_ == m_trie_.m_dataOffset_) { 401 // this is the all-initial-value block 402 if (currentValue != m_initialValue_) { 403 m_nextValue_ = m_initialValue_; 404 m_nextBlockIndex_ = 0; 405 return false; 406 } 407 m_nextCodepoint_ += DATA_BLOCK_LENGTH_; 408 } 409 else { 410 if (!checkBlockDetail(currentValue)) { 411 return false; 412 } 413 } 414 return true; 415 } 416 417 /** 418 * Internal block value calculations 419 * Performs calculations on multiple data blocks for a set of trail 420 * surrogates to find codepoints in m_nextBlock_ that has the same value. 421 * Will call checkBlock() for internal block checks. 422 * Note m_*_ variables at this point is the next codepoint whose value 423 * has not been calculated. 424 * @param currentValue the value which other codepoints are tested against 425 * @return true if the whole block has the same value as currentValue or if 426 * the whole block has been calculated, false otherwise. 427 */ checkTrailBlock(int currentValue)428 private final boolean checkTrailBlock(int currentValue) 429 { 430 // enumerate code points for this lead surrogate 431 while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) 432 { 433 // if we ever reach here, we are at the start of a new block 434 m_nextBlockIndex_ = 0; 435 // copy of most of the body of the BMP loop 436 if (!checkBlock(currentValue)) { 437 return false; 438 } 439 m_nextTrailIndexOffset_ ++; 440 m_nextIndex_ ++; 441 } 442 return true; 443 } 444 445 /** 446 * Checks if we are beginning at the start of a initial block. 447 * If we are then the rest of the codepoints in this initial block 448 * has the same values. 449 * We increment m_nextCodepoint_ and relevant data members if so. 450 * This is used only in for the supplementary codepoints because 451 * the offset to the trail indexes could be 0. 452 * @return true if we are at the start of a initial block. 453 */ checkNullNextTrailIndex()454 private final boolean checkNullNextTrailIndex() 455 { 456 if (m_nextIndex_ <= 0) { 457 m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1; 458 int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); 459 int leadBlock = 460 m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << 461 Trie.INDEX_STAGE_2_SHIFT_; 462 if (m_trie_.m_dataManipulate_ == null) { 463 throw new NullPointerException( 464 "The field DataManipulate in this Trie is null"); 465 } 466 m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( 467 m_trie_.getValue(leadBlock + 468 (nextLead & Trie.INDEX_STAGE_3_MASK_))); 469 m_nextIndex_ --; 470 m_nextBlockIndex_ = DATA_BLOCK_LENGTH_; 471 return true; 472 } 473 return false; 474 } 475 476 // private data members -------------------------------------------- 477 478 /** 479 * Size of the stage 1 BMP indexes 480 */ 481 private static final int BMP_INDEX_LENGTH_ = 482 0x10000 >> Trie.INDEX_STAGE_1_SHIFT_; 483 /** 484 * Lead surrogate minimum value 485 */ 486 private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800; 487 /** 488 * Trail surrogate minimum value 489 */ 490 private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00; 491 /* 492 * Trail surrogate maximum value 493 */ 494 //private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF; 495 /** 496 * Number of trail surrogate 497 */ 498 private static final int TRAIL_SURROGATE_COUNT_ = 0x400; 499 /** 500 * Number of stage 1 indexes for supplementary calculations that maps to 501 * each lead surrogate character. 502 * See second pass into getRawOffset for the trail surrogate character. 503 * 10 for significant number of bits for trail surrogates, 5 for what we 504 * discard during shifting. 505 */ 506 private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ = 507 1 << (10 - Trie.INDEX_STAGE_1_SHIFT_); 508 /** 509 * Number of data values in a stage 2 (data array) block. 510 */ 511 private static final int DATA_BLOCK_LENGTH_ = 512 1 << Trie.INDEX_STAGE_1_SHIFT_; 513 // /** 514 // * Number of codepoints in a stage 2 block 515 // */ 516 // private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ = 517 // DATA_BLOCK_LENGTH_ << 10; 518 /** 519 * Trie instance 520 */ 521 private Trie m_trie_; 522 /** 523 * Initial value for trie values 524 */ 525 private int m_initialValue_; 526 /** 527 * Next element results and data. 528 */ 529 private int m_currentCodepoint_; 530 private int m_nextCodepoint_; 531 private int m_nextValue_; 532 private int m_nextIndex_; 533 private int m_nextBlock_; 534 private int m_nextBlockIndex_; 535 private int m_nextTrailIndexOffset_; 536 } 537