1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2012-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * FCDIterCollationIterator.java, ported from uitercollationiterator.h/.cpp 10 * 11 * C++ version created on: 2012sep23 (from utf16collationiterator.h) 12 * created by: Markus W. Scherer 13 */ 14 15 package ohos.global.icu.impl.coll; 16 17 import ohos.global.icu.impl.Normalizer2Impl; 18 import ohos.global.icu.text.UCharacterIterator; 19 20 /** 21 * Incrementally checks the input text for FCD and normalizes where necessary. 22 * @hide exposed on OHOS 23 */ 24 public final class FCDIterCollationIterator extends IterCollationIterator { FCDIterCollationIterator(CollationData data, boolean numeric, UCharacterIterator ui, int startIndex)25 public FCDIterCollationIterator(CollationData data, boolean numeric, 26 UCharacterIterator ui, int startIndex) { 27 super(data, numeric, ui); 28 state = State.ITER_CHECK_FWD; 29 start = startIndex; 30 nfcImpl = data.nfcImpl; 31 } 32 33 @Override resetToOffset(int newOffset)34 public void resetToOffset(int newOffset) { 35 super.resetToOffset(newOffset); 36 start = newOffset; 37 state = State.ITER_CHECK_FWD; 38 } 39 40 @Override getOffset()41 public int getOffset() { 42 if(state.compareTo(State.ITER_CHECK_BWD) <= 0) { 43 return iter.getIndex(); 44 } else if(state == State.ITER_IN_FCD_SEGMENT) { 45 return pos; 46 } else if(pos == 0) { 47 return start; 48 } else { 49 return limit; 50 } 51 } 52 53 @Override nextCodePoint()54 public int nextCodePoint() { 55 int c; 56 for(;;) { 57 if(state == State.ITER_CHECK_FWD) { 58 c = iter.next(); 59 if(c < 0) { 60 return c; 61 } 62 if(CollationFCD.hasTccc(c)) { 63 if(CollationFCD.maybeTibetanCompositeVowel(c) || 64 CollationFCD.hasLccc(iter.current())) { 65 iter.previous(); 66 if(!nextSegment()) { 67 return Collation.SENTINEL_CP; 68 } 69 continue; 70 } 71 } 72 if(isLeadSurrogate(c)) { 73 int trail = iter.next(); 74 if(isTrailSurrogate(trail)) { 75 return Character.toCodePoint((char)c, (char)trail); 76 } else if(trail >= 0) { 77 iter.previous(); 78 } 79 } 80 return c; 81 } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) { 82 c = iter.nextCodePoint(); 83 pos += Character.charCount(c); 84 assert(c >= 0); 85 return c; 86 } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && 87 pos != normalized.length()) { 88 c = normalized.codePointAt(pos); 89 pos += Character.charCount(c); 90 return c; 91 } else { 92 switchToForward(); 93 } 94 } 95 } 96 97 @Override previousCodePoint()98 public int previousCodePoint() { 99 int c; 100 for(;;) { 101 if(state == State.ITER_CHECK_BWD) { 102 c = iter.previous(); 103 if(c < 0) { 104 start = pos = 0; 105 state = State.ITER_IN_FCD_SEGMENT; 106 return Collation.SENTINEL_CP; 107 } 108 if(CollationFCD.hasLccc(c)) { 109 int prev = Collation.SENTINEL_CP; 110 if(CollationFCD.maybeTibetanCompositeVowel(c) || 111 CollationFCD.hasTccc(prev = iter.previous())) { 112 iter.next(); 113 if(prev >= 0) { 114 iter.next(); 115 } 116 if(!previousSegment()) { 117 return Collation.SENTINEL_CP; 118 } 119 continue; 120 } 121 // hasLccc(trail)=true for all trail surrogates 122 if(isTrailSurrogate(c)) { 123 if(prev < 0) { 124 prev = iter.previous(); 125 } 126 if(isLeadSurrogate(prev)) { 127 return Character.toCodePoint((char)prev, (char)c); 128 } 129 } 130 if(prev >= 0) { 131 iter.next(); 132 } 133 } 134 return c; 135 } else if(state == State.ITER_IN_FCD_SEGMENT && pos != start) { 136 c = iter.previousCodePoint(); 137 pos -= Character.charCount(c); 138 assert(c >= 0); 139 return c; 140 } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos != 0) { 141 c = normalized.codePointBefore(pos); 142 pos -= Character.charCount(c); 143 return c; 144 } else { 145 switchToBackward(); 146 } 147 } 148 } 149 150 @Override handleNextCE32()151 protected long handleNextCE32() { 152 int c; 153 for(;;) { 154 if(state == State.ITER_CHECK_FWD) { 155 c = iter.next(); 156 if(c < 0) { 157 return NO_CP_AND_CE32; 158 } 159 if(CollationFCD.hasTccc(c)) { 160 if(CollationFCD.maybeTibetanCompositeVowel(c) || 161 CollationFCD.hasLccc(iter.current())) { 162 iter.previous(); 163 if(!nextSegment()) { 164 c = Collation.SENTINEL_CP; 165 return Collation.FALLBACK_CE32; 166 } 167 continue; 168 } 169 } 170 break; 171 } else if(state == State.ITER_IN_FCD_SEGMENT && pos != limit) { 172 c = iter.next(); 173 ++pos; 174 assert(c >= 0); 175 break; 176 } else if(state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && 177 pos != normalized.length()) { 178 c = normalized.charAt(pos++); 179 break; 180 } else { 181 switchToForward(); 182 } 183 } 184 return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead((char)c)); 185 } 186 187 @Override handleGetTrailSurrogate()188 protected char handleGetTrailSurrogate() { 189 if(state.compareTo(State.ITER_IN_FCD_SEGMENT) <= 0) { 190 int trail = iter.next(); 191 if(isTrailSurrogate(trail)) { 192 if(state == State.ITER_IN_FCD_SEGMENT) { ++pos; } 193 } else if(trail >= 0) { 194 iter.previous(); 195 } 196 return (char)trail; 197 } else { 198 assert(pos < normalized.length()); 199 char trail; 200 if(Character.isLowSurrogate(trail = normalized.charAt(pos))) { ++pos; } 201 return trail; 202 } 203 } 204 205 @Override 206 protected void forwardNumCodePoints(int num) { 207 // Specify the class to avoid a virtual-function indirection. 208 // In Java, we would declare this class final. 209 while(num > 0 && nextCodePoint() >= 0) { 210 --num; 211 } 212 } 213 214 @Override backwardNumCodePoints(int num)215 protected void backwardNumCodePoints(int num) { 216 // Specify the class to avoid a virtual-function indirection. 217 // In Java, we would declare this class final. 218 while(num > 0 && previousCodePoint() >= 0) { 219 --num; 220 } 221 } 222 223 /** 224 * Switches to forward checking if possible. 225 */ switchToForward()226 private void switchToForward() { 227 assert(state == State.ITER_CHECK_BWD || 228 (state == State.ITER_IN_FCD_SEGMENT && pos == limit) || 229 (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == normalized.length())); 230 if(state == State.ITER_CHECK_BWD) { 231 // Turn around from backward checking. 232 start = pos = iter.getIndex(); 233 if(pos == limit) { 234 state = State.ITER_CHECK_FWD; // Check forward. 235 } else { // pos < limit 236 state = State.ITER_IN_FCD_SEGMENT; // Stay in FCD segment. 237 } 238 } else { 239 // Reached the end of the FCD segment. 240 if(state == State.ITER_IN_FCD_SEGMENT) { 241 // The input text segment is FCD, extend it forward. 242 } else { 243 // The input text segment needed to be normalized. 244 // Switch to checking forward from it. 245 if(state == State.IN_NORM_ITER_AT_START) { 246 iter.moveIndex(limit - start); 247 } 248 start = limit; 249 } 250 state = State.ITER_CHECK_FWD; 251 } 252 } 253 254 /** 255 * Extends the FCD text segment forward or normalizes around pos. 256 * @return true if success 257 */ nextSegment()258 private boolean nextSegment() { 259 assert(state == State.ITER_CHECK_FWD); 260 // The input text [start..(iter index)[ passes the FCD check. 261 pos = iter.getIndex(); 262 // Collect the characters being checked, in case they need to be normalized. 263 if(s == null) { 264 s = new StringBuilder(); 265 } else { 266 s.setLength(0); 267 } 268 int prevCC = 0; 269 for(;;) { 270 // Fetch the next character and its fcd16 value. 271 int c = iter.nextCodePoint(); 272 if(c < 0) { break; } 273 int fcd16 = nfcImpl.getFCD16(c); 274 int leadCC = fcd16 >> 8; 275 if(leadCC == 0 && s.length() != 0) { 276 // FCD boundary before this character. 277 iter.previousCodePoint(); 278 break; 279 } 280 s.appendCodePoint(c); 281 if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) { 282 // Fails FCD check. Find the next FCD boundary and normalize. 283 for(;;) { 284 c = iter.nextCodePoint(); 285 if(c < 0) { break; } 286 if(nfcImpl.getFCD16(c) <= 0xff) { 287 iter.previousCodePoint(); 288 break; 289 } 290 s.appendCodePoint(c); 291 } 292 normalize(s); 293 start = pos; 294 limit = pos + s.length(); 295 state = State.IN_NORM_ITER_AT_LIMIT; 296 pos = 0; 297 return true; 298 } 299 prevCC = fcd16 & 0xff; 300 if(prevCC == 0) { 301 // FCD boundary after the last character. 302 break; 303 } 304 } 305 limit = pos + s.length(); 306 assert(pos != limit); 307 iter.moveIndex(-s.length()); 308 state = State.ITER_IN_FCD_SEGMENT; 309 return true; 310 } 311 312 /** 313 * Switches to backward checking. 314 */ switchToBackward()315 private void switchToBackward() { 316 assert(state == State.ITER_CHECK_FWD || 317 (state == State.ITER_IN_FCD_SEGMENT && pos == start) || 318 (state.compareTo(State.IN_NORM_ITER_AT_LIMIT) >= 0 && pos == 0)); 319 if(state == State.ITER_CHECK_FWD) { 320 // Turn around from forward checking. 321 limit = pos = iter.getIndex(); 322 if(pos == start) { 323 state = State.ITER_CHECK_BWD; // Check backward. 324 } else { // pos > start 325 state = State.ITER_IN_FCD_SEGMENT; // Stay in FCD segment. 326 } 327 } else { 328 // Reached the start of the FCD segment. 329 if(state == State.ITER_IN_FCD_SEGMENT) { 330 // The input text segment is FCD, extend it backward. 331 } else { 332 // The input text segment needed to be normalized. 333 // Switch to checking backward from it. 334 if(state == State.IN_NORM_ITER_AT_LIMIT) { 335 iter.moveIndex(start - limit); 336 } 337 limit = start; 338 } 339 state = State.ITER_CHECK_BWD; 340 } 341 } 342 343 /** 344 * Extends the FCD text segment backward or normalizes around pos. 345 * @return true if success 346 */ previousSegment()347 private boolean previousSegment() { 348 assert(state == State.ITER_CHECK_BWD); 349 // The input text [(iter index)..limit[ passes the FCD check. 350 pos = iter.getIndex(); 351 // Collect the characters being checked, in case they need to be normalized. 352 if(s == null) { 353 s = new StringBuilder(); 354 } else { 355 s.setLength(0); 356 } 357 int nextCC = 0; 358 for(;;) { 359 // Fetch the previous character and its fcd16 value. 360 int c = iter.previousCodePoint(); 361 if(c < 0) { break; } 362 int fcd16 = nfcImpl.getFCD16(c); 363 int trailCC = fcd16 & 0xff; 364 if(trailCC == 0 && s.length() != 0) { 365 // FCD boundary after this character. 366 iter.nextCodePoint(); 367 break; 368 } 369 s.appendCodePoint(c); 370 if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) || 371 CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) { 372 // Fails FCD check. Find the previous FCD boundary and normalize. 373 while(fcd16 > 0xff) { 374 c = iter.previousCodePoint(); 375 if(c < 0) { break; } 376 fcd16 = nfcImpl.getFCD16(c); 377 if(fcd16 == 0) { 378 iter.nextCodePoint(); 379 break; 380 } 381 s.appendCodePoint(c); 382 } 383 s.reverse(); 384 normalize(s); 385 limit = pos; 386 start = pos - s.length(); 387 state = State.IN_NORM_ITER_AT_START; 388 pos = normalized.length(); 389 return true; 390 } 391 nextCC = fcd16 >> 8; 392 if(nextCC == 0) { 393 // FCD boundary before the following character. 394 break; 395 } 396 } 397 start = pos - s.length(); 398 assert(pos != start); 399 iter.moveIndex(s.length()); 400 state = State.ITER_IN_FCD_SEGMENT; 401 return true; 402 } 403 normalize(CharSequence s)404 private void normalize(CharSequence s) { 405 if(normalized == null) { 406 normalized = new StringBuilder(); 407 } 408 // NFD without argument checking. 409 nfcImpl.decompose(s, normalized); 410 } 411 412 private enum State { 413 /** 414 * The input text [start..(iter index)[ passes the FCD check. 415 * Moving forward checks incrementally. 416 * pos & limit are undefined. 417 */ 418 ITER_CHECK_FWD, 419 /** 420 * The input text [(iter index)..limit[ passes the FCD check. 421 * Moving backward checks incrementally. 422 * start & pos are undefined. 423 */ 424 ITER_CHECK_BWD, 425 /** 426 * The input text [start..limit[ passes the FCD check. 427 * pos tracks the current text index. 428 */ 429 ITER_IN_FCD_SEGMENT, 430 /** 431 * The input text [start..limit[ failed the FCD check and was normalized. 432 * pos tracks the current index in the normalized string. 433 * The text iterator is at the limit index. 434 */ 435 IN_NORM_ITER_AT_LIMIT, 436 /** 437 * The input text [start..limit[ failed the FCD check and was normalized. 438 * pos tracks the current index in the normalized string. 439 * The text iterator is at the start index. 440 */ 441 IN_NORM_ITER_AT_START 442 } 443 444 private State state; 445 446 private int start; 447 private int pos; 448 private int limit; 449 450 private final Normalizer2Impl nfcImpl; 451 private StringBuilder s; 452 private StringBuilder normalized; 453 } 454