1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2010-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * FCDUTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp 10 * 11 * C++ version created on: 2010oct27 12 * created by: Markus W. Scherer 13 */ 14 15 package ohos.global.icu.impl.coll; 16 17 import ohos.global.icu.impl.Normalizer2Impl; 18 19 /** 20 * Incrementally checks the input text for FCD and normalizes where necessary. 21 * @hide exposed on OHOS 22 */ 23 public final class FCDUTF16CollationIterator extends UTF16CollationIterator { 24 /** 25 * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}. 26 */ FCDUTF16CollationIterator(CollationData d)27 public FCDUTF16CollationIterator(CollationData d) { 28 super(d); 29 nfcImpl = d.nfcImpl; 30 } 31 FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p)32 public FCDUTF16CollationIterator(CollationData data, boolean numeric, CharSequence s, int p) { 33 super(data, numeric, s, p); 34 rawSeq = s; 35 segmentStart = p; 36 rawLimit = s.length(); 37 nfcImpl = data.nfcImpl; 38 checkDir = 1; 39 } 40 41 @Override equals(Object other)42 public boolean equals(Object other) { 43 // Skip the UTF16CollationIterator and call its parent. 44 if (!(other instanceof CollationIterator) 45 || !((CollationIterator)this).equals(other) 46 || !(other instanceof FCDUTF16CollationIterator)) 47 { 48 return false; 49 } 50 FCDUTF16CollationIterator o = (FCDUTF16CollationIterator)other; 51 // Compare the iterator state but not the text: Assume that the caller does that. 52 if (checkDir != o.checkDir) { 53 return false; 54 } 55 if (checkDir == 0 && (seq == rawSeq) != (o.seq == o.rawSeq)) { 56 return false; 57 } 58 if (checkDir != 0 || seq == rawSeq) { 59 return (pos - rawStart) == (o.pos - /*o.*/ rawStart); 60 } 61 else { 62 return (segmentStart - rawStart) == (o.segmentStart - /*o.*/ rawStart) && 63 (pos - start) == (o.pos - o.start); 64 } 65 } 66 67 @Override hashCode()68 public int hashCode() { 69 assert false : "hashCode not designed"; 70 return 42; // any arbitrary constant will do 71 } 72 73 @Override resetToOffset(int newOffset)74 public void resetToOffset(int newOffset) { 75 reset(); 76 seq = rawSeq; 77 start = segmentStart = pos = rawStart + newOffset; 78 limit = rawLimit; 79 checkDir = 1; 80 } 81 82 @Override getOffset()83 public int getOffset() { 84 if(checkDir != 0 || seq == rawSeq) { 85 return pos - rawStart; 86 } else if(pos == start) { 87 return segmentStart - rawStart; 88 } else { 89 return segmentLimit - rawStart; 90 } 91 } 92 93 @Override setText(boolean numeric, CharSequence s, int p)94 public void setText(boolean numeric, CharSequence s, int p) { 95 super.setText(numeric, s, p); 96 rawSeq = s; 97 segmentStart = p; 98 rawLimit = limit = s.length(); 99 checkDir = 1; 100 } 101 102 @Override nextCodePoint()103 public int nextCodePoint() { 104 char c; 105 for(;;) { 106 if(checkDir > 0) { 107 if(pos == limit) { 108 return Collation.SENTINEL_CP; 109 } 110 c = seq.charAt(pos++); 111 if(CollationFCD.hasTccc(c)) { 112 if(CollationFCD.maybeTibetanCompositeVowel(c) || 113 (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) { 114 --pos; 115 nextSegment(); 116 c = seq.charAt(pos++); 117 } 118 } 119 break; 120 } else if(checkDir == 0 && pos != limit) { 121 c = seq.charAt(pos++); 122 break; 123 } else { 124 switchToForward(); 125 } 126 } 127 char trail; 128 if(Character.isHighSurrogate(c) && pos != limit && 129 Character.isLowSurrogate(trail = seq.charAt(pos))) { 130 ++pos; 131 return Character.toCodePoint(c, trail); 132 } else { 133 return c; 134 } 135 } 136 137 @Override previousCodePoint()138 public int previousCodePoint() { 139 char c; 140 for(;;) { 141 if(checkDir < 0) { 142 if(pos == start) { 143 return Collation.SENTINEL_CP; 144 } 145 c = seq.charAt(--pos); 146 if(CollationFCD.hasLccc(c)) { 147 if(CollationFCD.maybeTibetanCompositeVowel(c) || 148 (pos != start && CollationFCD.hasTccc(seq.charAt(pos - 1)))) { 149 ++pos; 150 previousSegment(); 151 c = seq.charAt(--pos); 152 } 153 } 154 break; 155 } else if(checkDir == 0 && pos != start) { 156 c = seq.charAt(--pos); 157 break; 158 } else { 159 switchToBackward(); 160 } 161 } 162 char lead; 163 if(Character.isLowSurrogate(c) && pos != start && 164 Character.isHighSurrogate(lead = seq.charAt(pos - 1))) { 165 --pos; 166 return Character.toCodePoint(lead, c); 167 } else { 168 return c; 169 } 170 } 171 172 @Override handleNextCE32()173 protected long handleNextCE32() { 174 char c; 175 for(;;) { 176 if(checkDir > 0) { 177 if(pos == limit) { 178 return NO_CP_AND_CE32; 179 } 180 c = seq.charAt(pos++); 181 if(CollationFCD.hasTccc(c)) { 182 if(CollationFCD.maybeTibetanCompositeVowel(c) || 183 (pos != limit && CollationFCD.hasLccc(seq.charAt(pos)))) { 184 --pos; 185 nextSegment(); 186 c = seq.charAt(pos++); 187 } 188 } 189 break; 190 } else if(checkDir == 0 && pos != limit) { 191 c = seq.charAt(pos++); 192 break; 193 } else { 194 switchToForward(); 195 } 196 } 197 return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c)); 198 } 199 200 /* boolean foundNULTerminator(); */ 201 202 @Override forwardNumCodePoints(int num)203 protected void forwardNumCodePoints(int num) { 204 // Specify the class to avoid a virtual-function indirection. 205 // In Java, we would declare this class final. 206 while(num > 0 && nextCodePoint() >= 0) { 207 --num; 208 } 209 } 210 211 @Override backwardNumCodePoints(int num)212 protected void backwardNumCodePoints(int num) { 213 // Specify the class to avoid a virtual-function indirection. 214 // In Java, we would declare this class final. 215 while(num > 0 && previousCodePoint() >= 0) { 216 --num; 217 } 218 } 219 220 /** 221 * Switches to forward checking if possible. 222 * To be called when checkDir < 0 || (checkDir == 0 && pos == limit). 223 * Returns with checkDir > 0 || (checkDir == 0 && pos != limit). 224 */ switchToForward()225 private void switchToForward() { 226 assert((checkDir < 0 && seq == rawSeq) || (checkDir == 0 && pos == limit)); 227 if(checkDir < 0) { 228 // Turn around from backward checking. 229 start = segmentStart = pos; 230 if(pos == segmentLimit) { 231 limit = rawLimit; 232 checkDir = 1; // Check forward. 233 } else { // pos < segmentLimit 234 checkDir = 0; // Stay in FCD segment. 235 } 236 } else { 237 // Reached the end of the FCD segment. 238 if(seq == rawSeq) { 239 // The input text segment is FCD, extend it forward. 240 } else { 241 // The input text segment needed to be normalized. 242 // Switch to checking forward from it. 243 seq = rawSeq; 244 pos = start = segmentStart = segmentLimit; 245 // Note: If this segment is at the end of the input text, 246 // then it might help to return false to indicate that, so that 247 // we do not have to re-check and normalize when we turn around and go backwards. 248 // However, that would complicate the call sites for an optimization of an unusual case. 249 } 250 limit = rawLimit; 251 checkDir = 1; 252 } 253 } 254 255 /** 256 * Extend the FCD text segment forward or normalize around pos. 257 * To be called when checkDir > 0 && pos != limit. 258 * Returns with checkDir == 0 and pos != limit. 259 */ 260 private void nextSegment() { 261 assert(checkDir > 0 && seq == rawSeq && pos != limit); 262 // The input text [segmentStart..pos[ passes the FCD check. 263 int p = pos; 264 int prevCC = 0; 265 for(;;) { 266 // Fetch the next character's fcd16 value. 267 int q = p; 268 int c = Character.codePointAt(seq, p); 269 p += Character.charCount(c); 270 int fcd16 = nfcImpl.getFCD16(c); 271 int leadCC = fcd16 >> 8; 272 if(leadCC == 0 && q != pos) { 273 // FCD boundary before the [q, p[ character. 274 limit = segmentLimit = q; 275 break; 276 } 277 if(leadCC != 0 && (prevCC > leadCC || CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) { 278 // Fails FCD check. Find the next FCD boundary and normalize. 279 do { 280 q = p; 281 if(p == rawLimit) { break; } 282 c = Character.codePointAt(seq, p); 283 p += Character.charCount(c); 284 } while(nfcImpl.getFCD16(c) > 0xff); 285 normalize(pos, q); 286 pos = start; 287 break; 288 } 289 prevCC = fcd16 & 0xff; 290 if(p == rawLimit || prevCC == 0) { 291 // FCD boundary after the last character. 292 limit = segmentLimit = p; 293 break; 294 } 295 } 296 assert(pos != limit); 297 checkDir = 0; 298 } 299 300 /** 301 * Switches to backward checking. 302 * To be called when checkDir > 0 || (checkDir == 0 && pos == start). 303 * Returns with checkDir < 0 || (checkDir == 0 && pos != start). 304 */ 305 private void switchToBackward() { 306 assert((checkDir > 0 && seq == rawSeq) || (checkDir == 0 && pos == start)); 307 if(checkDir > 0) { 308 // Turn around from forward checking. 309 limit = segmentLimit = pos; 310 if(pos == segmentStart) { 311 start = rawStart; 312 checkDir = -1; // Check backward. 313 } else { // pos > segmentStart 314 checkDir = 0; // Stay in FCD segment. 315 } 316 } else { 317 // Reached the start of the FCD segment. 318 if(seq == rawSeq) { 319 // The input text segment is FCD, extend it backward. 320 } else { 321 // The input text segment needed to be normalized. 322 // Switch to checking backward from it. 323 seq = rawSeq; 324 pos = limit = segmentLimit = segmentStart; 325 } 326 start = rawStart; 327 checkDir = -1; 328 } 329 } 330 331 /** 332 * Extend the FCD text segment backward or normalize around pos. 333 * To be called when checkDir < 0 && pos != start. 334 * Returns with checkDir == 0 and pos != start. 335 */ 336 private void previousSegment() { 337 assert(checkDir < 0 && seq == rawSeq && pos != start); 338 // The input text [pos..segmentLimit[ passes the FCD check. 339 int p = pos; 340 int nextCC = 0; 341 for(;;) { 342 // Fetch the previous character's fcd16 value. 343 int q = p; 344 int c = Character.codePointBefore(seq, p); 345 p -= Character.charCount(c); 346 int fcd16 = nfcImpl.getFCD16(c); 347 int trailCC = fcd16 & 0xff; 348 if(trailCC == 0 && q != pos) { 349 // FCD boundary after the [p, q[ character. 350 start = segmentStart = q; 351 break; 352 } 353 if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) || 354 CollationFCD.isFCD16OfTibetanCompositeVowel(fcd16))) { 355 // Fails FCD check. Find the previous FCD boundary and normalize. 356 do { 357 q = p; 358 if(fcd16 <= 0xff || p == rawStart) { break; } 359 c = Character.codePointBefore(seq, p); 360 p -= Character.charCount(c); 361 } while((fcd16 = nfcImpl.getFCD16(c)) != 0); 362 normalize(q, pos); 363 pos = limit; 364 break; 365 } 366 nextCC = fcd16 >> 8; 367 if(p == rawStart || nextCC == 0) { 368 // FCD boundary before the following character. 369 start = segmentStart = p; 370 break; 371 } 372 } 373 assert(pos != start); 374 checkDir = 0; 375 } 376 377 private void normalize(int from, int to) { 378 if(normalized == null) { 379 normalized = new StringBuilder(); 380 } 381 // NFD without argument checking. 382 nfcImpl.decompose(rawSeq, from, to, normalized, to - from); 383 // Switch collation processing into the FCD buffer 384 // with the result of normalizing [segmentStart, segmentLimit[. 385 segmentStart = from; 386 segmentLimit = to; 387 seq = normalized; 388 start = 0; 389 limit = start + normalized.length(); 390 } 391 392 // Text pointers: The input text is rawSeq[rawStart, rawLimit[. 393 // (In C++, these are const UChar * pointers. 394 // In Java, we use CharSequence rawSeq and the parent class' seq 395 // together with int indexes.) 396 // 397 // checkDir > 0: 398 // 399 // The input text rawSeq[segmentStart..pos[ passes the FCD check. 400 // Moving forward checks incrementally. 401 // segmentLimit is undefined. seq == rawSeq. limit == rawLimit. 402 // 403 // checkDir < 0: 404 // The input text rawSeq[pos..segmentLimit[ passes the FCD check. 405 // Moving backward checks incrementally. 406 // segmentStart is undefined. seq == rawSeq. start == rawStart. 407 // 408 // checkDir == 0: 409 // 410 // The input text rawSeq[segmentStart..segmentLimit[ is being processed. 411 // These pointers are at FCD boundaries. 412 // Either this text segment already passes the FCD check 413 // and seq==rawSeq && segmentStart==start<=pos<=limit==segmentLimit, 414 // or the current segment had to be normalized so that 415 // rawSeq[segmentStart..segmentLimit[ turned into the normalized string, 416 // corresponding to seq==normalized && 0==start<=pos<=limit==start+normalized.length(). 417 private CharSequence rawSeq; 418 private static final int rawStart = 0; 419 private int segmentStart; 420 private int segmentLimit; 421 private int rawLimit; 422 423 private final Normalizer2Impl nfcImpl; 424 private StringBuilder normalized; 425 // Direction of incremental FCD check. See comments before rawStart. 426 private int checkDir; 427 } 428