1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * 7 * Copyright (C) 2004-2015, International Business Machines 8 * Corporation and others. All Rights Reserved. 9 * 10 ******************************************************************************* 11 * file name: UCaseProps.java 12 * encoding: US-ASCII 13 * tab size: 8 (not used) 14 * indentation:4 15 * 16 * created on: 2005jan29 17 * created by: Markus W. Scherer 18 * 19 * Low-level Unicode character/string case mapping code. 20 * Java port of ucase.h/.c. 21 */ 22 23 package ohos.global.icu.impl; 24 25 import java.io.IOException; 26 import java.nio.ByteBuffer; 27 import java.util.Iterator; 28 import java.util.Locale; 29 30 import ohos.global.icu.lang.UCharacter; 31 import ohos.global.icu.lang.UProperty; 32 import ohos.global.icu.text.UTF16; 33 import ohos.global.icu.text.UnicodeSet; 34 import ohos.global.icu.util.ICUUncheckedIOException; 35 import ohos.global.icu.util.ULocale; 36 37 /** 38 * @hide exposed on OHOS 39 */ 40 public final class UCaseProps { 41 42 // constructors etc. 
--------------------------------------------------- *** 43 44 // port of ucase_openProps() UCaseProps()45 private UCaseProps() throws IOException { 46 ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME); 47 readData(bytes); 48 } 49 readData(ByteBuffer bytes)50 private final void readData(ByteBuffer bytes) throws IOException { 51 // read the header 52 ICUBinary.readHeader(bytes, FMT, new IsAcceptable()); 53 54 // read indexes[] 55 int count=bytes.getInt(); 56 if(count<IX_TOP) { 57 throw new IOException("indexes[0] too small in "+DATA_FILE_NAME); 58 } 59 indexes=new int[count]; 60 61 indexes[0]=count; 62 for(int i=1; i<count; ++i) { 63 indexes[i]=bytes.getInt(); 64 } 65 66 // read the trie 67 trie=Trie2_16.createFromSerialized(bytes); 68 int expectedTrieLength=indexes[IX_TRIE_SIZE]; 69 int trieLength=trie.getSerializedLength(); 70 if(trieLength>expectedTrieLength) { 71 throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie"); 72 } 73 // skip padding after trie bytes 74 ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength); 75 76 // read exceptions[] 77 count=indexes[IX_EXC_LENGTH]; 78 if(count>0) { 79 exceptions=ICUBinary.getString(bytes, count, 0); 80 } 81 82 // read unfold[] 83 count=indexes[IX_UNFOLD_LENGTH]; 84 if(count>0) { 85 unfold=ICUBinary.getChars(bytes, count, 0); 86 } 87 } 88 89 // implement ICUBinary.Authenticate 90 private final static class IsAcceptable implements ICUBinary.Authenticate { 91 @Override isDataVersionAcceptable(byte version[])92 public boolean isDataVersionAcceptable(byte version[]) { 93 return version[0]==4; 94 } 95 } 96 97 // set of property starts for UnicodeSet ------------------------------- *** 98 addPropertyStarts(UnicodeSet set)99 public final void addPropertyStarts(UnicodeSet set) { 100 /* add the start code point of each same-value range of the trie */ 101 Iterator<Trie2.Range> trieIterator=trie.iterator(); 102 Trie2.Range range; 103 while(trieIterator.hasNext() && 
!(range=trieIterator.next()).leadSurrogate) { 104 set.add(range.startCodePoint); 105 } 106 107 /* add code points with hardcoded properties, plus the ones following them */ 108 109 /* (none right now, see comment below) */ 110 111 /* 112 * Omit code points with hardcoded specialcasing properties 113 * because we do not build property UnicodeSets for them right now. 114 */ 115 } 116 117 // data access primitives ---------------------------------------------- *** getExceptionsOffset(int props)118 private static final int getExceptionsOffset(int props) { 119 return props>>EXC_SHIFT; 120 } 121 propsHasException(int props)122 static final boolean propsHasException(int props) { 123 return (props&EXCEPTION)!=0; 124 } 125 126 /* number of bits in an 8-bit integer value */ 127 private static final byte flagsOffset[/*256*/]={ 128 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 129 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 130 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 131 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 132 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 133 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 134 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 135 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 136 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 137 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 138 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 139 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 140 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 141 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 142 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 143 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 144 }; 145 hasSlot(int flags, int index)146 private static final boolean hasSlot(int flags, int index) { 147 return (flags&(1<<index))!=0; 148 } slotOffset(int flags, int index)149 private static final byte slotOffset(int flags, int index) { 150 return flagsOffset[flags&((1<<index)-1)]; 151 } 152 153 /* 154 * Get the value of an 
optional-value slot where hasSlot(excWord, index). 155 * 156 * @param excWord (in) initial exceptions word 157 * @param index (in) desired slot index 158 * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++); 159 * @return bits 31..0: slot value 160 * 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot 161 */ getSlotValueAndOffset(int excWord, int index, int excOffset)162 private final long getSlotValueAndOffset(int excWord, int index, int excOffset) { 163 long value; 164 if((excWord&EXC_DOUBLE_SLOTS)==0) { 165 excOffset+=slotOffset(excWord, index); 166 value=exceptions.charAt(excOffset); 167 } else { 168 excOffset+=2*slotOffset(excWord, index); 169 value=exceptions.charAt(excOffset++); 170 value=(value<<16)|exceptions.charAt(excOffset); 171 } 172 return value |((long)excOffset<<32); 173 } 174 175 /* same as getSlotValueAndOffset() but does not return the slot offset */ getSlotValue(int excWord, int index, int excOffset)176 private final int getSlotValue(int excWord, int index, int excOffset) { 177 int value; 178 if((excWord&EXC_DOUBLE_SLOTS)==0) { 179 excOffset+=slotOffset(excWord, index); 180 value=exceptions.charAt(excOffset); 181 } else { 182 excOffset+=2*slotOffset(excWord, index); 183 value=exceptions.charAt(excOffset++); 184 value=(value<<16)|exceptions.charAt(excOffset); 185 } 186 return value; 187 } 188 189 // simple case mappings ------------------------------------------------ *** 190 tolower(int c)191 public final int tolower(int c) { 192 int props=trie.get(c); 193 if(!propsHasException(props)) { 194 if(isUpperOrTitleFromProps(props)) { 195 c+=getDelta(props); 196 } 197 } else { 198 int excOffset=getExceptionsOffset(props); 199 int excWord=exceptions.charAt(excOffset++); 200 if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) { 201 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 202 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? 
c+delta : c-delta; 203 } 204 if(hasSlot(excWord, EXC_LOWER)) { 205 c=getSlotValue(excWord, EXC_LOWER, excOffset); 206 } 207 } 208 return c; 209 } 210 toupper(int c)211 public final int toupper(int c) { 212 int props=trie.get(c); 213 if(!propsHasException(props)) { 214 if(getTypeFromProps(props)==LOWER) { 215 c+=getDelta(props); 216 } 217 } else { 218 int excOffset=getExceptionsOffset(props); 219 int excWord=exceptions.charAt(excOffset++); 220 if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) { 221 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 222 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; 223 } 224 if(hasSlot(excWord, EXC_UPPER)) { 225 c=getSlotValue(excWord, EXC_UPPER, excOffset); 226 } 227 } 228 return c; 229 } 230 totitle(int c)231 public final int totitle(int c) { 232 int props=trie.get(c); 233 if(!propsHasException(props)) { 234 if(getTypeFromProps(props)==LOWER) { 235 c+=getDelta(props); 236 } 237 } else { 238 int excOffset=getExceptionsOffset(props); 239 int excWord=exceptions.charAt(excOffset++); 240 if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) { 241 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 242 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; 243 } 244 int index; 245 if(hasSlot(excWord, EXC_TITLE)) { 246 index=EXC_TITLE; 247 } else if(hasSlot(excWord, EXC_UPPER)) { 248 index=EXC_UPPER; 249 } else { 250 return c; 251 } 252 c=getSlotValue(excWord, index, excOffset); 253 } 254 return c; 255 } 256 257 /** 258 * Adds all simple case mappings and the full case folding for c to sa, 259 * and also adds special case closure mappings. 260 * c itself is not added. 
261 * For example, the mappings 262 * - for s include long s 263 * - for sharp s include ss 264 * - for k include the Kelvin sign 265 */ addCaseClosure(int c, UnicodeSet set)266 public final void addCaseClosure(int c, UnicodeSet set) { 267 /* 268 * Hardcode the case closure of i and its relatives and ignore the 269 * data file data for these characters. 270 * The Turkic dotless i and dotted I with their case mapping conditions 271 * and case folding option make the related characters behave specially. 272 * This code matches their closure behavior to their case folding behavior. 273 */ 274 275 switch(c) { 276 case 0x49: 277 /* regular i and I are in one equivalence class */ 278 set.add(0x69); 279 return; 280 case 0x69: 281 set.add(0x49); 282 return; 283 case 0x130: 284 /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ 285 set.add(iDot); 286 return; 287 case 0x131: 288 /* dotless i is in a class by itself */ 289 return; 290 default: 291 /* otherwise use the data file data */ 292 break; 293 } 294 295 int props=trie.get(c); 296 if(!propsHasException(props)) { 297 if(getTypeFromProps(props)!=NONE) { 298 /* add the one simple case mapping, no matter what type it is */ 299 int delta=getDelta(props); 300 if(delta!=0) { 301 set.add(c+delta); 302 } 303 } 304 } else { 305 /* 306 * c has exceptions, so there may be multiple simple and/or 307 * full case mappings. Add them all. 
308 */ 309 int excOffset0, excOffset=getExceptionsOffset(props); 310 int closureOffset; 311 int excWord=exceptions.charAt(excOffset++); 312 int index, closureLength, fullLength, length; 313 314 excOffset0=excOffset; 315 316 /* add all simple case mappings */ 317 for(index=EXC_LOWER; index<=EXC_TITLE; ++index) { 318 if(hasSlot(excWord, index)) { 319 excOffset=excOffset0; 320 c=getSlotValue(excWord, index, excOffset); 321 set.add(c); 322 } 323 } 324 if(hasSlot(excWord, EXC_DELTA)) { 325 excOffset=excOffset0; 326 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 327 set.add((excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta); 328 } 329 330 /* get the closure string pointer & length */ 331 if(hasSlot(excWord, EXC_CLOSURE)) { 332 excOffset=excOffset0; 333 long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset); 334 closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */ 335 closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */ 336 } else { 337 closureLength=0; 338 closureOffset=0; 339 } 340 341 /* add the full case folding */ 342 if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { 343 excOffset=excOffset0; 344 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); 345 fullLength=(int)value; 346 347 /* start of full case mapping strings */ 348 excOffset=(int)(value>>32)+1; 349 350 fullLength&=0xffff; /* bits 16 and higher are reserved */ 351 352 /* skip the lowercase result string */ 353 excOffset+=fullLength&FULL_LOWER; 354 fullLength>>=4; 355 356 /* add the full case folding string */ 357 length=fullLength&0xf; 358 if(length!=0) { 359 set.add(exceptions.substring(excOffset, excOffset+length)); 360 excOffset+=length; 361 } 362 363 /* skip the uppercase and titlecase strings */ 364 fullLength>>=4; 365 excOffset+=fullLength&0xf; 366 fullLength>>=4; 367 excOffset+=fullLength; 368 369 closureOffset=excOffset; /* behind full case mappings */ 370 } 371 372 /* add each code point in the 
closure string */ 373 int limit=closureOffset+closureLength; 374 for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) { 375 c=exceptions.codePointAt(index); 376 set.add(c); 377 } 378 } 379 } 380 381 /* 382 * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated 383 * must be s.length()>0 and max>0 and s.length()<=max 384 */ strcmpMax(String s, int unfoldOffset, int max)385 private final int strcmpMax(String s, int unfoldOffset, int max) { 386 int i1, length, c1, c2; 387 388 length=s.length(); 389 max-=length; /* we require length<=max, so no need to decrement max in the loop */ 390 i1=0; 391 do { 392 c1=s.charAt(i1++); 393 c2=unfold[unfoldOffset++]; 394 if(c2==0) { 395 return 1; /* reached the end of t but not of s */ 396 } 397 c1-=c2; 398 if(c1!=0) { 399 return c1; /* return difference result */ 400 } 401 } while(--length>0); 402 /* ends with length==0 */ 403 404 if(max==0 || unfold[unfoldOffset]==0) { 405 return 0; /* equal to length of both strings */ 406 } else { 407 return -max; /* return lengh difference */ 408 } 409 } 410 411 /** 412 * Maps the string to single code points and adds the associated case closure 413 * mappings. 414 * The string is mapped to code points if it is their full case folding string. 415 * In other words, this performs a reverse full case folding and then 416 * adds the case closure items of the resulting code points. 417 * If the string is found and its closure applied, then 418 * the string itself is added as well as part of its code points' closure. 
419 * 420 * @return true if the string was found 421 */ addStringCaseClosure(String s, UnicodeSet set)422 public final boolean addStringCaseClosure(String s, UnicodeSet set) { 423 int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth; 424 425 if(unfold==null || s==null) { 426 return false; /* no reverse case folding data, or no string */ 427 } 428 length=s.length(); 429 if(length<=1) { 430 /* the string is too short to find any match */ 431 /* 432 * more precise would be: 433 * if(!u_strHasMoreChar32Than(s, length, 1)) 434 * but this does not make much practical difference because 435 * a single supplementary code point would just not be found 436 */ 437 return false; 438 } 439 440 unfoldRows=unfold[UNFOLD_ROWS]; 441 unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH]; 442 unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH]; 443 //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth; 444 445 if(length>unfoldStringWidth) { 446 /* the string is too long to find any match */ 447 return false; 448 } 449 450 /* do a binary search for the string */ 451 start=0; 452 limit=unfoldRows; 453 while(start<limit) { 454 i=(start+limit)/2; 455 unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above 456 result=strcmpMax(s, unfoldOffset, unfoldStringWidth); 457 458 if(result==0) { 459 /* found the string: add each code point, and its case closure */ 460 int c; 461 462 for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) { 463 c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i); 464 set.add(c); 465 addCaseClosure(c, set); 466 } 467 return true; 468 } else if(result<0) { 469 limit=i; 470 } else /* result>0 */ { 471 start=i+1; 472 } 473 } 474 475 return false; /* string not found */ 476 } 477 478 /** @return NONE, LOWER, UPPER, TITLE */ getType(int c)479 public final int getType(int c) { 480 return getTypeFromProps(trie.get(c)); 481 } 482 483 /** @return like getType() but also sets IGNORABLE if c is 
case-ignorable */ getTypeOrIgnorable(int c)484 public final int getTypeOrIgnorable(int c) { 485 return getTypeAndIgnorableFromProps(trie.get(c)); 486 } 487 488 /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */ getDotType(int c)489 public final int getDotType(int c) { 490 int props=trie.get(c); 491 if(!propsHasException(props)) { 492 return props&DOT_MASK; 493 } else { 494 return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK; 495 } 496 } 497 isSoftDotted(int c)498 public final boolean isSoftDotted(int c) { 499 return getDotType(c)==SOFT_DOTTED; 500 } 501 isCaseSensitive(int c)502 public final boolean isCaseSensitive(int c) { 503 int props=trie.get(c); 504 if(!propsHasException(props)) { 505 return (props&SENSITIVE)!=0; 506 } else { 507 return (exceptions.charAt(getExceptionsOffset(props))&EXC_SENSITIVE)!=0; 508 } 509 } 510 511 // string casing ------------------------------------------------------- *** 512 513 /* 514 * These internal functions form the core of string case mappings. 515 * They map single code points to result code points or strings and take 516 * all necessary conditions (context, locale ID, options) into account. 517 * 518 * They do not iterate over the source or write to the destination 519 * so that the same functions are useful for non-standard string storage, 520 * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. 521 * For the same reason, the "surrounding text" context is passed in as a 522 * ContextIterator which does not make any assumptions about 523 * the underlying storage. 524 * 525 * This section contains helper functions that check for conditions 526 * in the input text surrounding the current code point 527 * according to SpecialCasing.txt. 
*
     * Each helper function gets the index
     * - after the current code point if it looks at following text
     * - before the current code point if it looks at preceding text
     *
     * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
     *
     * Final_Sigma
     *   C is preceded by a sequence consisting of
     *     a cased letter and a case-ignorable sequence,
     *   and C is not followed by a sequence consisting of
     *     an ignorable sequence and then a cased letter.
     *
     * More_Above
     *   C is followed by one or more characters of combining class 230 (ABOVE)
     *   in the combining character sequence.
     *
     * After_Soft_Dotted
     *   The last preceding character with combining class of zero before C
     *   was Soft_Dotted,
     *   and there is no intervening combining character class 230 (ABOVE).
     *
     * Before_Dot
     *   C is followed by combining dot above (U+0307).
     *   Any sequence of characters with a combining class that is neither 0 nor 230
     *   may intervene between the current character and the combining dot above.
     *
     * The erratum from 2002-10-31 adds the condition
     *
     * After_I
     *   The last preceding base character was an uppercase I, and there is no
     *   intervening combining character class 230 (ABOVE).
     *
     *   (See Jitterbug 2344 and the comments on After_I below.)
     *
     * Helper definitions in Unicode 3.2 UAX 21:
     *
     * D1. A character C is defined to be cased
     *     if it meets any of the following criteria:
     *
     *   - The general category of C is Titlecase Letter (Lt)
     *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
     *   - Given D = NFD(C), then it is not the case that:
     *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
     *     (This third criterion does not add any characters to the list
     *      for Unicode 3.2. Ignored.)
     *
     * D2.
A character C is defined to be case-ignorable 576 * if it meets either of the following criteria: 577 * 578 * - The general category of C is 579 * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or 580 * Letter Modifier (Lm), or Symbol Modifier (Sk) 581 * - C is one of the following characters 582 * U+0027 APOSTROPHE 583 * U+00AD SOFT HYPHEN (SHY) 584 * U+2019 RIGHT SINGLE QUOTATION MARK 585 * (the preferred character for apostrophe) 586 * 587 * D3. A case-ignorable sequence is a sequence of 588 * zero or more case-ignorable characters. 589 */ 590 591 /** 592 * Iterator for string case mappings, which need to look at the 593 * context (surrounding text) of a given character for conditional mappings. 594 * 595 * The iterator only needs to go backward or forward away from the 596 * character in question. It does not use any indexes on this interface. 597 * It does not support random access or an arbitrary change of 598 * iteration direction. 599 * 600 * The code point being case-mapped itself is never returned by 601 * this iterator. 602 * @hide exposed on OHOS 603 */ 604 public interface ContextIterator { 605 /** 606 * Reset the iterator for forward or backward iteration. 607 * @param dir >0: Begin iterating forward from the first code point 608 * after the one that is being case-mapped. 609 * <0: Begin iterating backward from the first code point 610 * before the one that is being case-mapped. 611 */ reset(int dir)612 public void reset(int dir); 613 /** 614 * Iterate and return the next code point, moving in the direction 615 * determined by the reset() call. 616 * @return Next code point, or <0 when the iteration is done. 617 */ next()618 public int next(); 619 } 620 621 /** 622 * Fast case mapping data for ASCII/Latin. 623 * Linear arrays of delta bytes: 0=no mapping; EXC=exception. 624 * Deltas must not cross the ASCII boundary, or else they cannot be easily used 625 * in simple UTF-8 code. 
626 */ 627 static final class LatinCase { 628 /** Case mapping/folding data for code points up to U+017F. */ 629 static final char LIMIT = 0x180; 630 /** U+017F case-folds and uppercases crossing the ASCII boundary. */ 631 static final char LONG_S = 0x17f; 632 /** Exception: Complex mapping, or too-large delta. */ 633 static final byte EXC = -0x80; 634 635 /** Deltas for lowercasing for most locales, and default case folding. */ 636 static final byte[] TO_LOWER_NORMAL = { 637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 639 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 641 642 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 643 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, 644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 646 647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 650 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 651 652 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 653 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, 654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 656 657 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 658 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 659 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 660 EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 661 662 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, 663 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 664 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 665 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC 666 }; 667 668 /** Deltas for lowercasing for tr/az/lt, and Turkic case folding. 
*/ 669 static final byte[] TO_LOWER_TR_LT = { 670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 674 675 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32, 676 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, 677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 679 680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 683 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 684 685 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 686 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, 687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 689 690 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 691 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 692 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0, 693 EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 694 695 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, 696 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 697 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 698 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC 699 }; 700 701 /** Deltas for uppercasing for most locales. 
*/ 702 static final byte[] TO_UPPER_NORMAL = { 703 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 704 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 705 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 706 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 707 708 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 709 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 710 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 711 -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, 712 713 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 716 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 717 718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, 720 -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 721 -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, 722 723 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 724 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 725 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 726 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, 727 728 -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, 729 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 730 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 731 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC 732 }; 733 734 /** Deltas for uppercasing for tr/az. 
*/ 735 static final byte[] TO_UPPER_TR = { 736 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 737 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 740 741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 743 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32, 744 -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, 745 746 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 747 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 748 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 749 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 750 751 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 752 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, 753 -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 754 -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, 755 756 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 757 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 758 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 759 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, 760 761 -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, 762 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 763 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 764 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC 765 }; 766 } 767 768 /** 769 * For string case mappings, a single character (a code point) is mapped 770 * either to itself (in which case in-place mapping functions do nothing), 771 * or to another single code point, or to a string. 
772 * Aside from the string contents, these are indicated with a single int 773 * value as follows: 774 * 775 * Mapping to self: Negative values (~self instead of -self to support U+0000) 776 * 777 * Mapping to another code point: Positive values >MAX_STRING_LENGTH 778 * 779 * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is 780 * returned. Note that the string result may indeed have zero length. 781 */ 782 public static final int MAX_STRING_LENGTH=0x1f; 783 784 //ivate static final int LOC_UNKNOWN=0; 785 public static final int LOC_ROOT=1; 786 static final int LOC_TURKISH=2; 787 static final int LOC_LITHUANIAN=3; 788 static final int LOC_GREEK=4; 789 public static final int LOC_DUTCH=5; 790 getCaseLocale(Locale locale)791 public static final int getCaseLocale(Locale locale) { 792 return getCaseLocale(locale.getLanguage()); 793 } getCaseLocale(ULocale locale)794 public static final int getCaseLocale(ULocale locale) { 795 return getCaseLocale(locale.getLanguage()); 796 } 797 /** Accepts both 2- and 3-letter language subtags. */ getCaseLocale(String language)798 private static final int getCaseLocale(String language) { 799 // Check the subtag length to reduce the number of comparisons 800 // for locales without special behavior. 801 // Fastpath for English "en" which is often used for default (=root locale) case mappings, 802 // and for Chinese "zh": Very common but no special case mapping behavior. 
803 if(language.length()==2) { 804 if(language.equals("en") || language.charAt(0)>'t') { 805 return LOC_ROOT; 806 } else if(language.equals("tr") || language.equals("az")) { 807 return LOC_TURKISH; 808 } else if(language.equals("el")) { 809 return LOC_GREEK; 810 } else if(language.equals("lt")) { 811 return LOC_LITHUANIAN; 812 } else if(language.equals("nl")) { 813 return LOC_DUTCH; 814 } 815 } else if(language.length()==3) { 816 if(language.equals("tur") || language.equals("aze")) { 817 return LOC_TURKISH; 818 } else if(language.equals("ell")) { 819 return LOC_GREEK; 820 } else if(language.equals("lit")) { 821 return LOC_LITHUANIAN; 822 } else if(language.equals("nld")) { 823 return LOC_DUTCH; 824 } 825 } 826 return LOC_ROOT; 827 } 828 829 /* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */ isFollowedByCasedLetter(ContextIterator iter, int dir)830 private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) { 831 int c; 832 833 if(iter==null) { 834 return false; 835 } 836 837 for(iter.reset(dir); (c=iter.next())>=0;) { 838 int type=getTypeOrIgnorable(c); 839 if((type&4)!=0) { 840 /* case-ignorable, continue with the loop */ 841 } else if(type!=NONE) { 842 return true; /* followed by cased letter */ 843 } else { 844 return false; /* uncased and not case-ignorable */ 845 } 846 } 847 848 return false; /* not followed by cased letter */ 849 } 850 851 /* Is preceded by Soft_Dotted character with no intervening cc=230 ? 
*/ isPrecededBySoftDotted(ContextIterator iter)852 private final boolean isPrecededBySoftDotted(ContextIterator iter) { 853 int c; 854 int dotType; 855 856 if(iter==null) { 857 return false; 858 } 859 860 for(iter.reset(-1); (c=iter.next())>=0;) { 861 dotType=getDotType(c); 862 if(dotType==SOFT_DOTTED) { 863 return true; /* preceded by TYPE_i */ 864 } else if(dotType!=OTHER_ACCENT) { 865 return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ 866 } 867 } 868 869 return false; /* not preceded by TYPE_i */ 870 } 871 872 /* 873 * See Jitterbug 2344: 874 * The condition After_I for Turkic-lowercasing of U+0307 combining dot above 875 * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because 876 * we made those releases compatible with Unicode 3.2 which had not fixed 877 * a related bug in SpecialCasing.txt. 878 * 879 * From the Jitterbug 2344 text: 880 * ... this bug is listed as a Unicode erratum 881 * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html 882 * <quote> 883 * There are two errors in SpecialCasing.txt. 884 * 1. Missing semicolons on two lines. ... [irrelevant for ICU] 885 * 2. An incorrect context definition. Correct as follows: 886 * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE 887 * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE 888 * --- 889 * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 890 * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 891 * where the context After_I is defined as: 892 * The last preceding base character was an uppercase I, and there is no 893 * intervening combining character class 230 (ABOVE). 894 * </quote> 895 * 896 * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: 897 * 898 * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 
899 * # This matches the behavior of the canonically equivalent I-dot_above 900 * 901 * See also the description in this place in older versions of uchar.c (revision 1.100). 902 * 903 * Markus W. Scherer 2003-feb-15 904 */ 905 906 /* Is preceded by base character 'I' with no intervening cc=230 ? */ isPrecededBy_I(ContextIterator iter)907 private final boolean isPrecededBy_I(ContextIterator iter) { 908 int c; 909 int dotType; 910 911 if(iter==null) { 912 return false; 913 } 914 915 for(iter.reset(-1); (c=iter.next())>=0;) { 916 if(c==0x49) { 917 return true; /* preceded by I */ 918 } 919 dotType=getDotType(c); 920 if(dotType!=OTHER_ACCENT) { 921 return false; /* preceded by different base character (not I), or intervening cc==230 */ 922 } 923 } 924 925 return false; /* not preceded by I */ 926 } 927 928 /* Is followed by one or more cc==230 ? */ isFollowedByMoreAbove(ContextIterator iter)929 private final boolean isFollowedByMoreAbove(ContextIterator iter) { 930 int c; 931 int dotType; 932 933 if(iter==null) { 934 return false; 935 } 936 937 for(iter.reset(1); (c=iter.next())>=0;) { 938 dotType=getDotType(c); 939 if(dotType==ABOVE) { 940 return true; /* at least one cc==230 following */ 941 } else if(dotType!=OTHER_ACCENT) { 942 return false; /* next base character, no more cc==230 following */ 943 } 944 } 945 946 return false; /* no more cc==230 following */ 947 } 948 949 /* Is followed by a dot above (without cc==230 in between) ? 
/*
 * Forward scan used by the Turkic Not_Before_Dot test in toFullLower():
 * returns true as soon as U+0307 is read; returns false when iter is null,
 * when a code point whose dot type is not OTHER_ACCENT is read first,
 * or when iter.next() returns a negative value.
 */
private final boolean isFollowedByDotAbove(ContextIterator iter) {
    int c;
    int dotType;

    if(iter==null) {
        return false;
    }

    for(iter.reset(1); (c=iter.next())>=0; ) {
        if(c==0x307) {
            return true;
        }
        dotType=getDotType(c);
        if(dotType!=OTHER_ACCENT) {
            return false; /* next base character or cc==230 in between */
        }
    }

    return false; /* no dot above following */
}

/* Hardcoded result strings for the conditional mappings below; each contains U+0307 (dot above). */
private static final String
    iDot=       "i\u0307",
    jDot=       "j\u0307",
    iOgonekDot= "\u012f\u0307",
    iDotGrave=  "i\u0307\u0300",
    iDotAcute=  "i\u0307\u0301",
    iDotTilde=  "i\u0307\u0303";

/**
 * Get the full lowercase mapping for c.
 *
 * @param c Character to be mapped.
 * @param iter Character iterator, used for context-sensitive mappings.
 *             See ContextIterator for details.
 *             If iter==null then a context-independent result is returned.
 * @param out If the mapping result is a string, then it is appended to out.
 * @param caseLocale Case locale value from ucase_getCaseLocale().
 * @return Output code point or string length, see MAX_STRING_LENGTH.
 *         As coded below: ~c when the mapping leaves c unchanged,
 *         0 when the character is removed (Turkic U+0307 after I),
 *         the number of chars appended to out for a string result,
 *         otherwise the mapped code point.
 * @throws ICUUncheckedIOException if out.append() throws an IOException
 *
 * @see ContextIterator
 * @see #MAX_STRING_LENGTH
 * @hide draft / provisional / internal are hidden on OHOS
 */
public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
    int result, props;

    result=c;
    props=trie.get(c);
    if(!propsHasException(props)) {
        /* simple case: the delta is stored directly in the properties word */
        if(isUpperOrTitleFromProps(props)) {
            result=c+getDelta(props);
        }
    } else {
        int excOffset=getExceptionsOffset(props), excOffset2;
        int excWord=exceptions.charAt(excOffset++);
        int full;

        /* remember the offset just past the exception word for the simple-slot lookups at the end */
        excOffset2=excOffset;

        if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
            /* use hardcoded conditions and mappings */
            /*
             * Test for conditional mappings first
             * (otherwise the unconditional default mappings are always taken),
             * then test for characters that have unconditional mappings in SpecialCasing.txt,
             * then get the UnicodeData.txt mappings.
             */
            if( caseLocale==LOC_LITHUANIAN &&
                    /* base characters, find accents above */
                    (((c==0x49 || c==0x4a || c==0x12e) &&
                        isFollowedByMoreAbove(iter)) ||
                    /* precomposed with accent above, no need to find one */
                    (c==0xcc || c==0xcd || c==0x128))
            ) {
                /*
                    # Lithuanian

                    # Lithuanian retains the dot in a lowercase i when followed by accents.

                    # Introduce an explicit dot above when lowercasing capital I's and J's
                    # whenever there are more accents above.
                    # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)

                    0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
                    004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
                    012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
                    00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
                    00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
                    0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
                 */
                try {
                    /* the returned value is the length of the string just appended */
                    switch(c) {
                    case 0x49:  /* LATIN CAPITAL LETTER I */
                        out.append(iDot);
                        return 2;
                    case 0x4a:  /* LATIN CAPITAL LETTER J */
                        out.append(jDot);
                        return 2;
                    case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
                        out.append(iOgonekDot);
                        return 2;
                    case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
                        out.append(iDotGrave);
                        return 3;
                    case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
                        out.append(iDotAcute);
                        return 3;
                    case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
                        out.append(iDotTilde);
                        return 3;
                    default:
                        return 0; /* will not occur */
                    }
                } catch (IOException e) {
                    throw new ICUUncheckedIOException(e);
                }
            /* # Turkish and Azeri */
            } else if(caseLocale==LOC_TURKISH && c==0x130) {
                /*
                    # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                    # The following rules handle those cases.

                    0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
                    0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
                 */
                return 0x69;
            } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
                /*
                    # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
                    # This matches the behavior of the canonically equivalent I-dot_above

                    0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
                    0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
                 */
                return 0; /* remove the dot (continue without output) */
            } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
                /*
                    # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.

                    0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
                    0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
                 */
                return 0x131;
            } else if(c==0x130) {
                /*
                    # Preserve canonical equivalence for I with dot. Turkic is handled below.

                    0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE

                    (The "handled below" wording is quoted from SpecialCasing.txt; in this
                    method the LOC_TURKISH branch for U+0130 is tested before this one.)
                 */
                try {
                    out.append(iDot);
                    return 2;
                } catch (IOException e) {
                    throw new ICUUncheckedIOException(e);
                }
            } else if(  c==0x3a3 &&
                        !isFollowedByCasedLetter(iter, 1) &&
                        isFollowedByCasedLetter(iter, -1) /* -1=preceded */
            ) {
                /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
                /*
                    # Special case for final form of sigma

                    03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
                 */
                return 0x3c2; /* greek small final sigma */
            } else {
                /* no known conditional special case mapping, use a normal mapping */
            }
        } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
            long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
            /* low 4 bits of the lengths word: length of the lowercase string */
            full=(int)value&FULL_LOWER;
            if(full!=0) {
                /* start of full case mapping strings */
                excOffset=(int)(value>>32)+1;

                try {
                    // append the lowercase mapping
                    out.append(exceptions, excOffset, excOffset+full);

                    /* return the string length */
                    return full;
                } catch (IOException e) {
                    throw new ICUUncheckedIOException(e);
                }
            }
        }

        /* no conditional or full mapping applied: fall back to the simple delta / lowercase slot */
        if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
            int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
            return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
        }
        if(hasSlot(excWord, EXC_LOWER)) {
            result=getSlotValue(excWord, EXC_LOWER, excOffset2);
        }
    }

    /* an unchanged code point is reported as its bitwise complement */
    return (result==c) ? ~result : result;
}

/*
 * internal
 *
 * Shared implementation of toFullUpper() (upperNotTitle==true) and
 * toFullTitle() (upperNotTitle==false).
 * Return value as coded below: ~c when c is unchanged, 0 when the Lithuanian
 * U+0307 is removed, the number of chars appended to out for a string result,
 * otherwise the mapped code point.
 * An IOException from out.append() is rethrown as ICUUncheckedIOException.
 */
private final int toUpperOrTitle(int c, ContextIterator iter,
                                 Appendable out,
                                 int loc,
                                 boolean upperNotTitle) {
    int result;
    int props;

    result=c;
    props=trie.get(c);
    if(!propsHasException(props)) {
        if(getTypeFromProps(props)==LOWER) {
            result=c+getDelta(props);
        }
    } else {
        int excOffset=getExceptionsOffset(props), excOffset2;
        int excWord=exceptions.charAt(excOffset++);
        int full, index;

        /* offset just past the exception word, used for the simple-slot lookups at the end */
        excOffset2=excOffset;

        if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
            /* use hardcoded conditions and mappings */
            if(loc==LOC_TURKISH && c==0x69) {
                /*
                    # Turkish and Azeri

                    # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                    # The following rules handle those cases.

                    # When uppercasing, i turns into a dotted capital I

                    0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
                    0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
                 */
                return 0x130;
            } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
                /*
                    # Lithuanian

                    # Lithuanian retains the dot in a lowercase i when followed by accents.

                    # Remove DOT ABOVE after "i" with upper or titlecase

                    0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
                 */
                return 0; /* remove the dot (continue without output) */
            } else {
                /* no known conditional special case mapping, use a normal mapping */
            }
        } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
            long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
            /* lengths word: four 4-bit string lengths, consumed from the low nibble upward */
            full=(int)value&0xffff;

            /* start of full case mapping strings */
            excOffset=(int)(value>>32)+1;

            /* skip the lowercase and case-folding result strings */
            excOffset+=full&FULL_LOWER;
            full>>=4;
            excOffset+=full&0xf;
            full>>=4;

            if(upperNotTitle) {
                full&=0xf;
            } else {
                /* skip the uppercase result string */
                excOffset+=full&0xf;
                full=(full>>4)&0xf;
            }

            if(full!=0) {
                try {
                    // append the result string
                    out.append(exceptions, excOffset, excOffset+full);

                    /* return the string length */
                    return full;
                } catch (IOException e) {
                    throw new ICUUncheckedIOException(e);
                }
            }
        }

        if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
            int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
            return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
        }
        if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
            index=EXC_TITLE;
        } else if(hasSlot(excWord, EXC_UPPER)) {
            /* here, titlecase is same as uppercase */
            index=EXC_UPPER;
        } else {
            return ~c;
        }
        result=getSlotValue(excWord, index, excOffset2);
    }

    /* an unchanged code point is reported as its bitwise complement */
    return (result==c) ? ~result : result;
}

/* Full uppercase mapping: delegates to toUpperOrTitle() with upperNotTitle=true. */
public final int toFullUpper(int c, ContextIterator iter,
                             Appendable out,
                             int caseLocale) {
    return toUpperOrTitle(c, iter, out, caseLocale, true);
}

/* Full titlecase mapping: delegates to toUpperOrTitle() with upperNotTitle=false. */
public final int toFullTitle(int c, ContextIterator iter,
                             Appendable out,
                             int caseLocale) {
    return toUpperOrTitle(c, iter, out, caseLocale, false);
}

/* case folding ------------------------------------------------------------- */

/*
 * Case folding is similar to lowercasing.
 * The result may be a simple mapping, i.e., a single code point, or
 * a full mapping, i.e., a string.
 * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
 * then only the lowercase mapping is stored.
 *
 * Some special cases are hardcoded because their conditions cannot be
 * parsed and processed from CaseFolding.txt.
 *
 * Unicode 3.2 CaseFolding.txt specifies for its status field:

# C: common case folding, common mappings shared by both simple and full mappings.
# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
# S: simple case folding, mappings to single characters where different from F.
# T: special case for uppercase I and dotted uppercase I
#    - For non-Turkic languages, this mapping is normally not used.
#    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
#
# Usage:
#  A. To do a simple case folding, use the mappings with status C + S.
#  B. To do a full case folding, use the mappings with status C + F.
#
#    The mappings with status T can be used or omitted depending on the desired case-folding
#    behavior.
 */
(The default option is to exclude them.) 1292 1293 * Unicode 3.2 has 'T' mappings as follows: 1294 1295 0049; T; 0131; # LATIN CAPITAL LETTER I 1296 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 1297 1298 * while the default mappings for these code points are: 1299 1300 0049; C; 0069; # LATIN CAPITAL LETTER I 1301 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 1302 1303 * U+0130 has no simple case folding (simple-case-folds to itself). 1304 */ 1305 1306 /** 1307 * Bit mask for getting just the options from a string compare options word 1308 * that are relevant for case folding (of a single string or code point). 1309 * 1310 * Currently only bit 0 for FOLD_CASE_EXCLUDE_SPECIAL_I. 1311 * It is conceivable that at some point we might use one more bit for using uppercase sharp s. 1312 * It is conceivable that at some point we might want the option to use only simple case foldings 1313 * when operating on strings. 1314 * 1315 * @hide draft / provisional / internal are hidden on OHOS 1316 */ 1317 static final int FOLD_CASE_OPTIONS_MASK = 7; 1318 1319 /* return the simple case folding mapping for c */ fold(int c, int options)1320 public final int fold(int c, int options) { 1321 int props=trie.get(c); 1322 if(!propsHasException(props)) { 1323 if(isUpperOrTitleFromProps(props)) { 1324 c+=getDelta(props); 1325 } 1326 } else { 1327 int excOffset=getExceptionsOffset(props); 1328 int excWord=exceptions.charAt(excOffset++); 1329 int index; 1330 if((excWord&EXC_CONDITIONAL_FOLD)!=0) { 1331 /* special case folding mappings, hardcoded */ 1332 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) { 1333 /* default mappings */ 1334 if(c==0x49) { 1335 /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ 1336 return 0x69; 1337 } else if(c==0x130) { 1338 /* no simple case folding for U+0130 */ 1339 return c; 1340 } 1341 } else { 1342 /* Turkic mappings */ 1343 if(c==0x49) { 1344 /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ 1345 return 0x131; 1346 } else 
if(c==0x130) { 1347 /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 1348 return 0x69; 1349 } 1350 } 1351 } 1352 if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) { 1353 return c; 1354 } 1355 if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) { 1356 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 1357 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; 1358 } 1359 if(hasSlot(excWord, EXC_FOLD)) { 1360 index=EXC_FOLD; 1361 } else if(hasSlot(excWord, EXC_LOWER)) { 1362 index=EXC_LOWER; 1363 } else { 1364 return c; 1365 } 1366 c=getSlotValue(excWord, index, excOffset); 1367 } 1368 return c; 1369 } 1370 1371 /* 1372 * Issue for canonical caseless match (UAX #21): 1373 * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve 1374 * canonical equivalence, unlike default-option casefolding. 1375 * For example, I-grave and I + grave fold to strings that are not canonically 1376 * equivalent. 1377 * For more details, see the comment in unorm_compare() in unorm.cpp 1378 * and the intermediate prototype changes for Jitterbug 2021. 1379 * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) 1380 * 1381 * This did not get fixed because it appears that it is not possible to fix 1382 * it for uppercase and lowercase characters (I-grave vs. i-grave) 1383 * together in a way that they still fold to common result strings. 
/**
 * Get the full case folding mapping for c.
 *
 * @param c Code point to be folded.
 * @param out If the folding result is a string, then it is appended to out.
 * @param options Folding options; masked with FOLD_CASE_OPTIONS_MASK. Any value other than
 *                UCharacter.FOLD_CASE_DEFAULT selects the Turkic mappings for U+0049 and U+0130.
 * @return As coded below: ~c when c is unchanged, the number of chars appended
 *         to out for a string result, otherwise the folded code point.
 * @throws ICUUncheckedIOException if out.append() throws an IOException
 */
public final int toFullFolding(int c, Appendable out, int options) {
    int result;
    int props;

    result=c;
    props=trie.get(c);
    if(!propsHasException(props)) {
        if(isUpperOrTitleFromProps(props)) {
            result=c+getDelta(props);
        }
    } else {
        int excOffset=getExceptionsOffset(props), excOffset2;
        int excWord=exceptions.charAt(excOffset++);
        int full, index;

        /* offset just past the exception word, used for the simple-slot lookups at the end */
        excOffset2=excOffset;

        if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
            /* use hardcoded conditions and mappings */
            if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
                /* default mappings */
                if(c==0x49) {
                    /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
                    return 0x69;
                } else if(c==0x130) {
                    /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
                    try {
                        out.append(iDot);
                        return 2;
                    } catch (IOException e) {
                        throw new ICUUncheckedIOException(e);
                    }
                }
            } else {
                /* Turkic mappings */
                if(c==0x49) {
                    /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
                    return 0x131;
                } else if(c==0x130) {
                    /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
                    return 0x69;
                }
            }
        } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
            long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
            full=(int)value&0xffff;

            /* start of full case mapping strings */
            excOffset=(int)(value>>32)+1;

            /* skip the lowercase result string */
            excOffset+=full&FULL_LOWER;
            /* second nibble of the lengths word: length of the case-folding string */
            full=(full>>4)&0xf;

            if(full!=0) {
                try {
                    // append the result string
                    out.append(exceptions, excOffset, excOffset+full);

                    /* return the string length */
                    return full;
                } catch (IOException e) {
                    throw new ICUUncheckedIOException(e);
                }
            }
        }

        if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
            return ~c;
        }
        if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
            int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
            return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
        }
        if(hasSlot(excWord, EXC_FOLD)) {
            index=EXC_FOLD;
        } else if(hasSlot(excWord, EXC_LOWER)) {
            index=EXC_LOWER;
        } else {
            return ~c;
        }
        result=getSlotValue(excWord, index, excOffset2);
    }

    /* an unchanged code point is reported as its bitwise complement */
    return (result==c) ? ~result : result;
}

/* case mapping properties API ---------------------------------------------- */

/*
 * We need a StringBuilder for multi-code point output from the
 * full case mapping functions. However, we do not actually use that output,
 * we just check whether the input character was mapped to anything else.
 * We use a shared StringBuilder to avoid allocating a new one in each call.
 * We remove its contents each time so that it does not grow large over time.
 *
 * NOTE(review): this is a single static, mutable StringBuilder and
 * hasBinaryProperty() below mutates it without any synchronization visible
 * here -- confirm what callers are expected to guarantee about threading.
 *
 * @internal
 */
public static final StringBuilder dummyStringBuilder = new StringBuilder();

/**
 * Tests a case-related binary property of c.
 *
 * @param c Code point to test.
 * @param which One of the UProperty constants handled in the switch below.
 * @return The property value; false for any property not handled here.
 */
public final boolean hasBinaryProperty(int c, int which) {
    switch(which) {
    case UProperty.LOWERCASE:
        return LOWER==getType(c);
    case UProperty.UPPERCASE:
        return UPPER==getType(c);
    case UProperty.SOFT_DOTTED:
        return isSoftDotted(c);
    case UProperty.CASE_SENSITIVE:
        return isCaseSensitive(c);
    case UProperty.CASED:
        return NONE!=getType(c);
    case UProperty.CASE_IGNORABLE:
        return (getTypeOrIgnorable(c)>>2)!=0;
    /*
     * Note: The following Changes_When_Xyz are defined as testing whether
     * the NFD form of the input changes when Xyz-case-mapped.
     * However, this simpler implementation of these properties,
     * ignoring NFD, passes the tests.
     * The implementation needs to be changed if the tests start failing.
     * When that happens, optimizations should be used to work with the
     * per-single-code point ucase_toFullXyz() functions unless
     * the NFD form has more than one code point,
     * and the property starts set needs to be the union of the
     * start sets for normalization and case mappings.
     */
    /* The toFullXyz() methods return ~c for an unchanged c, so a result >=0 means "changed". */
    case UProperty.CHANGES_WHEN_LOWERCASED:
        dummyStringBuilder.setLength(0);
        return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
    case UProperty.CHANGES_WHEN_UPPERCASED:
        dummyStringBuilder.setLength(0);
        return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
    case UProperty.CHANGES_WHEN_TITLECASED:
        dummyStringBuilder.setLength(0);
        return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
    /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
    case UProperty.CHANGES_WHEN_CASEMAPPED:
        /* the builder is cleared once here, not between the three calls below */
        dummyStringBuilder.setLength(0);
        return
            toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
            toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
            toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
    default:
        return false;
    }
}

// data members -------------------------------------------------------- ***
// All four are filled in by readData(): indexes[] first, then the trie,
// then exceptions and unfold (each only when its stored length is > 0).
private int indexes[];
private String exceptions;
private char unfold[];

private Trie2_16 trie;

// data format constants ----------------------------------------------- ***
private static final String DATA_NAME="ucase";
private static final String DATA_TYPE="icu";
private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;

/* format "cAsE" */
private static final int FMT=0x63415345;

/* indexes into indexes[] */
//private static final int IX_INDEX_TOP=0;
//private static final int IX_LENGTH=1;
private static final int IX_TRIE_SIZE=2;
private static final int IX_EXC_LENGTH=3;
private static final int IX_UNFOLD_LENGTH=4;
1555 //private static final int IX_MAX_FULL_LENGTH=15; 1556 private static final int IX_TOP=16; 1557 1558 // definitions for 16-bit case properties word ------------------------- *** 1559 getTrie()1560 static Trie2_16 getTrie() { 1561 return INSTANCE.trie; 1562 } 1563 1564 /* 2-bit constants for types of cased characters */ 1565 public static final int TYPE_MASK=3; 1566 public static final int NONE=0; 1567 public static final int LOWER=1; 1568 public static final int UPPER=2; 1569 public static final int TITLE=3; 1570 1571 /** @return NONE, LOWER, UPPER, TITLE */ getTypeFromProps(int props)1572 static final int getTypeFromProps(int props) { 1573 return props&TYPE_MASK; 1574 } 1575 1576 /** @return like getTypeFromProps() but also sets IGNORABLE if props indicate case-ignorable */ getTypeAndIgnorableFromProps(int props)1577 private static final int getTypeAndIgnorableFromProps(int props) { 1578 return props&7; 1579 } 1580 isUpperOrTitleFromProps(int props)1581 static final boolean isUpperOrTitleFromProps(int props) { 1582 return (props & 2) != 0; 1583 } 1584 1585 static final int IGNORABLE=4; 1586 private static final int EXCEPTION= 8; 1587 private static final int SENSITIVE= 0x10; 1588 1589 private static final int DOT_MASK= 0x60; 1590 //private static final int NO_DOT= 0; /* normal characters with cc=0 */ 1591 private static final int SOFT_DOTTED= 0x20; /* soft-dotted characters with cc=0 */ 1592 private static final int ABOVE= 0x40; /* "above" accents with cc=230 */ 1593 private static final int OTHER_ACCENT= 0x60; /* other accent character (0<cc!=230) */ 1594 1595 /* no exception: bits 15..7 are a 9-bit signed case mapping delta */ 1596 private static final int DELTA_SHIFT= 7; 1597 //private static final int DELTA_MASK= 0xff80; 1598 //private static final int MAX_DELTA= 0xff; 1599 //private static final int MIN_DELTA= (-MAX_DELTA-1); 1600 getDelta(int props)1601 static final int getDelta(int props) { 1602 return (short)props>>DELTA_SHIFT; 1603 } 1604 1605 /* 
/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
private static final int EXC_SHIFT=     4;
//private static final int EXC_MASK=      0xfff0;
//private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1);

/* definitions for 16-bit main exceptions word ------------------------------ */

/* first 8 bits indicate values in optional slots */
/* These are the slot numbers passed to hasSlot() / getSlotValue() / getSlotValueAndOffset(). */
private static final int EXC_LOWER=0;
private static final int EXC_FOLD=1;
private static final int EXC_UPPER=2;
private static final int EXC_TITLE=3;
private static final int EXC_DELTA=4;
//private static final int EXC_5=5;           /* reserved */
private static final int EXC_CLOSURE=6;
private static final int EXC_FULL_MAPPINGS=7;
//private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */

/* each slot is 2 uint16_t instead of 1 */
private static final int EXC_DOUBLE_SLOTS=          0x100;

/* flag bits of the exceptions word */
private static final int EXC_NO_SIMPLE_CASE_FOLDING=0x200;
/* when set, the EXC_DELTA slot value is subtracted from c instead of added */
private static final int EXC_DELTA_IS_NEGATIVE=0x400;
private static final int EXC_SENSITIVE=0x800;

/* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
/* (0x60<<7 == 0x3000, matching the commented-out EXC_DOT_MASK below) */
private static final int EXC_DOT_SHIFT=7;

/* normally stored in the main word, but pushed out for larger exception indexes */
//private static final int EXC_DOT_MASK=      0x3000;
//private static final int EXC_NO_DOT=        0;
//private static final int EXC_SOFT_DOTTED=   0x1000;
//private static final int EXC_ABOVE=         0x2000; /* "above" accents with cc=230 */
//private static final int EXC_OTHER_ACCENT=  0x3000; /* other character (0<cc!=230) */

/* complex/conditional mappings */
/* EXC_CONDITIONAL_SPECIAL gates the hardcoded branches of toFullLower()/toUpperOrTitle(), */
/* EXC_CONDITIONAL_FOLD those of fold()/toFullFolding(). */
private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
private static final int EXC_CONDITIONAL_FOLD=      0x8000;

/* definitions for lengths word for full case mappings */
/* one 4-bit length per mapping string; the lowercase length is the low nibble */
private static final int FULL_LOWER=    0xf;
//private static final int FULL_FOLDING=  0xf0;
//private static final int FULL_UPPER=    0xf00;
//private static final int FULL_TITLE=    0xf000;

/* maximum lengths */
//private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
private static final int CLOSURE_MAX_LENGTH=0xf;

/* constants for reverse case folding ("unfold") data */
private static final int UNFOLD_ROWS=0;
private static final int UNFOLD_ROW_WIDTH=1;
private static final int UNFOLD_STRING_WIDTH=2;

/*
 * public singleton instance
 *
 * Assigned exactly once by the static initializer below.
 */
public static final UCaseProps INSTANCE;

// This static initializer block must be placed after
// other static member initialization
//
// The private constructor loads and parses the data file and declares
// IOException; a failure is rethrown unchecked, so it surfaces while this
// class is being initialized.
static {
    try {
        INSTANCE = new UCaseProps();
    } catch (IOException e) {
        throw new ICUUncheckedIOException(e);
    }
}
} // end of class UCaseProps