1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html 4 /* 5 ******************************************************************************* 6 * 7 * Copyright (C) 2004-2015, International Business Machines 8 * Corporation and others. All Rights Reserved. 9 * 10 ******************************************************************************* 11 * file name: UCaseProps.java 12 * encoding: US-ASCII 13 * tab size: 8 (not used) 14 * indentation:4 15 * 16 * created on: 2005jan29 17 * created by: Markus W. Scherer 18 * 19 * Low-level Unicode character/string case mapping code. 20 * Java port of ucase.h/.c. 21 */ 22 23 package android.icu.impl; 24 25 import java.io.IOException; 26 import java.nio.ByteBuffer; 27 import java.util.Iterator; 28 import java.util.Locale; 29 30 import android.icu.lang.UCharacter; 31 import android.icu.lang.UProperty; 32 import android.icu.text.UTF16; 33 import android.icu.text.UnicodeSet; 34 import android.icu.util.ICUUncheckedIOException; 35 import android.icu.util.ULocale; 36 37 /** 38 * @hide Only a subset of ICU is exposed in Android 39 */ 40 public final class UCaseProps { 41 42 // constructors etc. 
--------------------------------------------------- *** 43 44 // port of ucase_openProps() UCaseProps()45 private UCaseProps() throws IOException { 46 ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME); 47 readData(bytes); 48 } 49 readData(ByteBuffer bytes)50 private final void readData(ByteBuffer bytes) throws IOException { 51 // read the header 52 ICUBinary.readHeader(bytes, FMT, new IsAcceptable()); 53 54 // read indexes[] 55 int count=bytes.getInt(); 56 if(count<IX_TOP) { 57 throw new IOException("indexes[0] too small in "+DATA_FILE_NAME); 58 } 59 indexes=new int[count]; 60 61 indexes[0]=count; 62 for(int i=1; i<count; ++i) { 63 indexes[i]=bytes.getInt(); 64 } 65 66 // read the trie 67 trie=Trie2_16.createFromSerialized(bytes); 68 int expectedTrieLength=indexes[IX_TRIE_SIZE]; 69 int trieLength=trie.getSerializedLength(); 70 if(trieLength>expectedTrieLength) { 71 throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie"); 72 } 73 // skip padding after trie bytes 74 ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength); 75 76 // read exceptions[] 77 count=indexes[IX_EXC_LENGTH]; 78 if(count>0) { 79 exceptions=ICUBinary.getString(bytes, count, 0); 80 } 81 82 // read unfold[] 83 count=indexes[IX_UNFOLD_LENGTH]; 84 if(count>0) { 85 unfold=ICUBinary.getChars(bytes, count, 0); 86 } 87 } 88 89 // implement ICUBinary.Authenticate 90 private final static class IsAcceptable implements ICUBinary.Authenticate { 91 @Override isDataVersionAcceptable(byte version[])92 public boolean isDataVersionAcceptable(byte version[]) { 93 return version[0]==4; 94 } 95 } 96 97 // set of property starts for UnicodeSet ------------------------------- *** 98 addPropertyStarts(UnicodeSet set)99 public final void addPropertyStarts(UnicodeSet set) { 100 /* add the start code point of each same-value range of the trie */ 101 Iterator<Trie2.Range> trieIterator=trie.iterator(); 102 Trie2.Range range; 103 while(trieIterator.hasNext() && 
!(range=trieIterator.next()).leadSurrogate) { 104 set.add(range.startCodePoint); 105 } 106 107 /* add code points with hardcoded properties, plus the ones following them */ 108 109 /* (none right now, see comment below) */ 110 111 /* 112 * Omit code points with hardcoded specialcasing properties 113 * because we do not build property UnicodeSets for them right now. 114 */ 115 } 116 117 // data access primitives ---------------------------------------------- *** getExceptionsOffset(int props)118 private static final int getExceptionsOffset(int props) { 119 return props>>EXC_SHIFT; 120 } 121 propsHasException(int props)122 static final boolean propsHasException(int props) { 123 return (props&EXCEPTION)!=0; 124 } 125 126 /* number of bits in an 8-bit integer value */ 127 private static final byte flagsOffset[/*256*/]={ 128 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 129 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 130 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 131 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 132 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 133 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 134 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 135 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 136 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 137 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 138 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 139 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 140 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 141 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 142 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 143 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 144 }; 145 hasSlot(int flags, int index)146 private static final boolean hasSlot(int flags, int index) { 147 return (flags&(1<<index))!=0; 148 } slotOffset(int flags, int index)149 private static final byte slotOffset(int flags, int index) { 150 return flagsOffset[flags&((1<<index)-1)]; 151 } 152 153 /* 154 * Get the value of an 
optional-value slot where hasSlot(excWord, index). 155 * 156 * @param excWord (in) initial exceptions word 157 * @param index (in) desired slot index 158 * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++); 159 * @return bits 31..0: slot value 160 * 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot 161 */ getSlotValueAndOffset(int excWord, int index, int excOffset)162 private final long getSlotValueAndOffset(int excWord, int index, int excOffset) { 163 long value; 164 if((excWord&EXC_DOUBLE_SLOTS)==0) { 165 excOffset+=slotOffset(excWord, index); 166 value=exceptions.charAt(excOffset); 167 } else { 168 excOffset+=2*slotOffset(excWord, index); 169 value=exceptions.charAt(excOffset++); 170 value=(value<<16)|exceptions.charAt(excOffset); 171 } 172 return value |((long)excOffset<<32); 173 } 174 175 /* same as getSlotValueAndOffset() but does not return the slot offset */ getSlotValue(int excWord, int index, int excOffset)176 private final int getSlotValue(int excWord, int index, int excOffset) { 177 int value; 178 if((excWord&EXC_DOUBLE_SLOTS)==0) { 179 excOffset+=slotOffset(excWord, index); 180 value=exceptions.charAt(excOffset); 181 } else { 182 excOffset+=2*slotOffset(excWord, index); 183 value=exceptions.charAt(excOffset++); 184 value=(value<<16)|exceptions.charAt(excOffset); 185 } 186 return value; 187 } 188 189 // simple case mappings ------------------------------------------------ *** 190 tolower(int c)191 public final int tolower(int c) { 192 int props=trie.get(c); 193 if(!propsHasException(props)) { 194 if(isUpperOrTitleFromProps(props)) { 195 c+=getDelta(props); 196 } 197 } else { 198 int excOffset=getExceptionsOffset(props); 199 int excWord=exceptions.charAt(excOffset++); 200 if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) { 201 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 202 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? 
c+delta : c-delta; 203 } 204 if(hasSlot(excWord, EXC_LOWER)) { 205 c=getSlotValue(excWord, EXC_LOWER, excOffset); 206 } 207 } 208 return c; 209 } 210 toupper(int c)211 public final int toupper(int c) { 212 int props=trie.get(c); 213 if(!propsHasException(props)) { 214 if(getTypeFromProps(props)==LOWER) { 215 c+=getDelta(props); 216 } 217 } else { 218 int excOffset=getExceptionsOffset(props); 219 int excWord=exceptions.charAt(excOffset++); 220 if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) { 221 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 222 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; 223 } 224 if(hasSlot(excWord, EXC_UPPER)) { 225 c=getSlotValue(excWord, EXC_UPPER, excOffset); 226 } 227 } 228 return c; 229 } 230 totitle(int c)231 public final int totitle(int c) { 232 int props=trie.get(c); 233 if(!propsHasException(props)) { 234 if(getTypeFromProps(props)==LOWER) { 235 c+=getDelta(props); 236 } 237 } else { 238 int excOffset=getExceptionsOffset(props); 239 int excWord=exceptions.charAt(excOffset++); 240 if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) { 241 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 242 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; 243 } 244 int index; 245 if(hasSlot(excWord, EXC_TITLE)) { 246 index=EXC_TITLE; 247 } else if(hasSlot(excWord, EXC_UPPER)) { 248 index=EXC_UPPER; 249 } else { 250 return c; 251 } 252 c=getSlotValue(excWord, index, excOffset); 253 } 254 return c; 255 } 256 257 /** 258 * Adds all simple case mappings and the full case folding for c to sa, 259 * and also adds special case closure mappings. 260 * c itself is not added. 
261 * For example, the mappings 262 * - for s include long s 263 * - for sharp s include ss 264 * - for k include the Kelvin sign 265 */ addCaseClosure(int c, UnicodeSet set)266 public final void addCaseClosure(int c, UnicodeSet set) { 267 /* 268 * Hardcode the case closure of i and its relatives and ignore the 269 * data file data for these characters. 270 * The Turkic dotless i and dotted I with their case mapping conditions 271 * and case folding option make the related characters behave specially. 272 * This code matches their closure behavior to their case folding behavior. 273 */ 274 275 switch(c) { 276 case 0x49: 277 /* regular i and I are in one equivalence class */ 278 set.add(0x69); 279 return; 280 case 0x69: 281 set.add(0x49); 282 return; 283 case 0x130: 284 /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ 285 set.add(iDot); 286 return; 287 case 0x131: 288 /* dotless i is in a class by itself */ 289 return; 290 default: 291 /* otherwise use the data file data */ 292 break; 293 } 294 295 int props=trie.get(c); 296 if(!propsHasException(props)) { 297 if(getTypeFromProps(props)!=NONE) { 298 /* add the one simple case mapping, no matter what type it is */ 299 int delta=getDelta(props); 300 if(delta!=0) { 301 set.add(c+delta); 302 } 303 } 304 } else { 305 /* 306 * c has exceptions, so there may be multiple simple and/or 307 * full case mappings. Add them all. 
308 */ 309 int excOffset0, excOffset=getExceptionsOffset(props); 310 int closureOffset; 311 int excWord=exceptions.charAt(excOffset++); 312 int index, closureLength, fullLength, length; 313 314 excOffset0=excOffset; 315 316 /* add all simple case mappings */ 317 for(index=EXC_LOWER; index<=EXC_TITLE; ++index) { 318 if(hasSlot(excWord, index)) { 319 excOffset=excOffset0; 320 c=getSlotValue(excWord, index, excOffset); 321 set.add(c); 322 } 323 } 324 if(hasSlot(excWord, EXC_DELTA)) { 325 excOffset=excOffset0; 326 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 327 set.add((excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta); 328 } 329 330 /* get the closure string pointer & length */ 331 if(hasSlot(excWord, EXC_CLOSURE)) { 332 excOffset=excOffset0; 333 long value=getSlotValueAndOffset(excWord, EXC_CLOSURE, excOffset); 334 closureLength=(int)value&CLOSURE_MAX_LENGTH; /* higher bits are reserved */ 335 closureOffset=(int)(value>>32)+1; /* behind this slot, unless there are full case mappings */ 336 } else { 337 closureLength=0; 338 closureOffset=0; 339 } 340 341 /* add the full case folding */ 342 if(hasSlot(excWord, EXC_FULL_MAPPINGS)) { 343 excOffset=excOffset0; 344 long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset); 345 fullLength=(int)value; 346 347 /* start of full case mapping strings */ 348 excOffset=(int)(value>>32)+1; 349 350 fullLength&=0xffff; /* bits 16 and higher are reserved */ 351 352 /* skip the lowercase result string */ 353 excOffset+=fullLength&FULL_LOWER; 354 fullLength>>=4; 355 356 /* add the full case folding string */ 357 length=fullLength&0xf; 358 if(length!=0) { 359 set.add(exceptions.substring(excOffset, excOffset+length)); 360 excOffset+=length; 361 } 362 363 /* skip the uppercase and titlecase strings */ 364 fullLength>>=4; 365 excOffset+=fullLength&0xf; 366 fullLength>>=4; 367 excOffset+=fullLength; 368 369 closureOffset=excOffset; /* behind full case mappings */ 370 } 371 372 /* add each code point in the 
closure string */ 373 int limit=closureOffset+closureLength; 374 for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) { 375 c=exceptions.codePointAt(index); 376 set.add(c); 377 } 378 } 379 } 380 381 /* 382 * compare s, which has a length, with t=unfold[unfoldOffset..], which has a maximum length or is NUL-terminated 383 * must be s.length()>0 and max>0 and s.length()<=max 384 */ strcmpMax(String s, int unfoldOffset, int max)385 private final int strcmpMax(String s, int unfoldOffset, int max) { 386 int i1, length, c1, c2; 387 388 length=s.length(); 389 max-=length; /* we require length<=max, so no need to decrement max in the loop */ 390 i1=0; 391 do { 392 c1=s.charAt(i1++); 393 c2=unfold[unfoldOffset++]; 394 if(c2==0) { 395 return 1; /* reached the end of t but not of s */ 396 } 397 c1-=c2; 398 if(c1!=0) { 399 return c1; /* return difference result */ 400 } 401 } while(--length>0); 402 /* ends with length==0 */ 403 404 if(max==0 || unfold[unfoldOffset]==0) { 405 return 0; /* equal to length of both strings */ 406 } else { 407 return -max; /* return lengh difference */ 408 } 409 } 410 411 /** 412 * Maps the string to single code points and adds the associated case closure 413 * mappings. 414 * The string is mapped to code points if it is their full case folding string. 415 * In other words, this performs a reverse full case folding and then 416 * adds the case closure items of the resulting code points. 417 * If the string is found and its closure applied, then 418 * the string itself is added as well as part of its code points' closure. 
419 * 420 * @return true if the string was found 421 */ addStringCaseClosure(String s, UnicodeSet set)422 public final boolean addStringCaseClosure(String s, UnicodeSet set) { 423 int i, length, start, limit, result, unfoldOffset, unfoldRows, unfoldRowWidth, unfoldStringWidth; 424 425 if(unfold==null || s==null) { 426 return false; /* no reverse case folding data, or no string */ 427 } 428 length=s.length(); 429 if(length<=1) { 430 /* the string is too short to find any match */ 431 /* 432 * more precise would be: 433 * if(!u_strHasMoreChar32Than(s, length, 1)) 434 * but this does not make much practical difference because 435 * a single supplementary code point would just not be found 436 */ 437 return false; 438 } 439 440 unfoldRows=unfold[UNFOLD_ROWS]; 441 unfoldRowWidth=unfold[UNFOLD_ROW_WIDTH]; 442 unfoldStringWidth=unfold[UNFOLD_STRING_WIDTH]; 443 //unfoldCPWidth=unfoldRowWidth-unfoldStringWidth; 444 445 if(length>unfoldStringWidth) { 446 /* the string is too long to find any match */ 447 return false; 448 } 449 450 /* do a binary search for the string */ 451 start=0; 452 limit=unfoldRows; 453 while(start<limit) { 454 i=(start+limit)/2; 455 unfoldOffset=((i+1)*unfoldRowWidth); // +1 to skip the header values above 456 result=strcmpMax(s, unfoldOffset, unfoldStringWidth); 457 458 if(result==0) { 459 /* found the string: add each code point, and its case closure */ 460 int c; 461 462 for(i=unfoldStringWidth; i<unfoldRowWidth && unfold[unfoldOffset+i]!=0; i+=UTF16.getCharCount(c)) { 463 c=UTF16.charAt(unfold, unfoldOffset, unfold.length, i); 464 set.add(c); 465 addCaseClosure(c, set); 466 } 467 return true; 468 } else if(result<0) { 469 limit=i; 470 } else /* result>0 */ { 471 start=i+1; 472 } 473 } 474 475 return false; /* string not found */ 476 } 477 478 /** @return NONE, LOWER, UPPER, TITLE */ getType(int c)479 public final int getType(int c) { 480 return getTypeFromProps(trie.get(c)); 481 } 482 483 /** @return like getType() but also sets IGNORABLE if c is 
case-ignorable */ getTypeOrIgnorable(int c)484 public final int getTypeOrIgnorable(int c) { 485 return getTypeAndIgnorableFromProps(trie.get(c)); 486 } 487 488 /** @return NO_DOT, SOFT_DOTTED, ABOVE, OTHER_ACCENT */ getDotType(int c)489 public final int getDotType(int c) { 490 int props=trie.get(c); 491 if(!propsHasException(props)) { 492 return props&DOT_MASK; 493 } else { 494 return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK; 495 } 496 } 497 isSoftDotted(int c)498 public final boolean isSoftDotted(int c) { 499 return getDotType(c)==SOFT_DOTTED; 500 } 501 isCaseSensitive(int c)502 public final boolean isCaseSensitive(int c) { 503 int props=trie.get(c); 504 if(!propsHasException(props)) { 505 return (props&SENSITIVE)!=0; 506 } else { 507 return (exceptions.charAt(getExceptionsOffset(props))&EXC_SENSITIVE)!=0; 508 } 509 } 510 511 // string casing ------------------------------------------------------- *** 512 513 /* 514 * These internal functions form the core of string case mappings. 515 * They map single code points to result code points or strings and take 516 * all necessary conditions (context, locale ID, options) into account. 517 * 518 * They do not iterate over the source or write to the destination 519 * so that the same functions are useful for non-standard string storage, 520 * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. 521 * For the same reason, the "surrounding text" context is passed in as a 522 * ContextIterator which does not make any assumptions about 523 * the underlying storage. 524 * 525 * This section contains helper functions that check for conditions 526 * in the input text surrounding the current code point 527 * according to SpecialCasing.txt. 
528 * 529 * Each helper function gets the index 530 * - after the current code point if it looks at following text 531 * - before the current code point if it looks at preceding text 532 * 533 * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows: 534 * 535 * Final_Sigma 536 * C is preceded by a sequence consisting of 537 * a cased letter and a case-ignorable sequence, 538 * and C is not followed by a sequence consisting of 539 * an ignorable sequence and then a cased letter. 540 * 541 * More_Above 542 * C is followed by one or more characters of combining class 230 (ABOVE) 543 * in the combining character sequence. 544 * 545 * After_Soft_Dotted 546 * The last preceding character with combining class of zero before C 547 * was Soft_Dotted, 548 * and there is no intervening combining character class 230 (ABOVE). 549 * 550 * Before_Dot 551 * C is followed by combining dot above (U+0307). 552 * Any sequence of characters with a combining class that is neither 0 nor 230 553 * may intervene between the current character and the combining dot above. 554 * 555 * The erratum from 2002-10-31 adds the condition 556 * 557 * After_I 558 * The last preceding base character was an uppercase I, and there is no 559 * intervening combining character class 230 (ABOVE). 560 * 561 * (See Jitterbug 2344 and the comments on After_I below.) 562 * 563 * Helper definitions in Unicode 3.2 UAX 21: 564 * 565 * D1. A character C is defined to be cased 566 * if it meets any of the following criteria: 567 * 568 * - The general category of C is Titlecase Letter (Lt) 569 * - In [CoreProps], C has one of the properties Uppercase, or Lowercase 570 * - Given D = NFD(C), then it is not the case that: 571 * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) 572 * (This third criterium does not add any characters to the list 573 * for Unicode 3.2. Ignored.) 574 * 575 * D2. 
A character C is defined to be case-ignorable 576 * if it meets either of the following criteria: 577 * 578 * - The general category of C is 579 * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or 580 * Letter Modifier (Lm), or Symbol Modifier (Sk) 581 * - C is one of the following characters 582 * U+0027 APOSTROPHE 583 * U+00AD SOFT HYPHEN (SHY) 584 * U+2019 RIGHT SINGLE QUOTATION MARK 585 * (the preferred character for apostrophe) 586 * 587 * D3. A case-ignorable sequence is a sequence of 588 * zero or more case-ignorable characters. 589 */ 590 591 /** 592 * Iterator for string case mappings, which need to look at the 593 * context (surrounding text) of a given character for conditional mappings. 594 * 595 * The iterator only needs to go backward or forward away from the 596 * character in question. It does not use any indexes on this interface. 597 * It does not support random access or an arbitrary change of 598 * iteration direction. 599 * 600 * The code point being case-mapped itself is never returned by 601 * this iterator. 602 * @hide Only a subset of ICU is exposed in Android 603 */ 604 public interface ContextIterator { 605 /** 606 * Reset the iterator for forward or backward iteration. 607 * @param dir >0: Begin iterating forward from the first code point 608 * after the one that is being case-mapped. 609 * <0: Begin iterating backward from the first code point 610 * before the one that is being case-mapped. 611 */ reset(int dir)612 public void reset(int dir); 613 /** 614 * Iterate and return the next code point, moving in the direction 615 * determined by the reset() call. 616 * @return Next code point, or <0 when the iteration is done. 617 */ next()618 public int next(); 619 } 620 621 /** 622 * Fast case mapping data for ASCII/Latin. 623 * Linear arrays of delta bytes: 0=no mapping; EXC=exception. 624 * Deltas must not cross the ASCII boundary, or else they cannot be easily used 625 * in simple UTF-8 code. 
    /**
     * Fast case mapping data for ASCII/Latin.
     * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
     * Deltas must not cross the ASCII boundary, or else they cannot be easily used
     * in simple UTF-8 code.
     *
     * Each table below holds LIMIT (0x180) entries laid out as 24 rows of 16;
     * the range comments assume the array index is the code point itself
     * (U+0000..U+017F), which matches the LIMIT documentation.
     */
    static final class LatinCase {
        /** Case mapping/folding data for code points up to U+017F. */
        static final char LIMIT = 0x180;
        /** U+017F case-folds and uppercases crossing the ASCII boundary. */
        static final char LONG_S = 0x17f;
        /** Exception: Complex mapping, or too-large delta. */
        static final byte EXC = -0x80;

        /** Deltas for lowercasing for most locales, and default case folding. */
        static final byte[] TO_LOWER_NORMAL = {
            // U+0000..U+003F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0040..U+007F
            0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
            32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0080..U+00BF
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+00C0..U+00FF
            32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
            32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0100..U+013F
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,

            // U+0140..U+017F
            0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
        };

        /** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
        static final byte[] TO_LOWER_TR_LT = {
            // U+0000..U+003F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0040..U+007F
            0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
            32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0080..U+00BF
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+00C0..U+00FF
            32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
            32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0100..U+013F
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
            EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,

            // U+0140..U+017F
            0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
        };

        /** Deltas for uppercasing for most locales. */
        static final byte[] TO_UPPER_NORMAL = {
            // U+0000..U+003F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0040..U+007F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

            // U+0080..U+00BF
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+00C0..U+00FF
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

            // U+0100..U+013F
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

            // U+0140..U+017F
            -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
        };

        /** Deltas for uppercasing for tr/az. */
        static final byte[] TO_UPPER_TR = {
            // U+0000..U+003F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+0040..U+007F
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

            // U+0080..U+00BF
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // U+00C0..U+00FF
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
            -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
            -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

            // U+0100..U+013F
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

            // U+0140..U+017F
            -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
        };
    }
772 * Aside from the string contents, these are indicated with a single int 773 * value as follows: 774 * 775 * Mapping to self: Negative values (~self instead of -self to support U+0000) 776 * 777 * Mapping to another code point: Positive values >MAX_STRING_LENGTH 778 * 779 * Mapping to a string: The string length (0..MAX_STRING_LENGTH) is 780 * returned. Note that the string result may indeed have zero length. 781 */ 782 public static final int MAX_STRING_LENGTH=0x1f; 783 784 //ivate static final int LOC_UNKNOWN=0; 785 public static final int LOC_ROOT=1; 786 static final int LOC_TURKISH=2; 787 static final int LOC_LITHUANIAN=3; 788 static final int LOC_GREEK=4; 789 public static final int LOC_DUTCH=5; 790 static final int LOC_ARMENIAN=6; 791 getCaseLocale(Locale locale)792 public static final int getCaseLocale(Locale locale) { 793 return getCaseLocale(locale.getLanguage()); 794 } getCaseLocale(ULocale locale)795 public static final int getCaseLocale(ULocale locale) { 796 return getCaseLocale(locale.getLanguage()); 797 } 798 /** Accepts both 2- and 3-letter language subtags. */ getCaseLocale(String language)799 private static final int getCaseLocale(String language) { 800 // Check the subtag length to reduce the number of comparisons 801 // for locales without special behavior. 802 // Fastpath for English "en" which is often used for default (=root locale) case mappings, 803 // and for Chinese "zh": Very common but no special case mapping behavior. 
804 if(language.length()==2) { 805 if(language.equals("en") || language.charAt(0)>'t') { 806 return LOC_ROOT; 807 } else if(language.equals("tr") || language.equals("az")) { 808 return LOC_TURKISH; 809 } else if(language.equals("el")) { 810 return LOC_GREEK; 811 } else if(language.equals("lt")) { 812 return LOC_LITHUANIAN; 813 } else if(language.equals("nl")) { 814 return LOC_DUTCH; 815 } else if(language.equals("hy")) { 816 return LOC_ARMENIAN; 817 } 818 } else if(language.length()==3) { 819 if(language.equals("tur") || language.equals("aze")) { 820 return LOC_TURKISH; 821 } else if(language.equals("ell")) { 822 return LOC_GREEK; 823 } else if(language.equals("lit")) { 824 return LOC_LITHUANIAN; 825 } else if(language.equals("nld")) { 826 return LOC_DUTCH; 827 } else if(language.equals("hye")) { // *not* hyw 828 return LOC_ARMENIAN; 829 } 830 } 831 return LOC_ROOT; 832 } 833 834 /* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */ isFollowedByCasedLetter(ContextIterator iter, int dir)835 private final boolean isFollowedByCasedLetter(ContextIterator iter, int dir) { 836 int c; 837 838 if(iter==null) { 839 return false; 840 } 841 842 for(iter.reset(dir); (c=iter.next())>=0;) { 843 int type=getTypeOrIgnorable(c); 844 if((type&4)!=0) { 845 /* case-ignorable, continue with the loop */ 846 } else if(type!=NONE) { 847 return true; /* followed by cased letter */ 848 } else { 849 return false; /* uncased and not case-ignorable */ 850 } 851 } 852 853 return false; /* not followed by cased letter */ 854 } 855 856 /* Is preceded by Soft_Dotted character with no intervening cc=230 ? 
*/ isPrecededBySoftDotted(ContextIterator iter)857 private final boolean isPrecededBySoftDotted(ContextIterator iter) { 858 int c; 859 int dotType; 860 861 if(iter==null) { 862 return false; 863 } 864 865 for(iter.reset(-1); (c=iter.next())>=0;) { 866 dotType=getDotType(c); 867 if(dotType==SOFT_DOTTED) { 868 return true; /* preceded by TYPE_i */ 869 } else if(dotType!=OTHER_ACCENT) { 870 return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ 871 } 872 } 873 874 return false; /* not preceded by TYPE_i */ 875 } 876 877 /* 878 * See Jitterbug 2344: 879 * The condition After_I for Turkic-lowercasing of U+0307 combining dot above 880 * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because 881 * we made those releases compatible with Unicode 3.2 which had not fixed 882 * a related bug in SpecialCasing.txt. 883 * 884 * From the Jitterbug 2344 text: 885 * ... this bug is listed as a Unicode erratum 886 * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html 887 * <quote> 888 * There are two errors in SpecialCasing.txt. 889 * 1. Missing semicolons on two lines. ... [irrelevant for ICU] 890 * 2. An incorrect context definition. Correct as follows: 891 * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE 892 * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE 893 * --- 894 * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 895 * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 896 * where the context After_I is defined as: 897 * The last preceding base character was an uppercase I, and there is no 898 * intervening combining character class 230 (ABOVE). 899 * </quote> 900 * 901 * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: 902 * 903 * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 
904 * # This matches the behavior of the canonically equivalent I-dot_above 905 * 906 * See also the description in this place in older versions of uchar.c (revision 1.100). 907 * 908 * Markus W. Scherer 2003-feb-15 909 */ 910 911 /* Is preceded by base character 'I' with no intervening cc=230 ? */ isPrecededBy_I(ContextIterator iter)912 private final boolean isPrecededBy_I(ContextIterator iter) { 913 int c; 914 int dotType; 915 916 if(iter==null) { 917 return false; 918 } 919 920 for(iter.reset(-1); (c=iter.next())>=0;) { 921 if(c==0x49) { 922 return true; /* preceded by I */ 923 } 924 dotType=getDotType(c); 925 if(dotType!=OTHER_ACCENT) { 926 return false; /* preceded by different base character (not I), or intervening cc==230 */ 927 } 928 } 929 930 return false; /* not preceded by I */ 931 } 932 933 /* Is followed by one or more cc==230 ? */ isFollowedByMoreAbove(ContextIterator iter)934 private final boolean isFollowedByMoreAbove(ContextIterator iter) { 935 int c; 936 int dotType; 937 938 if(iter==null) { 939 return false; 940 } 941 942 for(iter.reset(1); (c=iter.next())>=0;) { 943 dotType=getDotType(c); 944 if(dotType==ABOVE) { 945 return true; /* at least one cc==230 following */ 946 } else if(dotType!=OTHER_ACCENT) { 947 return false; /* next base character, no more cc==230 following */ 948 } 949 } 950 951 return false; /* no more cc==230 following */ 952 } 953 954 /* Is followed by a dot above (without cc==230 in between) ? 
*/ isFollowedByDotAbove(ContextIterator iter)955 private final boolean isFollowedByDotAbove(ContextIterator iter) { 956 int c; 957 int dotType; 958 959 if(iter==null) { 960 return false; 961 } 962 963 for(iter.reset(1); (c=iter.next())>=0; ) { 964 if(c==0x307) { 965 return true; 966 } 967 dotType=getDotType(c); 968 if(dotType!=OTHER_ACCENT) { 969 return false; /* next base character or cc==230 in between */ 970 } 971 } 972 973 return false; /* no dot above following */ 974 } 975 976 private static final String 977 iDot= "i\u0307", 978 jDot= "j\u0307", 979 iOgonekDot= "\u012f\u0307", 980 iDotGrave= "i\u0307\u0300", 981 iDotAcute= "i\u0307\u0301", 982 iDotTilde= "i\u0307\u0303"; 983 984 /** 985 * Get the full lowercase mapping for c. 986 * 987 * @param c Character to be mapped. 988 * @param iter Character iterator, used for context-sensitive mappings. 989 * See ContextIterator for details. 990 * If iter==null then a context-independent result is returned. 991 * @param out If the mapping result is a string, then it is appended to out. 992 * @param caseLocale Case locale value from ucase_getCaseLocale(). 993 * @return Output code point or string length, see MAX_STRING_LENGTH. 
*
     * @see ContextIterator
     * @see #MAX_STRING_LENGTH
     * @hide draft / provisional / internal are hidden on Android
     */
    public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
        int result, props;

        result=c;
        props=trie.get(c);
        if(!propsHasException(props)) {
            /* simple case: the mapping, if any, is a delta stored in the props word */
            if(isUpperOrTitleFromProps(props)) {
                result=c+getDelta(props);
            }
        } else {
            int excOffset=getExceptionsOffset(props), excOffset2;
            int excWord=exceptions.charAt(excOffset++);
            int full;

            /*
             * Remember the offset just past the exceptions word: the slot lookups
             * at the end use it, while excOffset is overwritten in the
             * full-mappings branch below.
             */
            excOffset2=excOffset;

            if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
                /* use hardcoded conditions and mappings */
                /*
                 * Test for conditional mappings first
                 * (otherwise the unconditional default mappings are always taken),
                 * then test for characters that have unconditional mappings in SpecialCasing.txt,
                 * then get the UnicodeData.txt mappings.
                 */
                if( caseLocale==LOC_LITHUANIAN &&
                        /* base characters, find accents above */
                        (((c==0x49 || c==0x4a || c==0x12e) &&
                            isFollowedByMoreAbove(iter)) ||
                        /* precomposed with accent above, no need to find one */
                        (c==0xcc || c==0xcd || c==0x128))
                ) {
                    /*
                        # Lithuanian

                        # Lithuanian retains the dot in a lowercase i when followed by accents.

                        # Introduce an explicit dot above when lowercasing capital I's and J's
                        # whenever there are more accents above.
                        # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)

                        0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
                        004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
                        012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
                        00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
                        00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
                        0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
                     */
                    /* each case appends the result string and returns its length */
                    try {
                        switch(c) {
                        case 0x49:  /* LATIN CAPITAL LETTER I */
                            out.append(iDot);
                            return 2;
                        case 0x4a:  /* LATIN CAPITAL LETTER J */
                            out.append(jDot);
                            return 2;
                        case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
                            out.append(iOgonekDot);
                            return 2;
                        case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
                            out.append(iDotGrave);
                            return 3;
                        case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
                            out.append(iDotAcute);
                            return 3;
                        case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
                            out.append(iDotTilde);
                            return 3;
                        default:
                            return 0; /* will not occur */
                        }
                    } catch (IOException e) {
                        /* Appendable.append() failed; rethrow unchecked with the cause attached */
                        throw new ICUUncheckedIOException(e);
                    }
                /* # Turkish and Azeri */
                } else if(caseLocale==LOC_TURKISH && c==0x130) {
                    /*
                        # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                        # The following rules handle those cases.

                        0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
                        0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
                     */
                    return 0x69;
                } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
                    /*
                        # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
                        # This matches the behavior of the canonically equivalent I-dot_above

                        0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
                        0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
                     */
                    return 0; /* remove the dot (continue without output) */
                } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
                    /*
                        # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.

                        0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
                        0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
                     */
                    return 0x131;
                } else if(c==0x130) {
                    /*
                        # Preserve canonical equivalence for I with dot. Turkic is handled below.

                        0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
                     */
                    /* reached only when the Turkic U+0130 branch above did not apply */
                    try {
                        out.append(iDot);
                        return 2;
                    } catch (IOException e) {
                        throw new ICUUncheckedIOException(e);
                    }
                } else if(  c==0x3a3 &&
                            !isFollowedByCasedLetter(iter, 1) &&
                            isFollowedByCasedLetter(iter, -1) /* -1=preceded */
                ) {
                    /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
                    /*
                        # Special case for final form of sigma

                        03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
                     */
                    return 0x3c2; /* greek small final sigma */
                } else {
                    /* no known conditional special case mapping, use a normal mapping */
                }
            } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
                /* FULL_LOWER (0xf) masks the lowercase string length out of the slot value */
                full=(int)value&FULL_LOWER;
                if(full!=0) {
                    /* start of full case mapping strings */
                    excOffset=(int)(value>>32)+1;

                    try {
                        // append the lowercase mapping
                        out.append(exceptions, excOffset, excOffset+full);

                        /* return the string length */
                        return full;
                    } catch (IOException e) {
                        throw new ICUUncheckedIOException(e);
                    }
                }
            }

            /* no conditional or full mapping was used: fall back to the simple lowercase mapping */
            if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
                int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
                return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
            }
            if(hasSlot(excWord, EXC_LOWER)) {
                result=getSlotValue(excWord, EXC_LOWER, excOffset2);
            }
        }

        /* an unchanged code point is reported as its bitwise complement, i.e. a negative value */
        return (result==c) ? ~result : result;
    }

    /*
     * internal
     *
     * Shared implementation of toFullUpper() and toFullTitle().
     * upperNotTitle selects the uppercase (true) or titlecase (false) mapping;
     * loc is the case locale value used for the hardcoded conditions.
     * Returns the mapped code point, or the length of the string appended to out
     * (0 when the character is removed), or ~c when c is returned unchanged.
     */
    private final int toUpperOrTitle(int c, ContextIterator iter,
                                     Appendable out,
                                     int loc,
                                     boolean upperNotTitle) {
        int result;
        int props;

        result=c;
        props=trie.get(c);
        if(!propsHasException(props)) {
            /* simple case: only lowercase characters carry an upper/title delta here */
            if(getTypeFromProps(props)==LOWER) {
                result=c+getDelta(props);
            }
        } else {
            int excOffset=getExceptionsOffset(props), excOffset2;
            int excWord=exceptions.charAt(excOffset++);
            int full, index;

            /* offset just past the exceptions word, kept for the slot lookups at the end */
            excOffset2=excOffset;

            if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
                /* use hardcoded conditions and mappings */
                if(loc==LOC_TURKISH && c==0x69) {
                    /*
                        # Turkish and Azeri

                        # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                        # The following rules handle those cases.

                        # When uppercasing, i turns into a dotted capital I

                        0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
                        0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
                     */
                    return 0x130;
                } else if(loc==LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter)) {
                    /*
                        # Lithuanian

                        # Lithuanian retains the dot in a lowercase i when followed by accents.

                        # Remove DOT ABOVE after "i" with upper or titlecase

                        0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
                     */
                    return 0; /* remove the dot (continue without output) */
                } else if(c==0x0587) {
                    // See ICU-13416:
                    // և ligature ech-yiwn
                    // uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
                    // but to ԵՎ=ech+vew in Eastern Armenian.
                    try {
                        if(loc==LOC_ARMENIAN) {
                            out.append(upperNotTitle ? "ԵՎ" : "Եվ");
                        } else {
                            out.append(upperNotTitle ? "ԵՒ" : "Եւ");
                        }
                        return 2;
                    } catch (IOException e) {
                        throw new ICUUncheckedIOException(e);
                    }
                } else {
                    /* no known conditional special case mapping, use a normal mapping */
                }
            } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
                /* 16-bit lengths word: four 4-bit string lengths, consumed low nibble first */
                full=(int)value&0xffff;

                /* start of full case mapping strings */
                excOffset=(int)(value>>32)+1;

                /* skip the lowercase and case-folding result strings */
                excOffset+=full&FULL_LOWER;
                full>>=4;
                excOffset+=full&0xf;
                full>>=4;

                if(upperNotTitle) {
                    /* low nibble is now the uppercase string length */
                    full&=0xf;
                } else {
                    /* skip the uppercase result string */
                    excOffset+=full&0xf;
                    full=(full>>4)&0xf;
                }

                if(full!=0) {
                    try {
                        // append the result string
                        out.append(exceptions, excOffset, excOffset+full);

                        /* return the string length */
                        return full;
                    } catch (IOException e) {
                        throw new ICUUncheckedIOException(e);
                    }
                }
            }

            /* no conditional or full mapping was used: fall back to the simple mappings */
            if(hasSlot(excWord, EXC_DELTA) && getTypeFromProps(props)==LOWER) {
                int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
                return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
            }
            if(!upperNotTitle && hasSlot(excWord, EXC_TITLE)) {
                index=EXC_TITLE;
            } else if(hasSlot(excWord, EXC_UPPER)) {
                /* here, titlecase is same as uppercase */
                index=EXC_UPPER;
            } else {
                return ~c;
            }
            result=getSlotValue(excWord, index, excOffset2);
        }

        /* an unchanged code point is reported as its bitwise complement, i.e. a negative value */
        return (result==c) ? ~result : result;
    }

    /**
     * Full uppercase mapping: delegates to toUpperOrTitle() with upperNotTitle=true.
     */
    public final int toFullUpper(int c, ContextIterator iter,
                                 Appendable out,
                                 int caseLocale) {
        return toUpperOrTitle(c, iter, out, caseLocale, true);
    }

    /**
     * Full titlecase mapping: delegates to toUpperOrTitle() with upperNotTitle=false.
     */
    public final int toFullTitle(int c, ContextIterator iter,
                                 Appendable out,
                                 int caseLocale) {
        return toUpperOrTitle(c, iter, out, caseLocale, false);
    }

    /* case folding ------------------------------------------------------------- */

    /*
     * Case folding is similar to lowercasing.
     * The result may be a simple mapping, i.e., a single code point, or
     * a full mapping, i.e., a string.
     * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
     * then only the lowercase mapping is stored.
     *
     * Some special cases are hardcoded because their conditions cannot be
     * parsed and processed from CaseFolding.txt.
     *
     * Unicode 3.2 CaseFolding.txt specifies for its status field:

    # C: common case folding, common mappings shared by both simple and full mappings.
    # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
    # S: simple case folding, mappings to single characters where different from F.
    # T: special case for uppercase I and dotted uppercase I
    #    - For non-Turkic languages, this mapping is normally not used.
1304 # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. 1305 # 1306 # Usage: 1307 # A. To do a simple case folding, use the mappings with status C + S. 1308 # B. To do a full case folding, use the mappings with status C + F. 1309 # 1310 # The mappings with status T can be used or omitted depending on the desired case-folding 1311 # behavior. (The default option is to exclude them.) 1312 1313 * Unicode 3.2 has 'T' mappings as follows: 1314 1315 0049; T; 0131; # LATIN CAPITAL LETTER I 1316 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 1317 1318 * while the default mappings for these code points are: 1319 1320 0049; C; 0069; # LATIN CAPITAL LETTER I 1321 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 1322 1323 * U+0130 has no simple case folding (simple-case-folds to itself). 1324 */ 1325 1326 /** 1327 * Bit mask for getting just the options from a string compare options word 1328 * that are relevant for case folding (of a single string or code point). 1329 * 1330 * Currently only bit 0 for FOLD_CASE_EXCLUDE_SPECIAL_I. 1331 * It is conceivable that at some point we might use one more bit for using uppercase sharp s. 1332 * It is conceivable that at some point we might want the option to use only simple case foldings 1333 * when operating on strings. 
1334 * 1335 * @hide draft / provisional / internal are hidden on Android 1336 */ 1337 static final int FOLD_CASE_OPTIONS_MASK = 7; 1338 1339 /* return the simple case folding mapping for c */ fold(int c, int options)1340 public final int fold(int c, int options) { 1341 int props=trie.get(c); 1342 if(!propsHasException(props)) { 1343 if(isUpperOrTitleFromProps(props)) { 1344 c+=getDelta(props); 1345 } 1346 } else { 1347 int excOffset=getExceptionsOffset(props); 1348 int excWord=exceptions.charAt(excOffset++); 1349 int index; 1350 if((excWord&EXC_CONDITIONAL_FOLD)!=0) { 1351 /* special case folding mappings, hardcoded */ 1352 if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) { 1353 /* default mappings */ 1354 if(c==0x49) { 1355 /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ 1356 return 0x69; 1357 } else if(c==0x130) { 1358 /* no simple case folding for U+0130 */ 1359 return c; 1360 } 1361 } else { 1362 /* Turkic mappings */ 1363 if(c==0x49) { 1364 /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ 1365 return 0x131; 1366 } else if(c==0x130) { 1367 /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 1368 return 0x69; 1369 } 1370 } 1371 } 1372 if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) { 1373 return c; 1374 } 1375 if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) { 1376 int delta=getSlotValue(excWord, EXC_DELTA, excOffset); 1377 return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; 1378 } 1379 if(hasSlot(excWord, EXC_FOLD)) { 1380 index=EXC_FOLD; 1381 } else if(hasSlot(excWord, EXC_LOWER)) { 1382 index=EXC_LOWER; 1383 } else { 1384 return c; 1385 } 1386 c=getSlotValue(excWord, index, excOffset); 1387 } 1388 return c; 1389 } 1390 1391 /* 1392 * Issue for canonical caseless match (UAX #21): 1393 * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve 1394 * canonical equivalence, unlike default-option casefolding. 
* For example, I-grave and I + grave fold to strings that are not canonically
     * equivalent.
     * For more details, see the comment in unorm_compare() in unorm.cpp
     * and the intermediate prototype changes for Jitterbug 2021.
     * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
     *
     * This did not get fixed because it appears that it is not possible to fix
     * it for uppercase and lowercase characters (I-grave vs. i-grave)
     * together in a way that they still fold to common result strings.
     */

    /*
     * Full case folding of c.
     * Returns the folded code point, or the length of the string appended to out,
     * or ~c (a negative value) when c is returned unchanged.
     * options selects between the default and the Turkic mappings for
     * U+0049 and U+0130.
     */
    public final int toFullFolding(int c, Appendable out, int options) {
        int result;
        int props;

        result=c;
        props=trie.get(c);
        if(!propsHasException(props)) {
            /* simple case: the mapping, if any, is a delta stored in the props word */
            if(isUpperOrTitleFromProps(props)) {
                result=c+getDelta(props);
            }
        } else {
            int excOffset=getExceptionsOffset(props), excOffset2;
            int excWord=exceptions.charAt(excOffset++);
            int full, index;

            /* offset just past the exceptions word, kept for the slot lookups at the end */
            excOffset2=excOffset;

            if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
                /* use hardcoded conditions and mappings */
                if((options&FOLD_CASE_OPTIONS_MASK)==UCharacter.FOLD_CASE_DEFAULT) {
                    /* default mappings */
                    if(c==0x49) {
                        /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
                        return 0x69;
                    } else if(c==0x130) {
                        /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
                        try {
                            out.append(iDot);
                            return 2;
                        } catch (IOException e) {
                            /* Appendable.append() failed; rethrow unchecked with the cause attached */
                            throw new ICUUncheckedIOException(e);
                        }
                    }
                } else {
                    /* Turkic mappings */
                    if(c==0x49) {
                        /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
                        return 0x131;
                    } else if(c==0x130) {
                        /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
                        return 0x69;
                    }
                }
            } else if(hasSlot(excWord, EXC_FULL_MAPPINGS)) {
                long value=getSlotValueAndOffset(excWord, EXC_FULL_MAPPINGS, excOffset);
                /* 16-bit lengths word; the lowest nibble is the lowercase string length */
                full=(int)value&0xffff;

                /* start of full case mapping strings */
                excOffset=(int)(value>>32)+1;

                /* skip the lowercase result string */
                excOffset+=full&FULL_LOWER;
                /* the next nibble is the length of the case-folding string */
                full=(full>>4)&0xf;

                if(full!=0) {
                    try {
                        // append the result string
                        out.append(exceptions, excOffset, excOffset+full);

                        /* return the string length */
                        return full;
                    } catch (IOException e) {
                        throw new ICUUncheckedIOException(e);
                    }
                }
            }

            /* no conditional or full mapping was used: fall back to the simple folding */
            if((excWord&EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
                return ~c;
            }
            if(hasSlot(excWord, EXC_DELTA) && isUpperOrTitleFromProps(props)) {
                int delta=getSlotValue(excWord, EXC_DELTA, excOffset2);
                return (excWord&EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
            }
            if(hasSlot(excWord, EXC_FOLD)) {
                index=EXC_FOLD;
            } else if(hasSlot(excWord, EXC_LOWER)) {
                index=EXC_LOWER;
            } else {
                return ~c;
            }
            result=getSlotValue(excWord, index, excOffset2);
        }

        /* an unchanged code point is reported as its bitwise complement, i.e. a negative value */
        return (result==c) ? ~result : result;
    }

    /* case mapping properties API ---------------------------------------------- */

    /*
     * We need a StringBuilder for multi-code point output from the
     * full case mapping functions. However, we do not actually use that output,
     * we just check whether the input character was mapped to anything else.
     * We use a shared StringBuilder to avoid allocating a new one in each call.
     * We remove its contents each time so that it does not grow large over time.
1501 * 1502 * @internal 1503 */ 1504 public static final StringBuilder dummyStringBuilder = new StringBuilder(); 1505 hasBinaryProperty(int c, int which)1506 public final boolean hasBinaryProperty(int c, int which) { 1507 switch(which) { 1508 case UProperty.LOWERCASE: 1509 return LOWER==getType(c); 1510 case UProperty.UPPERCASE: 1511 return UPPER==getType(c); 1512 case UProperty.SOFT_DOTTED: 1513 return isSoftDotted(c); 1514 case UProperty.CASE_SENSITIVE: 1515 return isCaseSensitive(c); 1516 case UProperty.CASED: 1517 return NONE!=getType(c); 1518 case UProperty.CASE_IGNORABLE: 1519 return (getTypeOrIgnorable(c)>>2)!=0; 1520 /* 1521 * Note: The following Changes_When_Xyz are defined as testing whether 1522 * the NFD form of the input changes when Xyz-case-mapped. 1523 * However, this simpler implementation of these properties, 1524 * ignoring NFD, passes the tests. 1525 * The implementation needs to be changed if the tests start failing. 1526 * When that happens, optimizations should be used to work with the 1527 * per-single-code point ucase_toFullXyz() functions unless 1528 * the NFD form has more than one code point, 1529 * and the property starts set needs to be the union of the 1530 * start sets for normalization and case mappings. 
1531 */ 1532 case UProperty.CHANGES_WHEN_LOWERCASED: 1533 dummyStringBuilder.setLength(0); 1534 return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0; 1535 case UProperty.CHANGES_WHEN_UPPERCASED: 1536 dummyStringBuilder.setLength(0); 1537 return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0; 1538 case UProperty.CHANGES_WHEN_TITLECASED: 1539 dummyStringBuilder.setLength(0); 1540 return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0; 1541 /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */ 1542 case UProperty.CHANGES_WHEN_CASEMAPPED: 1543 dummyStringBuilder.setLength(0); 1544 return 1545 toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 || 1546 toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 || 1547 toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0; 1548 default: 1549 return false; 1550 } 1551 } 1552 1553 // data members -------------------------------------------------------- *** 1554 private int indexes[]; 1555 private String exceptions; 1556 private char unfold[]; 1557 1558 private Trie2_16 trie; 1559 1560 // data format constants ----------------------------------------------- *** 1561 private static final String DATA_NAME="ucase"; 1562 private static final String DATA_TYPE="icu"; 1563 private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE; 1564 1565 /* format "cAsE" */ 1566 private static final int FMT=0x63415345; 1567 1568 /* indexes into indexes[] */ 1569 //private static final int IX_INDEX_TOP=0; 1570 //private static final int IX_LENGTH=1; 1571 private static final int IX_TRIE_SIZE=2; 1572 private static final int IX_EXC_LENGTH=3; 1573 private static final int IX_UNFOLD_LENGTH=4; 1574 1575 //private static final int IX_MAX_FULL_LENGTH=15; 1576 private static final int IX_TOP=16; 1577 1578 // definitions for 16-bit case properties word ------------------------- *** 1579 getTrie()1580 static Trie2_16 getTrie() { 1581 return INSTANCE.trie; 1582 } 1583 1584 /* 2-bit constants for types of 
cased characters */ 1585 public static final int TYPE_MASK=3; 1586 public static final int NONE=0; 1587 public static final int LOWER=1; 1588 public static final int UPPER=2; 1589 public static final int TITLE=3; 1590 1591 /** @return NONE, LOWER, UPPER, TITLE */ getTypeFromProps(int props)1592 static final int getTypeFromProps(int props) { 1593 return props&TYPE_MASK; 1594 } 1595 1596 /** @return like getTypeFromProps() but also sets IGNORABLE if props indicate case-ignorable */ getTypeAndIgnorableFromProps(int props)1597 private static final int getTypeAndIgnorableFromProps(int props) { 1598 return props&7; 1599 } 1600 isUpperOrTitleFromProps(int props)1601 static final boolean isUpperOrTitleFromProps(int props) { 1602 return (props & 2) != 0; 1603 } 1604 1605 static final int IGNORABLE=4; 1606 private static final int EXCEPTION= 8; 1607 private static final int SENSITIVE= 0x10; 1608 1609 private static final int DOT_MASK= 0x60; 1610 //private static final int NO_DOT= 0; /* normal characters with cc=0 */ 1611 private static final int SOFT_DOTTED= 0x20; /* soft-dotted characters with cc=0 */ 1612 private static final int ABOVE= 0x40; /* "above" accents with cc=230 */ 1613 private static final int OTHER_ACCENT= 0x60; /* other accent character (0<cc!=230) */ 1614 1615 /* no exception: bits 15..7 are a 9-bit signed case mapping delta */ 1616 private static final int DELTA_SHIFT= 7; 1617 //private static final int DELTA_MASK= 0xff80; 1618 //private static final int MAX_DELTA= 0xff; 1619 //private static final int MIN_DELTA= (-MAX_DELTA-1); 1620 getDelta(int props)1621 static final int getDelta(int props) { 1622 return (short)props>>DELTA_SHIFT; 1623 } 1624 1625 /* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */ 1626 private static final int EXC_SHIFT= 4; 1627 //private static final int EXC_MASK= 0xfff0; 1628 //private static final int MAX_EXCEPTIONS=((EXC_MASK>>EXC_SHIFT)+1); 1629 1630 /* definitions for 16-bit main exceptions word 
------------------------------ */

    /* first 8 bits indicate values in optional slots */
    /* (slot indexes, passed to hasSlot()/getSlotValue() together with the exceptions word) */
    private static final int EXC_LOWER=0;
    private static final int EXC_FOLD=1;
    private static final int EXC_UPPER=2;
    private static final int EXC_TITLE=3;
    private static final int EXC_DELTA=4;
    //private static final int EXC_5=5;           /* reserved */
    private static final int EXC_CLOSURE=6;
    private static final int EXC_FULL_MAPPINGS=7;
    //private static final int EXC_ALL_SLOTS=8;   /* one past the last slot */

    /* each slot is 2 uint16_t instead of 1 */
    private static final int EXC_DOUBLE_SLOTS=          0x100;

    /* flag bits tested directly against the exceptions word */
    private static final int EXC_NO_SIMPLE_CASE_FOLDING=0x200;
    private static final int EXC_DELTA_IS_NEGATIVE=0x400;
    private static final int EXC_SENSITIVE=0x800;

    /* EXC_DOT_MASK=DOT_MASK<<EXC_DOT_SHIFT */
    private static final int EXC_DOT_SHIFT=7;

    /* normally stored in the main word, but pushed out for larger exception indexes */
    //private static final int EXC_DOT_MASK=              0x3000;
    //private static final int EXC_NO_DOT=                0;
    //private static final int EXC_SOFT_DOTTED=           0x1000;
    //private static final int EXC_ABOVE=                 0x2000; /* "above" accents with cc=230 */
    //private static final int EXC_OTHER_ACCENT=          0x3000; /* other character (0<cc!=230) */

    /* complex/conditional mappings */
    private static final int EXC_CONDITIONAL_SPECIAL=   0x4000;
    private static final int EXC_CONDITIONAL_FOLD=      0x8000;

    /* definitions for lengths word for full case mappings */
    /* (four 4-bit string lengths; only the lowest mask is referenced by name) */
    private static final int FULL_LOWER=    0xf;
    //private static final int FULL_FOLDING=  0xf0;
    //private static final int FULL_UPPER=    0xf00;
    //private static final int FULL_TITLE=    0xf000;

    /* maximum lengths */
    //private static final int FULL_MAPPINGS_MAX_LENGTH=4*0xf;
    private static final int CLOSURE_MAX_LENGTH=0xf;

    /* constants for reverse case folding ("unfold") data */
    private static final int UNFOLD_ROWS=0;
    private static final int UNFOLD_ROW_WIDTH=1;
    private static final int UNFOLD_STRING_WIDTH=2;

    /*
     * public singleton instance
     */
    public static final UCaseProps INSTANCE;

    // This static initializer block must be placed after
    // other static member initialization
    static {
        try {
            // The private constructor declares IOException (it reads the data file).
            INSTANCE = new UCaseProps();
        } catch (IOException e) {
            // Rethrow unchecked, keeping the cause: a static initializer
            // cannot propagate a checked exception.
            throw new ICUUncheckedIOException(e);
        }
    }
}