1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2014, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util.props; 8 9 import java.io.PrintWriter; 10 import java.io.StringWriter; 11 import java.text.ParsePosition; 12 import java.util.ArrayList; 13 import java.util.Arrays; 14 import java.util.Collection; 15 import java.util.Comparator; 16 import java.util.HashMap; 17 import java.util.Iterator; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Map; 21 import java.util.TreeMap; 22 import java.util.regex.Pattern; 23 24 import com.ibm.icu.dev.util.CollectionUtilities.InverseMatcher; 25 import com.ibm.icu.dev.util.CollectionUtilities.ObjectMatcher; 26 import com.ibm.icu.dev.util.UnicodeMap; 27 import com.ibm.icu.impl.Utility; 28 import com.ibm.icu.text.SymbolTable; 29 import com.ibm.icu.text.UFormat; 30 import com.ibm.icu.text.UTF16; 31 import com.ibm.icu.text.UnicodeMatcher; 32 import com.ibm.icu.text.UnicodeSet; 33 import com.ibm.icu.text.UnicodeSetIterator; 34 35 public abstract class UnicodeProperty extends UnicodeLabel { 36 37 public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze(); 38 public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze(); 39 public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze(); 40 41 public static final UnicodeSet HIGH_SURROGATES = new UnicodeSet("[\\uD800-\\uDB7F]").freeze(); 42 public static final int SAMPLE_HIGH_SURROGATE = HIGH_SURROGATES.charAt(0); 43 public static final UnicodeSet HIGH_PRIVATE_USE_SURROGATES = new UnicodeSet("[\\uDB80-\\uDBFF]").freeze(); 44 public static final int SAMPLE_HIGH_PRIVATE_USE_SURROGATE = HIGH_PRIVATE_USE_SURROGATES.charAt(0); 45 public static final UnicodeSet LOW_SURROGATES = new UnicodeSet("[\\uDC00-\\uDFFF]").freeze(); 46 public static final int SAMPLE_LOW_SURROGATE = LOW_SURROGATES.charAt(0); 47 48 public static final UnicodeSet PRIVATE_USE_AREA = new UnicodeSet("[\\uE000-\\uF8FF]").freeze(); 49 public static final int SAMPLE_PRIVATE_USE_AREA = PRIVATE_USE_AREA.charAt(0); 50 public static final UnicodeSet PRIVATE_USE_AREA_A = new UnicodeSet("[\\U000F0000-\\U000FFFFD]").freeze(); 51 public static final int SAMPLE_PRIVATE_USE_AREA_A = PRIVATE_USE_AREA_A.charAt(0); 52 public static final UnicodeSet PRIVATE_USE_AREA_B = new UnicodeSet("[\\U00100000-\\U0010FFFD]").freeze(); 53 public static final int SAMPLE_PRIVATE_USE_AREA_B = PRIVATE_USE_AREA_B.charAt(0); 54 55 // The following are special. They are used for performance, but must be changed if the version of Unicode for the UnicodeProperty changes. 56 private static UnicodeSet UNASSIGNED; 57 private static int SAMPLE_UNASSIGNED; 58 private static UnicodeSet SPECIALS; 59 private static UnicodeSet STUFF_TO_TEST; 60 private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED; 61 getUNASSIGNED()62 public static synchronized UnicodeSet getUNASSIGNED() { 63 if (UNASSIGNED == null) { 64 UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze(); 65 } 66 return UNASSIGNED; 67 } 68 contractUNASSIGNED(UnicodeSet toBeUnassigned)69 public static synchronized UnicodeSet contractUNASSIGNED(UnicodeSet toBeUnassigned) { 70 UnicodeSet temp = UNASSIGNED; 71 ResetCacheProperties(); 72 UNASSIGNED = temp == null ? toBeUnassigned.freeze() : new UnicodeSet(temp).retainAll(toBeUnassigned).freeze(); 73 return UNASSIGNED; 74 } 75 getSAMPLE_UNASSIGNED()76 public static synchronized int getSAMPLE_UNASSIGNED() { 77 if (SAMPLE_UNASSIGNED == 0) { 78 SAMPLE_UNASSIGNED = getUNASSIGNED().charAt(0); 79 } 80 return SAMPLE_UNASSIGNED; 81 } 82 getSPECIALS()83 public static synchronized UnicodeSet getSPECIALS() { 84 if (SPECIALS == null) { 85 SPECIALS = new UnicodeSet(getUNASSIGNED()).addAll(PRIVATE_USE).addAll(SURROGATE).freeze(); 86 } 87 return SPECIALS; 88 } 89 getSTUFF_TO_TEST()90 public static synchronized UnicodeSet getSTUFF_TO_TEST() { 91 if (STUFF_TO_TEST == null) { 92 STUFF_TO_TEST = new UnicodeSet(getSPECIALS()).complement() 93 .addAll(NONCHARACTERS) 94 .add(getSAMPLE_UNASSIGNED()) 95 .add(SAMPLE_HIGH_SURROGATE) 96 .add(SAMPLE_HIGH_PRIVATE_USE_SURROGATE) 97 .add(SAMPLE_LOW_SURROGATE) 98 .add(SAMPLE_PRIVATE_USE_AREA) 99 .add(SAMPLE_PRIVATE_USE_AREA_A) 100 .add(SAMPLE_PRIVATE_USE_AREA_B) 101 .freeze(); 102 } 103 return STUFF_TO_TEST; 104 } 105 getSTUFF_TO_TEST_WITH_UNASSIGNED()106 public static synchronized UnicodeSet getSTUFF_TO_TEST_WITH_UNASSIGNED() { 107 if (STUFF_TO_TEST_WITH_UNASSIGNED == null) { 108 STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet(getSTUFF_TO_TEST()).addAll(getUNASSIGNED()).freeze(); 109 } 110 return STUFF_TO_TEST_WITH_UNASSIGNED; 111 } 112 113 /** 114 * Reset the cache properties. Must be done if the version of Unicode is different than the ICU one, AND any UnicodeProperty has already been instantiated. 115 * TODO make this a bit more robust. 116 * @internal 117 */ ResetCacheProperties()118 public static synchronized void ResetCacheProperties() { 119 UNASSIGNED = null; 120 SAMPLE_UNASSIGNED = 0; 121 SPECIALS = null; 122 STUFF_TO_TEST = null; 123 STUFF_TO_TEST_WITH_UNASSIGNED = null; 124 } 125 126 public static boolean DEBUG = false; 127 128 public static String CHECK_NAME = "FC_NFKC_Closure"; 129 130 public static int CHECK_VALUE = 0x037A; 131 132 private String name; 133 134 private String firstNameAlias = null; 135 136 private int type; 137 138 private Map valueToFirstValueAlias = null; 139 140 private boolean hasUniformUnassigned = true; 141 142 /* 143 * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name 144 * 145 */ 146 147 public static final int UNKNOWN = 0, BINARY = 2, EXTENDED_BINARY = 3, 148 ENUMERATED = 4, EXTENDED_ENUMERATED = 5, CATALOG = 6, 149 EXTENDED_CATALOG = 7, MISC = 8, EXTENDED_MISC = 9, STRING = 10, 150 EXTENDED_STRING = 11, NUMERIC = 12, EXTENDED_NUMERIC = 13, 151 START_TYPE = 2, LIMIT_TYPE = 14, EXTENDED_MASK = 1, 152 CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY) 153 | (1 << EXTENDED_BINARY), STRING_MASK = (1 << STRING) 154 | (1 << EXTENDED_STRING), 155 STRING_OR_MISC_MASK = (1 << STRING) | (1 << EXTENDED_STRING) 156 | (1 << MISC) | (1 << EXTENDED_MISC), 157 ENUMERATED_OR_CATALOG_MASK = (1 << ENUMERATED) 158 | (1 << EXTENDED_ENUMERATED) | (1 << CATALOG) 159 | (1 << EXTENDED_CATALOG); 160 161 private static final String[] TYPE_NAMES = { "Unknown", "Unknown", 162 "Binary", "Extended Binary", "Enumerated", "Extended Enumerated", 163 "Catalog", "Extended Catalog", "Miscellaneous", 164 "Extended Miscellaneous", "String", "Extended String", "Numeric", 165 "Extended Numeric", }; 166 getTypeName(int propType)167 public static String getTypeName(int propType) { 168 return TYPE_NAMES[propType]; 169 } 170 getName()171 public final String getName() { 172 return name; 173 } 174 getType()175 public final int getType() { 176 return type; 177 } 178 getTypeName()179 public String getTypeName() { 180 return TYPE_NAMES[type]; 181 } 182 isType(int mask)183 public final boolean isType(int mask) { 184 return ((1 << type) & mask) != 0; 185 } 186 setName(String string)187 protected final void setName(String string) { 188 if (string == null) 189 throw new IllegalArgumentException("Name must not be null"); 190 name = string; 191 } 192 setType(int i)193 protected final void setType(int i) { 194 type = i; 195 } 196 getVersion()197 public String getVersion() { 198 return _getVersion(); 199 } 200 getValue(int codepoint)201 public String getValue(int codepoint) { 202 if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) { 203 String value = _getValue(codepoint); 204 System.out.println(getName() + "(" + Utility.hex(codepoint) + "):" 205 + (getType() == STRING ? Utility.hex(value) : value)); 206 return value; 207 } 208 return _getValue(codepoint); 209 } 210 211 // public String getValue(int codepoint, boolean isShort) { 212 // return getValue(codepoint); 213 // } 214 getNameAliases(List<String> result)215 public List<String> getNameAliases(List<String> result) { 216 if (result == null) 217 result = new ArrayList(1); 218 return _getNameAliases(result); 219 } 220 getValueAliases(String valueAlias, List<String> result)221 public List<String> getValueAliases(String valueAlias, List<String> result) { 222 if (result == null) 223 result = new ArrayList(1); 224 result = _getValueAliases(valueAlias, result); 225 if (!result.contains(valueAlias)) { // FIX && type < NUMERIC 226 result = _getValueAliases(valueAlias, result); // for debugging 227 throw new IllegalArgumentException("Internal error: " + getName() 228 + " doesn't contain " + valueAlias + ": " 229 + new BagFormatter().join(result)); 230 } 231 return result; 232 } 233 getAvailableValues(List<String> result)234 public List<String> getAvailableValues(List<String> result) { 235 if (result == null) 236 result = new ArrayList(1); 237 return _getAvailableValues(result); 238 } 239 _getVersion()240 protected abstract String _getVersion(); 241 _getValue(int codepoint)242 protected abstract String _getValue(int codepoint); 243 _getNameAliases(List<String> result)244 protected abstract List<String> _getNameAliases(List<String> result); 245 _getValueAliases(String valueAlias, List<String> result)246 protected abstract List<String> _getValueAliases(String valueAlias, List<String> result); 247 _getAvailableValues(List<String> result)248 protected abstract List<String> _getAvailableValues(List<String> result); 249 250 // conveniences getNameAliases()251 public final List<String> getNameAliases() { 252 return getNameAliases(null); 253 } 254 getValueAliases(String valueAlias)255 public final List<String> getValueAliases(String valueAlias) { 256 return getValueAliases(valueAlias, null); 257 } 258 getAvailableValues()259 public final List<String> getAvailableValues() { 260 return getAvailableValues(null); 261 } 262 getValue(int codepoint, boolean getShortest)263 public final String getValue(int codepoint, boolean getShortest) { 264 String result = getValue(codepoint); 265 if (type >= MISC || result == null || !getShortest) 266 return result; 267 return getFirstValueAlias(result); 268 } 269 getFirstNameAlias()270 public final String getFirstNameAlias() { 271 if (firstNameAlias == null) { 272 firstNameAlias = (String) getNameAliases().get(0); 273 } 274 return firstNameAlias; 275 } 276 getFirstValueAlias(String value)277 public final String getFirstValueAlias(String value) { 278 if (valueToFirstValueAlias == null) 279 _getFirstValueAliasCache(); 280 return valueToFirstValueAlias.get(value).toString(); 281 } 282 _getFirstValueAliasCache()283 private void _getFirstValueAliasCache() { 284 maxValueWidth = 0; 285 maxFirstValueAliasWidth = 0; 286 valueToFirstValueAlias = new HashMap(1); 287 Iterator it = getAvailableValues().iterator(); 288 while (it.hasNext()) { 289 String value = (String) it.next(); 290 String first = (String) getValueAliases(value).get(0); 291 if (first == null) { // internal error 292 throw new IllegalArgumentException( 293 "Value not in value aliases: " + value); 294 } 295 if (DEBUG && CHECK_NAME.equals(getName())) { 296 System.out.println("First Alias: " + getName() + ": " + value 297 + " => " + first 298 + new BagFormatter().join(getValueAliases(value))); 299 } 300 valueToFirstValueAlias.put(value, first); 301 if (value.length() > maxValueWidth) { 302 maxValueWidth = value.length(); 303 } 304 if (first.length() > maxFirstValueAliasWidth) { 305 maxFirstValueAliasWidth = first.length(); 306 } 307 } 308 } 309 310 private int maxValueWidth = -1; 311 312 private int maxFirstValueAliasWidth = -1; 313 getMaxWidth(boolean getShortest)314 public int getMaxWidth(boolean getShortest) { 315 if (maxValueWidth < 0) 316 _getFirstValueAliasCache(); 317 if (getShortest) 318 return maxFirstValueAliasWidth; 319 return maxValueWidth; 320 } 321 getSet(String propertyValue)322 public final UnicodeSet getSet(String propertyValue) { 323 return getSet(propertyValue, null); 324 } 325 getSet(PatternMatcher matcher)326 public final UnicodeSet getSet(PatternMatcher matcher) { 327 return getSet(matcher, null); 328 } 329 330 /** Adds the property value set to the result. Clear the result first if you don't want to keep the original contents. 331 */ getSet(String propertyValue, UnicodeSet result)332 public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { 333 return getSet(new SimpleMatcher(propertyValue, 334 isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR), 335 result); 336 } 337 338 private UnicodeMap unicodeMap = null; 339 340 public static final String UNUSED = "??"; 341 getSet(PatternMatcher matcher, UnicodeSet result)342 public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { 343 if (result == null) 344 result = new UnicodeSet(); 345 boolean uniformUnassigned = hasUniformUnassigned(); 346 if (isType(STRING_OR_MISC_MASK)) { 347 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 348 int i = usi.codepoint; 349 String value = getValue(i); 350 if (value != null && matcher.matches(value)) { 351 result.add(i); 352 } 353 } 354 return addUntested(result, uniformUnassigned); 355 } 356 List temp = new ArrayList(1); // to avoid reallocating... 357 UnicodeMap um = getUnicodeMap_internal(); 358 Iterator it = um.getAvailableValues(null).iterator(); 359 main: while (it.hasNext()) { 360 String value = (String) it.next(); 361 temp.clear(); 362 Iterator it2 = getValueAliases(value, temp).iterator(); 363 while (it2.hasNext()) { 364 String value2 = (String) it2.next(); 365 // System.out.println("Values:" + value2); 366 if (matcher.matches(value2) 367 || matcher.matches(toSkeleton(value2))) { 368 um.keySet(value, result); 369 continue main; 370 } 371 } 372 } 373 return result; 374 } 375 376 /* 377 * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null) 378 * result = new UnicodeSet(); addAll(matchIterator, result); return result; } 379 * 380 * public void setMatchSet(UnicodeSet set) { matchIterator = new 381 * UnicodeSetIterator(set); } 382 */ 383 384 /** 385 * Utility for debugging 386 */ getStack()387 public static String getStack() { 388 Exception e = new Exception(); 389 StringWriter sw = new StringWriter(); 390 PrintWriter pw = new PrintWriter(sw); 391 e.printStackTrace(pw); 392 pw.flush(); 393 return "Showing Stack with fake " + sw.getBuffer().toString(); 394 } 395 396 // TODO use this instead of plain strings 397 public static class Name implements Comparable { 398 private String skeleton; 399 400 private String pretty; 401 402 public final int RAW = 0, TITLE = 1, NORMAL = 2; 403 Name(String name, int style)404 public Name(String name, int style) { 405 if (name == null) 406 name = ""; 407 if (style == RAW) { 408 skeleton = pretty = name; 409 } else { 410 pretty = regularize(name, style == TITLE); 411 skeleton = toSkeleton(pretty); 412 } 413 } 414 compareTo(Object o)415 public int compareTo(Object o) { 416 return skeleton.compareTo(((Name) o).skeleton); 417 } 418 equals(Object o)419 public boolean equals(Object o) { 420 return skeleton.equals(((Name) o).skeleton); 421 } 422 hashCode()423 public int hashCode() { 424 return skeleton.hashCode(); 425 } 426 toString()427 public String toString() { 428 return pretty; 429 } 430 } 431 432 /** 433 * @return the unicode map 434 */ getUnicodeMap()435 public UnicodeMap getUnicodeMap() { 436 return getUnicodeMap(false); 437 } 438 439 /** 440 * @return the unicode map 441 */ getUnicodeMap(boolean getShortest)442 public UnicodeMap getUnicodeMap(boolean getShortest) { 443 if (!getShortest) 444 return (UnicodeMap) getUnicodeMap_internal().cloneAsThawed(); 445 UnicodeMap result = new UnicodeMap(); 446 boolean uniformUnassigned = hasUniformUnassigned(); 447 448 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 449 int i = usi.codepoint; 450 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 451 // getValue(i)); 452 String value = getValue(i, true); 453 result.put(i, value); 454 } 455 return addUntested(result, uniformUnassigned); 456 } 457 458 /** 459 * @return the unicode map 460 */ getUnicodeMap_internal()461 public UnicodeMap getUnicodeMap_internal() { 462 if (unicodeMap == null) 463 unicodeMap = _getUnicodeMap(); 464 return unicodeMap; 465 } 466 _getUnicodeMap()467 protected UnicodeMap _getUnicodeMap() { 468 UnicodeMap result = new UnicodeMap(); 469 HashMap myIntern = new HashMap(); 470 boolean uniformUnassigned = hasUniformUnassigned(); 471 472 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 473 int i = usi.codepoint; 474 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 475 // getValue(i)); 476 String value = getValue(i); 477 String iValue = (String) myIntern.get(value); 478 if (iValue == null) 479 myIntern.put(value, iValue = value); 480 result.put(i, iValue); 481 } 482 addUntested(result, uniformUnassigned); 483 484 if (DEBUG) { 485 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 486 int i = usi.codepoint; 487 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 488 // getValue(i)); 489 String value = getValue(i); 490 String resultValue = (String) result.getValue(i); 491 if (!value.equals(resultValue)) { 492 throw new RuntimeException("Value failure at: " 493 + Utility.hex(i)); 494 } 495 } 496 } 497 if (DEBUG && CHECK_NAME.equals(getName())) { 498 System.out.println(getName() + ":\t" + getClass().getName() + "\t" 499 + getVersion()); 500 System.out.println(getStack()); 501 System.out.println(result); 502 } 503 return result; 504 } 505 getStuffToTest(boolean uniformUnassigned)506 private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) { 507 return new UnicodeSetIterator(uniformUnassigned ? getSTUFF_TO_TEST() : getSTUFF_TO_TEST_WITH_UNASSIGNED()); 508 } 509 510 /** 511 * Really ought to create a Collection UniqueList, that forces uniqueness. 512 * But for now... 513 */ addUnique(Object obj, Collection result)514 public static Collection addUnique(Object obj, Collection result) { 515 if (obj != null && !result.contains(obj)) 516 result.add(obj); 517 return result; 518 } 519 520 /** 521 * Utility for managing property & non-string value aliases 522 */ 523 public static final Comparator PROPERTY_COMPARATOR = new Comparator() { 524 public int compare(Object o1, Object o2) { 525 return compareNames((String) o1, (String) o2); 526 } 527 }; 528 529 /** 530 * Utility for managing property & non-string value aliases 531 * 532 */ 533 // TODO optimize equalNames(String a, String b)534 public static boolean equalNames(String a, String b) { 535 if (a == b) 536 return true; 537 if (a == null) 538 return false; 539 return toSkeleton(a).equals(toSkeleton(b)); 540 } 541 542 /** 543 * Utility for managing property & non-string value aliases 544 */ 545 // TODO optimize compareNames(String a, String b)546 public static int compareNames(String a, String b) { 547 if (a == b) 548 return 0; 549 if (a == null) 550 return -1; 551 if (b == null) 552 return 1; 553 return toSkeleton(a).compareTo(toSkeleton(b)); 554 } 555 556 /** 557 * Utility for managing property & non-string value aliases 558 */ 559 // TODO account for special names, tibetan, hangul toSkeleton(String source)560 public static String toSkeleton(String source) { 561 if (source == null) 562 return null; 563 StringBuffer skeletonBuffer = new StringBuffer(); 564 boolean gotOne = false; 565 // remove spaces, '_', '-' 566 // we can do this with char, since no surrogates are involved 567 for (int i = 0; i < source.length(); ++i) { 568 char ch = source.charAt(i); 569 if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) { 570 gotOne = true; 571 } else { 572 char ch2 = Character.toLowerCase(ch); 573 if (ch2 != ch) { 574 gotOne = true; 575 skeletonBuffer.append(ch2); 576 } else { 577 skeletonBuffer.append(ch); 578 } 579 } 580 } 581 if (!gotOne) 582 return source; // avoid string creation 583 return skeletonBuffer.toString(); 584 } 585 586 // get the name skeleton toNameSkeleton(String source)587 public static String toNameSkeleton(String source) { 588 if (source == null) 589 return null; 590 StringBuffer result = new StringBuffer(); 591 // remove spaces, medial '-' 592 // we can do this with char, since no surrogates are involved 593 for (int i = 0; i < source.length(); ++i) { 594 char ch = source.charAt(i); 595 if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') 596 || ch == '<' || ch == '>') { 597 result.append(ch); 598 } else if (ch == ' ') { 599 // don't copy ever 600 } else if (ch == '-') { 601 // only copy non-medials AND trailing O-E 602 if (0 == i 603 || i == source.length() - 1 604 || source.charAt(i - 1) == ' ' 605 || source.charAt(i + 1) == ' ' 606 || (i == source.length() - 2 607 && source.charAt(i - 1) == 'O' && source 608 .charAt(i + 1) == 'E')) { 609 System.out.println("****** EXCEPTION " + source); 610 result.append(ch); 611 } 612 // otherwise don't copy 613 } else { 614 throw new IllegalArgumentException("Illegal Name Char: U+" 615 + Utility.hex(ch) + ", " + ch); 616 } 617 } 618 return result.toString(); 619 } 620 621 /** 622 * These routines use the Java functions, because they only need to act on 623 * ASCII Changes space, - into _, inserts _ between lower and UPPER. 624 */ regularize(String source, boolean titlecaseStart)625 public static String regularize(String source, boolean titlecaseStart) { 626 if (source == null) 627 return source; 628 /* 629 * if (source.equals("noBreak")) { // HACK if (titlecaseStart) return 630 * "NoBreak"; return source; } 631 */ 632 StringBuffer result = new StringBuffer(); 633 int lastCat = -1; 634 boolean haveFirstCased = true; 635 for (int i = 0; i < source.length(); ++i) { 636 char c = source.charAt(i); 637 if (c == ' ' || c == '-' || c == '_') { 638 c = '_'; 639 haveFirstCased = true; 640 } 641 if (c == '=') 642 haveFirstCased = true; 643 int cat = Character.getType(c); 644 if (lastCat == Character.LOWERCASE_LETTER 645 && cat == Character.UPPERCASE_LETTER) { 646 result.append('_'); 647 } 648 if (haveFirstCased 649 && (cat == Character.LOWERCASE_LETTER 650 || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) { 651 if (titlecaseStart) { 652 c = Character.toUpperCase(c); 653 } 654 haveFirstCased = false; 655 } 656 result.append(c); 657 lastCat = cat; 658 } 659 return result.toString(); 660 } 661 662 /** 663 * Utility function for comparing codepoint to string without generating new 664 * string. 665 * 666 * @param codepoint 667 * @param other 668 * @return true if the codepoint equals the string 669 */ equals(int codepoint, String other)670 public static final boolean equals(int codepoint, String other) { 671 if (other == null) return false; 672 if (other.length() == 1) { 673 return codepoint == other.charAt(0); 674 } 675 if (other.length() == 2) { 676 return other.equals(UTF16.valueOf(codepoint)); 677 } 678 return false; 679 } 680 681 /** 682 * Utility function for comparing objects that may be null 683 * string. 684 */ equals(T a, T b)685 public static final <T extends Object> boolean equals(T a, T b) { 686 return a == null ? b == null 687 : b == null ? false 688 : a.equals(b); 689 } 690 691 /** 692 * Utility that should be on UnicodeSet 693 * 694 * @param source 695 * @param result 696 */ addAll(UnicodeSetIterator source, UnicodeSet result)697 static public void addAll(UnicodeSetIterator source, UnicodeSet result) { 698 while (source.nextRange()) { 699 if (source.codepoint == UnicodeSetIterator.IS_STRING) { 700 result.add(source.string); 701 } else { 702 result.add(source.codepoint, source.codepointEnd); 703 } 704 } 705 } 706 707 /** 708 * Really ought to create a Collection UniqueList, that forces uniqueness. 709 * But for now... 710 */ addAllUnique(Collection source, Collection result)711 public static Collection addAllUnique(Collection source, Collection result) { 712 for (Iterator it = source.iterator(); it.hasNext();) { 713 addUnique(it.next(), result); 714 } 715 return result; 716 } 717 718 /** 719 * Really ought to create a Collection UniqueList, that forces uniqueness. 720 * But for now... 721 */ addAllUnique(Object[] source, Collection result)722 public static Collection addAllUnique(Object[] source, Collection result) { 723 for (int i = 0; i < source.length; ++i) { 724 addUnique(source[i], result); 725 } 726 return result; 727 } 728 729 static public class Factory { 730 static boolean DEBUG = false; 731 732 Map<String, UnicodeProperty> canonicalNames = new TreeMap<String, UnicodeProperty>(); 733 734 Map skeletonNames = new TreeMap(); 735 736 Map propertyCache = new HashMap(1); 737 add(UnicodeProperty sp)738 public final Factory add(UnicodeProperty sp) { 739 String name2 = sp.getName(); 740 if (name2.length() == 0) { 741 throw new IllegalArgumentException(); 742 } 743 canonicalNames.put(name2, sp); 744 skeletonNames.put(toSkeleton(name2), sp); 745 List c = sp.getNameAliases(new ArrayList(1)); 746 Iterator it = c.iterator(); 747 while (it.hasNext()) { 748 skeletonNames.put(toSkeleton((String) it.next()), sp); 749 } 750 return this; 751 } 752 getProperty(String propertyAlias)753 public UnicodeProperty getProperty(String propertyAlias) { 754 return (UnicodeProperty) skeletonNames 755 .get(toSkeleton(propertyAlias)); 756 } 757 getAvailableNames()758 public final List<String> getAvailableNames() { 759 return getAvailableNames(null); 760 } 761 getAvailableNames(List<String> result)762 public final List<String> getAvailableNames(List<String> result) { 763 if (result == null) 764 result = new ArrayList(1); 765 Iterator it = canonicalNames.keySet().iterator(); 766 while (it.hasNext()) { 767 addUnique(it.next(), result); 768 } 769 return result; 770 } 771 getAvailableNames(int propertyTypeMask)772 public final List getAvailableNames(int propertyTypeMask) { 773 return getAvailableNames(propertyTypeMask, null); 774 } 775 getAvailableNames(int propertyTypeMask, List result)776 public final List getAvailableNames(int propertyTypeMask, List result) { 777 if (result == null) 778 result = new ArrayList(1); 779 Iterator it = canonicalNames.keySet().iterator(); 780 while (it.hasNext()) { 781 String item = (String) it.next(); 782 UnicodeProperty property = getProperty(item); 783 if (DEBUG) 784 System.out.println("Properties: " + item + "," 785 + property.getType()); 786 if (!property.isType(propertyTypeMask)) { 787 // System.out.println("Masking: " + property.getType() + "," 788 // + propertyTypeMask); 789 continue; 790 } 791 addUnique(property.getName(), result); 792 } 793 return result; 794 } 795 796 InversePatternMatcher inverseMatcher = new InversePatternMatcher(); 797 798 /** 799 * Format is: propname ('=' | '!=') propvalue ( '|' propValue )* 800 */ getSet(String propAndValue, PatternMatcher matcher, UnicodeSet result)801 public final UnicodeSet getSet(String propAndValue, 802 PatternMatcher matcher, UnicodeSet result) { 803 int equalPos = propAndValue.indexOf('='); 804 String prop = propAndValue.substring(0, equalPos); 805 String value = propAndValue.substring(equalPos + 1); 806 boolean negative = false; 807 if (prop.endsWith("!")) { 808 prop = prop.substring(0, prop.length() - 1); 809 negative = true; 810 } 811 prop = prop.trim(); 812 UnicodeProperty up = getProperty(prop); 813 if (matcher == null) { 814 matcher = new SimpleMatcher(value, up 815 .isType(STRING_OR_MISC_MASK) ? null 816 : PROPERTY_COMPARATOR); 817 } 818 if (negative) { 819 inverseMatcher.set(matcher); 820 matcher = inverseMatcher; 821 } 822 return up.getSet(matcher.set(value), result); 823 } 824 getSet(String propAndValue, PatternMatcher matcher)825 public final UnicodeSet getSet(String propAndValue, 826 PatternMatcher matcher) { 827 return getSet(propAndValue, matcher, null); 828 } 829 getSet(String propAndValue)830 public final UnicodeSet getSet(String propAndValue) { 831 return getSet(propAndValue, null, null); 832 } 833 getSymbolTable(String prefix)834 public final SymbolTable getSymbolTable(String prefix) { 835 return new PropertySymbolTable(prefix); 836 } 837 838 private class MyXSymbolTable extends UnicodeSet.XSymbolTable { applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)839 public boolean applyPropertyAlias(String propertyName, 840 String propertyValue, UnicodeSet result) { 841 if (false) 842 System.out.println(propertyName + "=" + propertyValue); 843 UnicodeProperty prop = getProperty(propertyName); 844 if (prop == null) 845 return false; 846 result.clear(); 847 UnicodeSet x = prop.getSet(propertyValue, result); 848 return x.size() != 0; 849 } 850 } 851 getXSymbolTable()852 public final UnicodeSet.XSymbolTable getXSymbolTable() { 853 return new MyXSymbolTable(); 854 } 855 856 private class PropertySymbolTable implements SymbolTable { 857 static final boolean DEBUG = false; 858 859 private String prefix; 860 861 RegexMatcher regexMatcher = new RegexMatcher(); 862 PropertySymbolTable(String prefix)863 PropertySymbolTable(String prefix) { 864 this.prefix = prefix; 865 } 866 lookup(String s)867 public char[] lookup(String s) { 868 if (DEBUG) 869 System.out.println("\t(" + prefix + ")Looking up " + s); 870 // ensure, again, that prefix matches 871 int start = prefix.length(); 872 if (!s.regionMatches(true, 0, prefix, 0, start)) 873 return null; 874 875 int pos = s.indexOf(':', start); 876 if (pos < 0) { // should never happen 877 throw new IllegalArgumentException( 878 "Internal Error: missing =: " + s + "\r\n"); 879 } 880 UnicodeProperty prop = getProperty(s.substring(start, pos)); 881 if (prop == null) { 882 throw new IllegalArgumentException("Invalid Property in: " 883 + s + "\r\nUse " + showSet(getAvailableNames())); 884 } 885 String value = s.substring(pos + 1); 886 UnicodeSet set; 887 if (value.startsWith("\u00AB")) { // regex! 888 set = prop.getSet(regexMatcher.set(value.substring(1, value 889 .length() - 1))); 890 } else { 891 set = prop.getSet(value); 892 } 893 if (set.size() == 0) { 894 throw new IllegalArgumentException( 895 "Empty Property-Value in: " + s + "\r\nUse " 896 + showSet(prop.getAvailableValues())); 897 } 898 if (DEBUG) 899 System.out.println("\t(" + prefix + ")Returning " 900 + set.toPattern(true)); 901 return set.toPattern(true).toCharArray(); // really ugly 902 } 903 showSet(List list)904 private String showSet(List list) { 905 StringBuffer result = new StringBuffer("["); 906 boolean first = true; 907 for (Iterator it = list.iterator(); it.hasNext();) { 908 if (!first) 909 result.append(", "); 910 else 911 first = false; 912 result.append(it.next().toString()); 913 } 914 result.append("]"); 915 return result.toString(); 916 } 917 lookupMatcher(int ch)918 public UnicodeMatcher lookupMatcher(int ch) { 919 return null; 920 } 921 parseReference(String text, ParsePosition pos, int limit)922 public String parseReference(String text, ParsePosition pos, 923 int limit) { 924 if (DEBUG) 925 System.out.println("\t(" + prefix + ")Parsing <" 926 + text.substring(pos.getIndex(), limit) + ">"); 927 int start = pos.getIndex(); 928 // ensure that it starts with 'prefix' 929 if (!text 930 .regionMatches(true, start, prefix, 0, prefix.length())) 931 return null; 932 start += prefix.length(); 933 // now see if it is of the form identifier:identifier 934 int i = getIdentifier(text, start, limit); 935 if (i == start) 936 return null; 937 String prop = text.substring(start, i); 938 String value = "true"; 939 if (i < limit) { 940 if (text.charAt(i) == ':') { 941 int j; 942 if (text.charAt(i + 1) == '\u00AB') { // regular 943 // expression 944 j = text.indexOf('\u00BB', i + 2) + 1; // include 945 // last 946 // character 947 if (j <= 0) 948 return null; 949 } else { 950 j = getIdentifier(text, i + 1, limit); 951 } 952 value = text.substring(i + 1, j); 953 i = j; 954 } 955 } 956 pos.setIndex(i); 957 if (DEBUG) 958 System.out.println("\t(" + prefix + ")Parsed <" + prop 959 + ">=<" + value + ">"); 960 return prefix + prop + ":" + value; 961 } 962 getIdentifier(String text, int start, int limit)963 private int getIdentifier(String text, int start, int limit) { 964 if (DEBUG) 965 System.out.println("\tGetID <" 966 + text.substring(start, limit) + ">"); 967 int cp = 0; 968 int i; 969 for (i = start; i < limit; i += UTF16.getCharCount(cp)) { 970 cp = UTF16.charAt(text, i); 971 if (!com.ibm.icu.lang.UCharacter 972 .isUnicodeIdentifierPart(cp) 973 && cp != '.') { 974 break; 975 } 976 } 977 if (DEBUG) 978 System.out.println("\tGotID <" + text.substring(start, i) 979 + ">"); 980 return i; 981 } 982 } 983 } 984 985 public static class FilteredProperty extends UnicodeProperty { 986 private UnicodeProperty property; 987 988 protected StringFilter filter; 989 990 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator( 991 new UnicodeSet(0, 0x10FFFF)); 992 993 protected HashMap backmap; 994 995 boolean allowValueAliasCollisions = false; 996 FilteredProperty(UnicodeProperty property, StringFilter filter)997 public FilteredProperty(UnicodeProperty property, StringFilter filter) { 998 this.property = property; 999 this.filter = filter; 1000 } 1001 getFilter()1002 public StringFilter getFilter() { 1003 return filter; 1004 } 1005 setFilter(StringFilter filter)1006 public UnicodeProperty setFilter(StringFilter filter) { 1007 this.filter = filter; 1008 return this; 1009 } 1010 1011 List temp = new ArrayList(1); 1012 _getAvailableValues(List result)1013 public List _getAvailableValues(List result) { 1014 temp.clear(); 1015 return filter.addUnique(property.getAvailableValues(temp), result); 1016 } 1017 _getNameAliases(List result)1018 public List _getNameAliases(List result) { 1019 temp.clear(); 1020 return filter.addUnique(property.getNameAliases(temp), result); 1021 } 1022 _getValue(int codepoint)1023 public String _getValue(int codepoint) { 1024 return filter.remap(property.getValue(codepoint)); 1025 } 1026 _getValueAliases(String valueAlias, List result)1027 public List _getValueAliases(String valueAlias, List result) { 1028 if (backmap == null) { 1029 backmap = new HashMap(1); 1030 temp.clear(); 1031 Iterator it = property.getAvailableValues(temp).iterator(); 1032 while (it.hasNext()) { 1033 String item = (String) it.next(); 1034 String mappedItem = filter.remap(item); 1035 if (backmap.get(mappedItem) != null 1036 && !allowValueAliasCollisions) { 1037 throw new IllegalArgumentException( 1038 "Filter makes values collide! " + item + ", " 1039 + mappedItem); 1040 } 1041 backmap.put(mappedItem, item); 1042 } 1043 } 1044 valueAlias = (String) backmap.get(valueAlias); 1045 temp.clear(); 1046 return filter.addUnique(property.getValueAliases(valueAlias, temp), 1047 result); 1048 } 1049 _getVersion()1050 public String _getVersion() { 1051 return property.getVersion(); 1052 } 1053 isAllowValueAliasCollisions()1054 public boolean isAllowValueAliasCollisions() { 1055 return allowValueAliasCollisions; 1056 } 1057 setAllowValueAliasCollisions(boolean b)1058 public FilteredProperty setAllowValueAliasCollisions(boolean b) { 1059 allowValueAliasCollisions = b; 1060 return this; 1061 } 1062 1063 } 1064 1065 public static abstract class StringFilter implements Cloneable { remap(String original)1066 public abstract String remap(String original); 1067 addUnique(Collection source, List result)1068 public final List addUnique(Collection source, List result) { 1069 if (result == null) 1070 result = new ArrayList(1); 1071 Iterator it = source.iterator(); 1072 while (it.hasNext()) { 1073 UnicodeProperty.addUnique(remap((String) it.next()), result); 1074 } 1075 return result; 1076 } 1077 /* 1078 * public Object clone() { try { return super.clone(); } catch 1079 * (CloneNotSupportedException e) { throw new 1080 * IllegalStateException("Should never happen."); } } 1081 */ 1082 } 1083 1084 public static class MapFilter extends StringFilter { 1085 private Map valueMap; 1086 MapFilter(Map valueMap)1087 public MapFilter(Map valueMap) { 1088 this.valueMap = valueMap; 1089 } 1090 remap(String original)1091 public String remap(String original) { 1092 Object changed = valueMap.get(original); 1093 return changed == null ? original : (String) changed; 1094 } 1095 getMap()1096 public Map getMap() { 1097 return valueMap; 1098 } 1099 } 1100 1101 public interface PatternMatcher extends ObjectMatcher { set(String pattern)1102 public PatternMatcher set(String pattern); 1103 } 1104 1105 public static class InversePatternMatcher extends InverseMatcher implements 1106 PatternMatcher { 1107 PatternMatcher other; 1108 set(PatternMatcher toInverse)1109 public PatternMatcher set(PatternMatcher toInverse) { 1110 other = toInverse; 1111 return this; 1112 } 1113 matches(Object value)1114 public boolean matches(Object value) { 1115 return !other.matches(value); 1116 } 1117 set(String pattern)1118 public PatternMatcher set(String pattern) { 1119 other.set(pattern); 1120 return this; 1121 } 1122 } 1123 1124 public static class SimpleMatcher implements PatternMatcher { 1125 Comparator comparator; 1126 1127 String pattern; 1128 SimpleMatcher(String pattern, Comparator comparator)1129 public SimpleMatcher(String pattern, Comparator comparator) { 1130 this.comparator = comparator; 1131 this.pattern = pattern; 1132 } 1133 matches(Object value)1134 public boolean matches(Object value) { 1135 if (comparator == null) 1136 return pattern.equals(value); 1137 return comparator.compare(pattern, value) == 0; 1138 } 1139 set(String pattern)1140 public PatternMatcher set(String pattern) { 1141 this.pattern = pattern; 1142 return this; 1143 } 1144 } 1145 1146 public static class RegexMatcher implements UnicodeProperty.PatternMatcher { 1147 private java.util.regex.Matcher matcher; 1148 set(String pattern)1149 public UnicodeProperty.PatternMatcher set(String pattern) { 1150 matcher = Pattern.compile(pattern).matcher(""); 1151 return this; 1152 } 1153 UFormat foo; matches(Object value)1154 public boolean matches(Object value) { 1155 matcher.reset(value.toString()); 1156 return matcher.find(); 1157 } 1158 } 1159 1160 public enum AliasAddAction {IGNORE_IF_MISSING, REQUIRE_MAIN_ALIAS, ADD_MAIN_ALIAS} 1161 1162 public static abstract class BaseProperty extends UnicodeProperty { 1163 private static final String[] NO_VALUES = {"No", "N", "F", "False"}; 1164 1165 private static final String[] YES_VALUES = {"Yes", "Y", "T", "True"}; 1166 1167 /** 1168 * 1169 */ 1170 private static final String[][] YES_NO_ALIASES = new String[][] {YES_VALUES, NO_VALUES}; 1171 1172 protected List propertyAliases = new ArrayList(1); 1173 1174 protected Map toValueAliases; 1175 1176 protected String version; 1177 setMain(String alias, String shortAlias, int propertyType, String version)1178 public BaseProperty setMain(String alias, String shortAlias, 1179 int propertyType, String version) { 1180 setName(alias); 1181 setType(propertyType); 1182 propertyAliases.add(shortAlias); 1183 propertyAliases.add(alias); 1184 if (propertyType == BINARY) { 1185 addValueAliases(YES_NO_ALIASES, AliasAddAction.ADD_MAIN_ALIAS); 1186 } 1187 this.version = version; 1188 return this; 1189 } 1190 _getVersion()1191 public String _getVersion() { 1192 return version; 1193 } 1194 _getNameAliases(List result)1195 public List _getNameAliases(List result) { 1196 addAllUnique(propertyAliases, result); 1197 return result; 1198 } 1199 addValueAliases(String[][] valueAndAlternates, AliasAddAction aliasAddAction)1200 public BaseProperty addValueAliases(String[][] valueAndAlternates, 1201 AliasAddAction aliasAddAction) { 1202 if (toValueAliases == null) 1203 _fixValueAliases(); 1204 for (int i = 0; i < valueAndAlternates.length; ++i) { 1205 for (int j = 1; j < valueAndAlternates[0].length; ++j) { 1206 addValueAlias(valueAndAlternates[i][0], 1207 valueAndAlternates[i][j], aliasAddAction); 1208 } 1209 } 1210 return this; 1211 } 1212 addValueAlias(String value, String valueAlias, AliasAddAction aliasAddAction)1213 public void addValueAlias(String value, String valueAlias, 1214 AliasAddAction aliasAddAction) { 1215 List result = (List) toValueAliases.get(value); 1216 if (result == null) { 1217 switch(aliasAddAction) { 1218 case IGNORE_IF_MISSING: return; 1219 case REQUIRE_MAIN_ALIAS: throw new IllegalArgumentException("Can't add alias for mising value: " + value); 1220 case ADD_MAIN_ALIAS: 1221 toValueAliases.put(value, result = new ArrayList(0)); 1222 break; 1223 } 1224 } 1225 addUnique(value, result); 1226 addUnique(valueAlias, result); 1227 } 1228 _getValueAliases(String valueAlias, List result)1229 protected List _getValueAliases(String valueAlias, List result) { 1230 if (toValueAliases == null) 1231 _fixValueAliases(); 1232 List a = (List) toValueAliases.get(valueAlias); 1233 if (a != null) 1234 addAllUnique(a, result); 1235 return result; 1236 } 1237 _fixValueAliases()1238 protected void _fixValueAliases() { 1239 if (toValueAliases == null) 1240 toValueAliases = new HashMap(1); 1241 for (Iterator it = getAvailableValues().iterator(); it.hasNext();) { 1242 Object value = it.next(); 1243 _ensureValueInAliases(value); 1244 } 1245 } 1246 _ensureValueInAliases(Object value)1247 protected void _ensureValueInAliases(Object value) { 1248 List result = (List) toValueAliases.get(value); 1249 if (result == null) 1250 toValueAliases.put(value, result = new ArrayList(1)); 1251 addUnique(value, result); 1252 } 1253 swapFirst2ValueAliases()1254 public BaseProperty swapFirst2ValueAliases() { 1255 for (Iterator it = toValueAliases.keySet().iterator(); it.hasNext();) { 1256 List list = (List) toValueAliases.get(it.next()); 1257 if (list.size() < 2) 1258 continue; 1259 Object first = list.get(0); 1260 list.set(0, list.get(1)); 1261 list.set(1, first); 1262 } 1263 return this; 1264 } 1265 1266 /** 1267 * @param string 1268 * @return 1269 */ addName(String string)1270 public UnicodeProperty addName(String string) { 1271 throw new UnsupportedOperationException(); 1272 } 1273 1274 } 1275 1276 public static abstract class SimpleProperty extends BaseProperty { 1277 LinkedHashSet values; 1278 addName(String alias)1279 public UnicodeProperty addName(String alias) { 1280 propertyAliases.add(alias); 1281 return this; 1282 } 1283 setValues(String valueAlias)1284 public SimpleProperty setValues(String valueAlias) { 1285 _addToValues(valueAlias, null); 1286 return this; 1287 } 1288 addAliases(String valueAlias, String... aliases)1289 public SimpleProperty addAliases(String valueAlias, String... aliases) { 1290 _addToValues(valueAlias, null); 1291 return this; 1292 } 1293 setValues(String[] valueAliases, String[] alternateValueAliases)1294 public SimpleProperty setValues(String[] valueAliases, 1295 String[] alternateValueAliases) { 1296 for (int i = 0; i < valueAliases.length; ++i) { 1297 if (valueAliases[i].equals(UNUSED)) 1298 continue; 1299 _addToValues( 1300 valueAliases[i], 1301 alternateValueAliases != null ? alternateValueAliases[i] 1302 : null); 1303 } 1304 return this; 1305 } 1306 setValues(List valueAliases)1307 public SimpleProperty setValues(List valueAliases) { 1308 this.values = new LinkedHashSet(valueAliases); 1309 for (Iterator it = this.values.iterator(); it.hasNext();) { 1310 _addToValues((String) it.next(), null); 1311 } 1312 return this; 1313 } 1314 _getAvailableValues(List result)1315 public List _getAvailableValues(List result) { 1316 if (values == null) 1317 _fillValues(); 1318 result.addAll(values); 1319 return result; 1320 } 1321 _fillValues()1322 protected void _fillValues() { 1323 List newvalues = (List) getUnicodeMap_internal() 1324 .getAvailableValues(new ArrayList()); 1325 for (Iterator it = newvalues.iterator(); it.hasNext();) { 1326 _addToValues((String) it.next(), null); 1327 } 1328 } 1329 _addToValues(String item, String alias)1330 private void _addToValues(String item, String alias) { 1331 if (values == null) 1332 values = new LinkedHashSet(); 1333 if (toValueAliases == null) 1334 _fixValueAliases(); 1335 addUnique(item, values); 1336 _ensureValueInAliases(item); 1337 addValueAlias(item, alias, AliasAddAction.REQUIRE_MAIN_ALIAS); 1338 } 1339 /* public String _getVersion() { 1340 return version; 1341 } 1342 */ 1343 } 1344 1345 public static class UnicodeMapProperty extends BaseProperty { 1346 /* 1347 * Example of usage: 1348 * new UnicodeProperty.UnicodeMapProperty() { 1349 { 1350 unicodeMap = new UnicodeMap(); 1351 unicodeMap.setErrorOnReset(true); 1352 unicodeMap.put(0xD, "CR"); 1353 unicodeMap.put(0xA, "LF"); 1354 UnicodeProperty cat = getProperty("General_Category"); 1355 UnicodeSet temp = cat.getSet("Line_Separator") 1356 .addAll(cat.getSet("Paragraph_Separator")) 1357 .addAll(cat.getSet("Control")) 1358 .addAll(cat.getSet("Format")) 1359 .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D); 1360 unicodeMap.putAll(temp, "Control"); 1361 UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true"); 1362 unicodeMap.putAll(graphemeExtend,"Extend"); 1363 UnicodeProperty hangul = getProperty("Hangul_Syllable_Type"); 1364 unicodeMap.putAll(hangul.getSet("L"),"L"); 1365 unicodeMap.putAll(hangul.getSet("V"),"V"); 1366 unicodeMap.putAll(hangul.getSet("T"),"T"); 1367 unicodeMap.putAll(hangul.getSet("LV"),"LV"); 1368 unicodeMap.putAll(hangul.getSet("LVT"),"LVT"); 1369 unicodeMap.setMissing("Other"); 1370 } 1371 }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version) 1372 */ 1373 protected UnicodeMap unicodeMap; 1374 _getUnicodeMap()1375 protected UnicodeMap _getUnicodeMap() { 1376 return unicodeMap; 1377 } 1378 set(UnicodeMap map)1379 public UnicodeMapProperty set(UnicodeMap map) { 1380 unicodeMap = map.freeze(); 1381 return this; 1382 } 1383 _getValue(int codepoint)1384 protected String _getValue(int codepoint) { 1385 return (String) unicodeMap.getValue(codepoint); 1386 } 1387 1388 /* protected List _getValueAliases(String valueAlias, List result) { 1389 if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result; 1390 result.add(valueAlias); 1391 return result; // no other aliases 1392 } _getAvailableValues(List result)1393 */protected List _getAvailableValues(List result) { 1394 unicodeMap.getAvailableValues(result); 1395 if (toValueAliases != null) { 1396 for (Object s : toValueAliases.keySet()) { 1397 if (!result.contains(s)) { 1398 result.add(s); 1399 } 1400 } 1401 } 1402 return result; 1403 } 1404 } 1405 isValidValue(String propertyValue)1406 public boolean isValidValue(String propertyValue) { 1407 if (isType(STRING_OR_MISC_MASK)) { 1408 return true; 1409 } 1410 Collection<String> values = (Collection<String>) getAvailableValues(); 1411 for (String valueAlias : values) { 1412 if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) { 1413 return true; 1414 } 1415 for (String valueAlias2 : (Collection<String>) getValueAliases(valueAlias)) { 1416 if (UnicodeProperty.compareNames(valueAlias2, propertyValue) == 0) { 1417 return true; 1418 } 1419 } 1420 } 1421 return false; 1422 } 1423 getValueAliases()1424 public List<String> getValueAliases() { 1425 List<String> result = new ArrayList(); 1426 if (isType(STRING_OR_MISC_MASK)) { 1427 return result; 1428 } 1429 Collection<String> values = (Collection<String>) getAvailableValues(); 1430 for (String valueAlias : values) { 1431 UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result); 1432 } 1433 result.removeAll(values); 1434 return result; 1435 } 1436 1437 addUntested(UnicodeSet result, boolean uniformUnassigned)1438 public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) { 1439 if (uniformUnassigned && result.contains(UnicodeProperty.getSAMPLE_UNASSIGNED())) { 1440 result.addAll(UnicodeProperty.getUNASSIGNED()); 1441 } 1442 1443 if (result.contains(UnicodeProperty.SAMPLE_HIGH_SURROGATE)) { 1444 result.addAll(UnicodeProperty.HIGH_SURROGATES); 1445 } 1446 if (result.contains(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE)) { 1447 result.addAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES); 1448 } 1449 if (result.contains(UnicodeProperty.SAMPLE_LOW_SURROGATE)) { 1450 result.addAll(UnicodeProperty.LOW_SURROGATES); 1451 } 1452 1453 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA)) { 1454 result.addAll(UnicodeProperty.PRIVATE_USE_AREA); 1455 } 1456 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A)) { 1457 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_A); 1458 } 1459 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B)) { 1460 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_B); 1461 } 1462 1463 return result; 1464 } 1465 addUntested(UnicodeMap result, boolean uniformUnassigned)1466 public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) { 1467 Object temp; 1468 if (uniformUnassigned && null != (temp = result.get(UnicodeProperty.getSAMPLE_UNASSIGNED()))) { 1469 result.putAll(UnicodeProperty.getUNASSIGNED(), temp); 1470 } 1471 1472 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_SURROGATE))) { 1473 result.putAll(UnicodeProperty.HIGH_SURROGATES, temp); 1474 } 1475 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE))) { 1476 result.putAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES, temp); 1477 } 1478 if (null != (temp = result.get(UnicodeProperty.SAMPLE_LOW_SURROGATE))) { 1479 result.putAll(UnicodeProperty.LOW_SURROGATES, temp); 1480 } 1481 1482 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA))) { 1483 result.putAll(UnicodeProperty.PRIVATE_USE_AREA, temp); 1484 } 1485 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A))) { 1486 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_A, temp); 1487 } 1488 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B))) { 1489 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_B, temp); 1490 } 1491 return result; 1492 } 1493 isDefault(int cp)1494 public boolean isDefault(int cp) { 1495 String value = getValue(cp); 1496 if (isType(STRING_OR_MISC_MASK)) { 1497 return equals(cp, value); 1498 } 1499 String defaultValue = getValue(getSAMPLE_UNASSIGNED()); 1500 return defaultValue == null ? value == null : defaultValue.equals(value); 1501 } 1502 hasUniformUnassigned()1503 public boolean hasUniformUnassigned() { 1504 return hasUniformUnassigned; 1505 } setUniformUnassigned(boolean hasUniformUnassigned)1506 protected UnicodeProperty setUniformUnassigned(boolean hasUniformUnassigned) { 1507 this.hasUniformUnassigned = hasUniformUnassigned; 1508 return this; 1509 } 1510 1511 public static class UnicodeSetProperty extends BaseProperty { 1512 protected UnicodeSet unicodeSet; 1513 private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"}; 1514 private static final List YESNO = Arrays.asList(YESNO_ARRAY); 1515 set(UnicodeSet set)1516 public UnicodeSetProperty set(UnicodeSet set) { 1517 unicodeSet = set.freeze(); 1518 return this; 1519 } 1520 set(String string)1521 public UnicodeSetProperty set(String string) { 1522 // TODO Auto-generated method stub 1523 return set(new UnicodeSet(string).freeze()); 1524 } 1525 _getValue(int codepoint)1526 protected String _getValue(int codepoint) { 1527 return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1]; 1528 } 1529 _getAvailableValues(List result)1530 protected List _getAvailableValues(List result) { 1531 return YESNO; 1532 } 1533 } 1534 1535 // private static class StringTransformProperty extends SimpleProperty { 1536 // Transform<String,String> transform; 1537 // 1538 // public StringTransformProperty(Transform<String,String> transform, boolean hasUniformUnassigned) { 1539 // this.transform = transform; 1540 // setUniformUnassigned(hasUniformUnassigned); 1541 // } 1542 // protected String _getValue(int codepoint) { 1543 // return transform.transform(UTF16.valueOf(codepoint)); 1544 // } 1545 // } 1546 // 1547 // private static class CodepointTransformProperty extends SimpleProperty { 1548 // Transform<Integer,String> transform; 1549 // 1550 // public CodepointTransformProperty(Transform<Integer,String> transform, boolean hasUniformUnassigned) { 1551 // this.transform = transform; 1552 // setUniformUnassigned(hasUniformUnassigned); 1553 // } 1554 // protected String _getValue(int codepoint) { 1555 // return transform.transform(codepoint); 1556 // } 1557 // } 1558 } 1559 1560