1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Objects; 5 import com.google.common.base.Splitter; 6 import com.google.common.collect.ImmutableSet; 7 import com.ibm.icu.impl.UnicodeMap; 8 import com.ibm.icu.impl.Utility; 9 import com.ibm.icu.lang.CharSequences; 10 import com.ibm.icu.text.SimpleFormatter; 11 import com.ibm.icu.text.Transform; 12 import com.ibm.icu.text.UTF16; 13 import com.ibm.icu.text.UnicodeSet; 14 import com.ibm.icu.text.UnicodeSet.SpanCondition; 15 import com.ibm.icu.text.UnicodeSetSpanner; 16 import java.io.File; 17 import java.util.ArrayList; 18 import java.util.Arrays; 19 import java.util.Collection; 20 import java.util.Collections; 21 import java.util.HashSet; 22 import java.util.LinkedHashSet; 23 import java.util.List; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.Set; 27 import java.util.TreeSet; 28 import java.util.concurrent.ConcurrentHashMap; 29 import java.util.regex.Pattern; 30 import org.unicode.cldr.tool.ChartAnnotations; 31 import org.unicode.cldr.tool.SubdivisionNames; 32 import org.unicode.cldr.util.Factory.SourceTreeType; 33 import org.unicode.cldr.util.XMLFileReader.SimpleHandler; 34 35 public class Annotations { 36 private static final boolean DEBUG = false; 37 38 public static final String BAD_MARKER = "⊗"; 39 public static final String MISSING_MARKER = "⊖"; 40 public static final String ENGLISH_MARKER = "⊕"; 41 public static final String EQUIVALENT = "≣"; 42 public static final String NEUTRAL_HOLDING = ""; 43 44 public static final Splitter splitter = 45 Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings(); 46 static final Splitter dotSplitter = Splitter.on(".").trimResults(); 47 48 static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>(); 49 static final Set<String> LOCALES; 50 static final Set<String> ALL_LOCALES; 51 static final Factory ANNOTATIONS_FACTORY; 52 private static final AnnotationSet ENGLISH_DATA; 53 54 private final Set<String> annotations; 55 private final String tts; 56 57 static { 58 ANNOTATIONS_FACTORY = CLDRConfig.getInstance().getAnnotationsFactory(); 59 ALL_LOCALES = ANNOTATIONS_FACTORY.getAvailable(); 60 final Set<String> commonList = new HashSet<>(); 61 // calculate those in common 62 for (final String loc : ALL_LOCALES) { 63 final File f = getDirForLocale(loc); 64 if (SimpleFactory.getSourceTreeType(f) == SourceTreeType.common) { 65 commonList.add(loc); 66 } 67 } 68 LOCALES = Collections.unmodifiableSet(commonList); 69 ENGLISH_DATA = getDataSet("en"); 70 } 71 72 static class MyHandler extends SimpleHandler { 73 private final String locale; 74 private final UnicodeMap<Annotations> localeData = new UnicodeMap<>(); 75 private final AnnotationSet parentData; 76 private final Map<String, AnnotationSet> dirCache; 77 MyHandler( Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)78 public MyHandler( 79 Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) { 80 this.locale = locale; 81 this.parentData = parentData; 82 this.dirCache = dirCache; 83 } 84 cleanup()85 public AnnotationSet cleanup() { 86 // add parent data (may be overridden) 87 UnicodeMap<Annotations> templocaleData = null; 88 if (parentData != null) { 89 templocaleData = new UnicodeMap<>(); 90 UnicodeSet keys = 91 new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet()); 92 for (String key : keys) { 93 Annotations parentValue = parentData.baseData.get(key); 94 Annotations myValue = localeData.get(key); 95 if (parentValue == null) { 96 templocaleData.put(key, myValue); 97 } else if (myValue == null) { 98 templocaleData.put(key, parentValue); 99 } else { // need to combine 100 String tts = myValue.tts == null ? parentValue.tts : myValue.tts; 101 Set<String> annotations = 102 myValue.annotations == null || myValue.annotations.isEmpty() 103 ? parentValue.annotations 104 : myValue.annotations; 105 templocaleData.put(key, new Annotations(annotations, tts)); 106 } 107 } 108 } 109 110 final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData); 111 dirCache.put(locale, result); 112 return result; 113 } 114 115 static final Pattern SPACES = Pattern.compile("\\s+"); 116 117 @Override handlePathValue(String path, String value)118 public void handlePathValue(String path, String value) { 119 if (value.contains(CldrUtility.INHERITANCE_MARKER)) { 120 return; // skip all ^^^ 121 } 122 XPathParts parts = XPathParts.getFrozenInstance(path); 123 String lastElement = parts.getElement(-1); 124 if (!lastElement.equals("annotation")) { 125 if (!"identity".equals(parts.getElement(1))) { 126 throw new IllegalArgumentException("Unexpected path"); 127 } 128 return; 129 } 130 String usString = parts.getAttributeValue(-1, "cp"); 131 UnicodeSet us1 = 132 usString.startsWith("[") && usString.endsWith("]") 133 ? new UnicodeSet(usString) 134 : new UnicodeSet().add(usString); 135 UnicodeSet us = new UnicodeSet(); 136 for (String s : us1) { 137 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, "")); 138 } 139 String tts = parts.getAttributeValue(-1, "tts"); 140 String type = parts.getAttributeValue(-1, "type"); 141 String alt = parts.getAttributeValue(-1, "alt"); 142 143 // clean up value 144 String value2 = SPACES.matcher(value).replaceAll(" ").trim(); 145 if (!value2.equals(value)) { 146 value = value2; 147 } 148 if (alt != null) { 149 // do nothing for now 150 } else if ("tts".equals(type)) { 151 addItems(localeData, us, Collections.<String>emptySet(), value); 152 } else { 153 Set<String> attributes = new TreeSet<>(splitter.splitToList(value)); 154 addItems(localeData, us, attributes, tts); 155 } 156 } 157 addItems( UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)158 private void addItems( 159 UnicodeMap<Annotations> unicodeMap, 160 UnicodeSet us, 161 Set<String> attributes, 162 String tts) { 163 for (String entry : us) { 164 addItems(unicodeMap, entry, attributes, tts); 165 } 166 } 167 addItems( UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)168 private void addItems( 169 UnicodeMap<Annotations> unicodeMap, 170 String entry, 171 Set<String> attributes, 172 String tts) { 173 Annotations annotations = unicodeMap.get(entry); 174 if (annotations == null) { 175 unicodeMap.put(entry, new Annotations(attributes, tts)); 176 } else { 177 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item 178 } 179 } 180 } 181 Annotations(Set<String> attributes, String tts2)182 public Annotations(Set<String> attributes, String tts2) { 183 annotations = 184 attributes == null 185 ? Collections.<String>emptySet() 186 : ImmutableSet.copyOf(attributes); 187 for (String attr : annotations) { 188 if (attr.contains(CldrUtility.INHERITANCE_MARKER)) { 189 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER); 190 } 191 } 192 tts = tts2; 193 if (tts != null && tts.contains(CldrUtility.INHERITANCE_MARKER)) { 194 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER); 195 } 196 } 197 add(Set<String> attributes, String tts2)198 public Annotations add(Set<String> attributes, String tts2) { 199 return new Annotations( 200 getKeywords() == null 201 ? attributes 202 : attributes == null ? getKeywords() : union(attributes, getKeywords()), 203 getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup()); 204 } 205 throwDup()206 private String throwDup() { 207 throw new IllegalArgumentException("Duplicate tts"); 208 } 209 union(Set<String> a, Set<String> b)210 private Set<String> union(Set<String> a, Set<String> b) { 211 TreeSet<String> result = new TreeSet<>(a); 212 result.addAll(b); 213 return result; 214 } 215 216 /** 217 * @return all common locales 218 */ getAvailable()219 public static Set<String> getAvailable() { 220 return LOCALES; 221 } 222 223 /** 224 * @return all common locales 225 */ getAvailableLocales()226 public static Set<String> getAvailableLocales() { 227 return LOCALES; 228 } 229 230 /** 231 * @return all locales, including seed 232 */ getAllAvailable()233 public static Set<String> getAllAvailable() { 234 return ALL_LOCALES; 235 } 236 237 public static final class AnnotationSet { 238 239 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 240 241 static final Factory factory = CONFIG.getCldrFactory(); 242 static final CLDRFile ENGLISH = CONFIG.getEnglish(); 243 static final CLDRFile ENGLISH_ANNOTATIONS = null; 244 static final SubdivisionNames englishSubdivisionIdToName = 245 new SubdivisionNames("en", "main"); 246 247 private static final String BLACK_RIGHTWARDS_ARROW = "\u27A1"; 248 private static final String BLACK_LEFTWARDS_ARROW = "\u2B05"; 249 // CLDRConfig.getInstance().getAnnotationsFactory().make("en", false); 250 251 private final String locale; 252 private final UnicodeMap<Annotations> baseData; 253 private final UnicodeMap<Annotations> unresolvedData; 254 private final CLDRFile cldrFile; 255 private final SubdivisionNames subdivisionIdToName; 256 private final SimpleFormatter initialPattern; 257 private final SimpleFormatter rightwardsArrowPattern; 258 private final Pattern initialRegexPattern; 259 private final XListFormatter listPattern; 260 private final Set<String> flagLabelSet; 261 private final Set<String> keycapLabelSet; 262 private final String keycapLabel; 263 private final String flagLabel; 264 // private final String maleLabel; 265 // private final String femaleLabel; 266 private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>(); 267 268 static UnicodeSetSpanner uss = 269 new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed 270 AnnotationSet( String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)271 private AnnotationSet( 272 String locale, 273 UnicodeMap<Annotations> source, 274 UnicodeMap<Annotations> resolvedSource) { 275 this.locale = locale; 276 unresolvedData = source.freeze(); 277 this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze(); 278 cldrFile = factory.make(locale, true); 279 subdivisionIdToName = new SubdivisionNames(locale, "main", "subdivisions"); 280 // EmojiSubdivisionNames.getSubdivisionIdToName(locale); 281 listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST); 282 final String initialPatternString = 283 getStringValue( 284 "//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]"); 285 initialPattern = SimpleFormatter.compile(initialPatternString); 286 // <characterLabelPattern type="facing-right">{0} facing 287 // right</characterLabelPattern> 288 final String facingRightPatternString = 289 getStringValue( 290 "//ldml/characterLabels/characterLabelPattern[@type=\"facing-right\"]"); 291 292 rightwardsArrowPattern = 293 facingRightPatternString == null 294 ? null 295 : SimpleFormatter.compile(facingRightPatternString); 296 final String regexPattern = 297 ("\\Q" 298 + initialPatternString 299 .replace("{0}", "\\E.*\\Q") 300 .replace("{1}", "\\E.*\\Q") 301 + "\\E") 302 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern 303 initialRegexPattern = Pattern.compile(regexPattern); 304 flagLabelSet = getLabelSet("flag"); 305 flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next(); 306 keycapLabelSet = getLabelSet("keycap"); 307 keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next(); 308 // maleLabel = 309 // getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]"); 310 // femaleLabel = 311 // getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]"); 312 } 313 314 /** 315 * @deprecated Use {@link #getLabelSet(String)} instead 316 */ 317 @Deprecated getLabelSet()318 private Set<String> getLabelSet() { 319 return getLabelSet("flag"); 320 } 321 getLabelSet(String typeAttributeValue)322 private Set<String> getLabelSet(String typeAttributeValue) { 323 String label = 324 getStringValue( 325 "//ldml/characterLabels/characterLabel[@type=\"" 326 + typeAttributeValue 327 + "\"]"); 328 return label == null ? Collections.<String>emptySet() : Collections.singleton(label); 329 } 330 getStringValue(String xpath)331 private String getStringValue(String xpath) { 332 return getStringValue(xpath, cldrFile, ENGLISH); 333 } 334 getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)335 private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) { 336 String result = cldrFile2.getStringValueWithBailey(xpath); 337 if (result == null) { 338 return ENGLISH_MARKER + english.getStringValueWithBailey(xpath); 339 } 340 String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null); 341 if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) 342 || sourceLocale.equals(XMLSource.ROOT_ID)) { 343 if (!xpath.equals( 344 "//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]")) { 345 return MISSING_MARKER + result; 346 } 347 } 348 return result; 349 } 350 getShortName(String code)351 public String getShortName(String code) { 352 return getShortName(code, null); 353 } 354 getShortName(String code, Transform<String, String> otherSource)355 public String getShortName(String code, Transform<String, String> otherSource) { 356 if (code.equals("♀️")) { 357 int debug = 0; 358 } 359 360 code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""); 361 Annotations stock = baseData.get(code); 362 if (stock != null && stock.tts != null) { 363 return stock.tts; 364 } 365 stock = localeCache.get(code); 366 if (stock != null) { 367 return stock.tts; 368 } 369 stock = synthesize(code, otherSource); 370 if (stock != null) { 371 localeCache.put(code, stock); 372 return stock.tts; 373 } 374 return null; 375 } 376 getKeywords(String code)377 public Set<String> getKeywords(String code) { 378 code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""); 379 Annotations stock = baseData.get(code); 380 if (stock != null && stock.annotations != null) { 381 return stock.annotations; 382 } 383 stock = localeCache.get(code); 384 if (stock != null) { 385 return stock.annotations; 386 } 387 stock = synthesize(code, null); 388 if (stock != null) { 389 localeCache.put(code, stock); 390 return stock.annotations; 391 } 392 return Collections.<String>emptySet(); 393 } 394 395 /** 396 * Returns the set of all keys for which annotations are available. WARNING: keys have the 397 * Emoji Presentation Selector removed! 398 */ keySet()399 public UnicodeSet keySet() { 400 return baseData.keySet(); 401 } 402 synthesize(String code, Transform<String, String> otherSource)403 private Annotations synthesize(String code, Transform<String, String> otherSource) { 404 if (code.equals("♂")) { 405 int debug = 0; 406 } 407 String shortName = null; 408 int len = code.codePointCount(0, code.length()); 409 boolean isKeycap10 = code.equals(""); 410 if (len == 1 && !isKeycap10) { 411 String tempName = null; 412 if (locale.equals("en")) { 413 if (otherSource != null) { 414 tempName = otherSource.transform(code); 415 } 416 if (tempName == null) { 417 return null; 418 } 419 return new Annotations(Collections.<String>emptySet(), tempName); 420 } else { // fall back to English if possible, but mark it. 421 tempName = getDataSet("en").getShortName(code); 422 if (tempName == null) { 423 return null; 424 } 425 return new Annotations( 426 Collections.<String>emptySet(), ENGLISH_MARKER + tempName); 427 } 428 } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) { 429 String countryCode = EmojiConstants.getFlagCode(code); 430 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode); 431 String regionName = getStringValue(path); 432 if (regionName == null) { 433 regionName = ENGLISH_MARKER + ENGLISH.getStringValueWithBailey(path); 434 } 435 String flagName = 436 flagLabel == null 437 ? regionName 438 : initialPattern.format(flagLabel, regionName); 439 return new Annotations(flagLabelSet, flagName); 440 } else if (code.startsWith(EmojiConstants.BLACK_FLAG) 441 && code.endsWith(EmojiConstants.TAG_TERM)) { 442 String subdivisionCode = EmojiConstants.getTagSpec(code); 443 String subdivisionName = subdivisionIdToName.get(subdivisionCode); 444 if (subdivisionName == null) { 445 // subdivisionName = 446 // englishSubdivisionIdToName.get(subdivisionCode); 447 // if (subdivisionName != null) { 448 // subdivisionName = ENGLISH_MARKER + subdivisionCode; 449 // } else { 450 subdivisionName = MISSING_MARKER + subdivisionCode; 451 // } 452 } 453 String flagName = 454 flagLabel == null 455 ? subdivisionName 456 : initialPattern.format(flagLabel, subdivisionName); 457 return new Annotations(flagLabelSet, flagName); 458 } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) { 459 final String rem = code.equals("") ? "10" : UTF16.valueOf(code.charAt(0)); 460 shortName = initialPattern.format(keycapLabel, rem); 461 return new Annotations(keycapLabelSet, shortName); 462 } 463 UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET; 464 String rem = ""; 465 SimpleFormatter startPattern = initialPattern; 466 if (EmojiConstants.COMPONENTS.containsSome(code)) { 467 synchronized (uss) { 468 rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED); 469 code = uss.deleteFrom(code, SpanCondition.CONTAINED); 470 } 471 } 472 if (code.contains(EmojiConstants.JOINER_STRING)) { 473 if (code.contains(BLACK_RIGHTWARDS_ARROW)) { 474 String code2 = 475 code.replace(EmojiConstants.JOINER_STRING + BLACK_RIGHTWARDS_ARROW, ""); 476 if (!Objects.equal(code2, code)) { 477 Set<String> keywords = getKeywords(code2); 478 String baseName = getShortName(code2); 479 if (baseName == null 480 || keywords == null 481 || rightwardsArrowPattern == null) { 482 return null; 483 } 484 shortName = rightwardsArrowPattern.format(baseName); 485 return new Annotations(keywords, shortName); 486 } 487 } else if (code.contains(BLACK_LEFTWARDS_ARROW)) { 488 throw new UnsupportedOperationException( 489 "Implement if leftwards emoji are added"); 490 } 491 // if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){ 492 // if (matchesInitialPattern(code)) { // "♂️","police 493 // officer: man, medium-light skin tone" 494 // rem = EmojiConstants.MAN + rem; 495 // code = 496 // code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length()); 497 // } // otherwise "♂️","man biking: dark skin tone" 498 // } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){ 499 // if (matchesInitialPattern(code)) { // 500 // rem = EmojiConstants.WOMAN + rem; 501 // code = 502 // code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length()); 503 // } 504 // } else 505 if (code.contains(EmojiConstants.KISS)) { 506 rem = code + rem; 507 code = ""; 508 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 509 } else if (code.contains(EmojiConstants.HEART) 510 && !code.startsWith(EmojiConstants.HEART)) { 511 rem = code + rem; 512 code = ""; 513 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 514 } else if (code.equals(EmojiConstants.COMPOSED_HANDSHAKE)) { 515 code = EmojiConstants.HANDSHAKE; 516 } else if (code.contains(EmojiConstants.HANDSHAKE)) { 517 code = 518 code.startsWith(EmojiConstants.MAN) 519 ? "" 520 : code.endsWith(EmojiConstants.MAN) 521 ? "" 522 : code.startsWith(EmojiConstants.WOMAN) 523 ? "" 524 : NEUTRAL_HOLDING; 525 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 526 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) { 527 rem = code + rem; 528 code = ""; 529 skipSet = EmojiConstants.REM_GROUP_SKIP_SET; 530 // } else { 531 // startPattern = listPattern; 532 } 533 // left over is "⚖","judge: man, dark skin tone" 534 } 535 return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource); 536 } 537 matchesInitialPattern(String code)538 private boolean matchesInitialPattern(String code) { 539 Annotations baseAnnotation = baseData.get(code); 540 String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName(); 541 return baseName != null && initialRegexPattern.matcher(baseName).matches(); 542 } 543 getBasePlusRemainder( CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)544 private Annotations getBasePlusRemainder( 545 CLDRFile cldrFile, 546 String base, 547 String rem, 548 UnicodeSet ignore, 549 SimpleFormatter pattern, 550 Transform<String, String> otherSource) { 551 String shortName = null; 552 Set<String> annotations = new LinkedHashSet<>(); 553 boolean needMarker = true; 554 555 if (base != null) { 556 needMarker = false; 557 Annotations stock = baseData.get(base); 558 if (stock != null) { 559 shortName = stock.getShortName(); 560 annotations.addAll(stock.getKeywords()); 561 } else if (otherSource != null) { 562 shortName = otherSource.transform(base); 563 } else { 564 return null; 565 } 566 if (shortName == null) { 567 return null; 568 } 569 } 570 571 boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0)); 572 Collection<String> arguments = new ArrayList<>(); 573 int lastSkin = -1; 574 575 for (int mod : CharSequences.codePoints(rem)) { 576 if (ignore.contains(mod)) { 577 continue; 578 } 579 if (EmojiConstants.MODIFIERS.contains(mod)) { 580 if (lastSkin == mod) { 581 continue; 582 } 583 lastSkin = 584 mod; // collapse skin tones. TODO fix if we ever do multi-skin families 585 } 586 Annotations stock = baseData.get(mod); 587 String modName = null; 588 if (stock != null) { 589 modName = stock.getShortName(); 590 } else if (otherSource != null) { 591 modName = otherSource.transform(base); 592 } 593 if (modName == null) { 594 needMarker = true; 595 if (ENGLISH_DATA != null) { 596 Annotations engName = ENGLISH_DATA.baseData.get(mod); 597 if (engName != null) { 598 modName = engName.getShortName(); 599 } 600 } 601 if (modName == null) { 602 modName = Utility.hex(mod); // ultimate fallback 603 } 604 } 605 if (hackBlond && shortName != null) { 606 // HACK: make the blond names look like the other hair names 607 // Split the short name into pieces, if possible, and insert the modName first 608 String sep = initialPattern.format("", ""); 609 int splitPoint = shortName.indexOf(sep); 610 if (splitPoint >= 0) { 611 String modName0 = shortName.substring(splitPoint + sep.length()); 612 shortName = shortName.substring(0, splitPoint); 613 if (modName != null) { 614 arguments.add(modName); 615 annotations.add(modName); 616 } 617 modName = modName0; 618 } 619 hackBlond = false; 620 } 621 622 if (modName != null) { 623 arguments.add(modName); 624 annotations.add(modName); 625 } 626 } 627 if (!arguments.isEmpty()) { 628 shortName = pattern.format(shortName, listPattern.format(arguments)); 629 } 630 Annotations result = 631 new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName); 632 return result; 633 } 634 635 /** 636 * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead 637 */ 638 @Deprecated toString(String code, boolean html)639 public String toString(String code, boolean html) { 640 return toString(code, html, null); 641 } 642 toString(String code, boolean html, AnnotationSet parentAnnotations)643 public String toString(String code, boolean html, AnnotationSet parentAnnotations) { 644 if (locale.equals("be") && code.equals("")) { 645 int debug = 0; 646 } 647 String shortName = getShortName(code); 648 if (shortName == null 649 || shortName.startsWith(BAD_MARKER) 650 || shortName.startsWith(ENGLISH_MARKER)) { 651 return MISSING_MARKER; 652 } 653 654 String parentShortName = 655 parentAnnotations == null ? null : parentAnnotations.getShortName(code); 656 if (shortName != null && Objects.equal(shortName, parentShortName)) { 657 shortName = EQUIVALENT; 658 } 659 660 Set<String> keywords = getKeywordsMinus(code); 661 Set<String> parentKeywords = 662 parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code); 663 if (keywords != null 664 && !keywords.isEmpty() 665 && Objects.equal(keywords, parentKeywords)) { 666 keywords = Collections.singleton(EQUIVALENT); 667 } 668 669 String result = Joiner.on(" |\u00a0").join(keywords); 670 if (shortName != null) { 671 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*"); 672 if (result.isEmpty()) { 673 result = ttsString; 674 } else { 675 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result; 676 } 677 } 678 return result; 679 } 680 getExplicitValues()681 public UnicodeMap<Annotations> getExplicitValues() { 682 return baseData; 683 } 684 getUnresolvedExplicitValues()685 public UnicodeMap<Annotations> getUnresolvedExplicitValues() { 686 return unresolvedData; 687 } 688 getKeywordsMinus(String code)689 public Set<String> getKeywordsMinus(String code) { 690 String shortName = getShortName(code); 691 Set<String> keywords = getKeywords(code); 692 if (shortName != null && keywords.contains(shortName)) { 693 keywords = new LinkedHashSet<>(keywords); 694 keywords.remove(shortName); 695 } 696 return keywords; 697 } 698 } 699 getDataSet(String locale)700 public static AnnotationSet getDataSet(String locale) { 701 final File theDir = getDirForLocale(locale); 702 return getDataSet(theDir.getAbsolutePath(), locale); 703 } 704 getDirForLocale(String locale)705 private static File getDirForLocale(String locale) { 706 // use the annotations Factory to find the XML file 707 List<File> dirs = ANNOTATIONS_FACTORY.getSourceDirectoriesForLocale(locale); 708 if (dirs == null || dirs.isEmpty()) { 709 throw new IllegalArgumentException( 710 "Cannot find source annotation directory for locale " + locale); 711 } else if (dirs.size() != 1) { 712 throw new IllegalArgumentException( 713 "Did not find exactly one source directory for locale " 714 + locale 715 + " - " 716 + dirs); 717 } 718 final File theDir = dirs.get(0); 719 return theDir; 720 } 721 getDataSet(String dir, String locale)722 public static AnnotationSet getDataSet(String dir, String locale) { 723 Map<String, AnnotationSet> dirCache = cache.get(dir); 724 if (dirCache == null) { 725 cache.put(dir, dirCache = new ConcurrentHashMap<>()); 726 } 727 AnnotationSet result = dirCache.get(locale); 728 if (result != null) { 729 return result; 730 } 731 if (!LOCALES.contains(locale)) { 732 return null; 733 } 734 String parentString = LocaleIDParser.getParent(locale); 735 AnnotationSet parentData = null; 736 if (parentString != null && !parentString.equals("root")) { 737 parentData = getDataSet(dir, parentString); 738 } 739 MyHandler myHandler = new MyHandler(dirCache, locale, parentData); 740 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 741 xfr.read(dir + "/" + locale + ".xml", -1, true); 742 return myHandler.cleanup(); 743 } 744 getData(String locale)745 public static UnicodeMap<Annotations> getData(String locale) { 746 final File theDir = getDirForLocale(locale); 747 return getData(theDir.getAbsolutePath(), locale); 748 } 749 getData(String dir, String locale)750 public static UnicodeMap<Annotations> getData(String dir, String locale) { 751 AnnotationSet result = getDataSet(dir, locale); 752 return result == null ? null : result.baseData; 753 } 754 755 @Override toString()756 public String toString() { 757 return toString(false); 758 } 759 toString(boolean html)760 public String toString(boolean html) { 761 Set<String> annotations2 = getKeywords(); 762 if (getShortName() != null && annotations2.contains(getShortName())) { 763 annotations2 = new LinkedHashSet<>(getKeywords()); 764 annotations2.remove(getShortName()); 765 } 766 String result = Joiner.on(" |\u00a0").join(annotations2); 767 if (getShortName() != null) { 768 String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*"); 769 if (result.isEmpty()) { 770 result = ttsString; 771 } else { 772 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result; 773 } 774 } 775 return result; 776 } 777 778 /** 779 * @return the annotations 780 */ getKeywords()781 public Set<String> getKeywords() { 782 return annotations; 783 } 784 785 /** 786 * @return the tts 787 */ getShortName()788 public String getShortName() { 789 return tts; 790 } 791 main(String[] args)792 public static void main(String[] args) { 793 if (true) { 794 writeList(); 795 } else { 796 writeEnglish(); 797 } 798 } 799 writeList()800 private static void writeList() { 801 AnnotationSet eng = Annotations.getDataSet("en"); 802 Annotations an = eng.baseData.get("❤"); 803 final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues(); 804 Set<String> keys = new TreeSet<>(ChartAnnotations.RBC); 805 map.keySet().addAllTo(keys); 806 // keys.add("⚖"); 807 for (String key : keys) { 808 System.out.println( 809 Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT) 810 + "\t" 811 + key 812 + "\t" 813 + map.get(key).getShortName() 814 + "\t" 815 + Joiner.on(" | ").join(map.get(key).getKeywords())); 816 } 817 for (String s : 818 Arrays.asList( 819 "", 820 "❤️", 821 "", 822 "❤️", 823 "", 824 "", 825 "", 826 "", 827 "⚖", 828 "⚖", 829 "⚖", 830 "⚖", 831 "", 832 "♂️", 833 "♂️", 834 "♀️", 835 "♀️", 836 "", 837 "", 838 "♂️", 839 "♂️", 840 "♀️", 841 "♀️")) { 842 final String shortName = eng.getShortName(s); 843 final Set<String> keywords = eng.getKeywords(s); 844 System.out.println( 845 "{\"" 846 + s 847 + "\",\"" 848 + shortName 849 + "\",\"" 850 + Joiner.on("|").join(keywords) 851 + "\"},"); 852 } 853 } 854 writeEnglish()855 private static void writeEnglish() { 856 AnnotationSet eng = Annotations.getDataSet("en"); 857 System.out.println(Annotations.getAvailable()); 858 AnnotationSet eng100 = Annotations.getDataSet("en_001"); 859 UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues(); 860 final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues(); 861 Set<String> keys = new TreeSet<>(ChartAnnotations.RBC); 862 map.keySet().addAllTo(keys); 863 for (String key : keys) { 864 Annotations value = map.get(key); 865 Annotations value100 = map100.get(key); 866 Set<String> keywords100 = (value100 == null ? null : value100.getKeywords()); 867 System.out.println( 868 key 869 + "\tname\t" 870 + "\t" 871 + value.getShortName() 872 + "\t" 873 + (value100 == null ? "" : value100.getShortName()) 874 + "\t" 875 + Joiner.on(" | ").join(value.getKeywords()) 876 + "\t" 877 + (keywords100 == null ? "" : Joiner.on(" | ").join(keywords100))); 878 } 879 } 880 } 881