package org.unicode.cldr.util;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import org.unicode.cldr.tool.ChartAnnotations;
import org.unicode.cldr.tool.SubdivisionNames;
import org.unicode.cldr.util.Factory.SourceTreeType;
import org.unicode.cldr.util.XMLFileReader.SimpleHandler;

import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.CharSequences;
import com.ibm.icu.text.SimpleFormatter;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.SpanCondition;
import com.ibm.icu.text.UnicodeSetSpanner;

/**
 * One annotation entry: a set of keywords plus an optional short ("tts") name.
 *
 * <p>The static side of the class loads per-locale annotation XML files through
 * {@link XMLFileReader}, caches the resulting {@link AnnotationSet}s per source directory, and
 * exposes the list of available locales.
 */
public class Annotations {
    private static final boolean DEBUG = false;

    /** Prefix tested by {@link AnnotationSet#toString(String, boolean, AnnotationSet)}. */
    public static final String BAD_MARKER = "⊗";
    /** Prefix/value used when a name is missing or only available from root/code fallback. */
    public static final String MISSING_MARKER = "⊖";
    /** Prefix used when a value was taken from English instead of the requested locale. */
    public static final String ENGLISH_MARKER = "⊕";
    /** Placeholder used when a value equals the parent locale's value. */
    public static final String EQUIVALENT = "≣";
    /**
     * Base sequence used for holding-hands sequences that match none of the gendered cases.
     *
     * <p>NOTE(review): this literal had lost its supplementary-plane characters (it read as an
     * empty string). It is restored here as U+1F9D1 U+200D U+1F91D U+200D U+1F9D1 (people
     * holding hands), written with Unicode escapes so it survives encoding-lossy tooling.
     * Confirm against the upstream file.
     */
    public static final String NEUTRAL_HOLDING = "\uD83E\uDDD1\u200D\uD83E\uDD1D\u200D\uD83E\uDDD1";

    /** Splits a keyword value on {@code |} or {@code ;}, trimming and dropping empty pieces. */
    public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings();
    static final Splitter dotSplitter = Splitter.on(".").trimResults();

    /** Source directory path -> (locale -> loaded data). */
    static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>();
    /** Locales whose source directory is of tree type {@code common}. */
    static final Set<String> LOCALES;
    /** Every locale the annotations factory reports as available. */
    static final Set<String> ALL_LOCALES;
    static final Factory ANNOTATIONS_FACTORY;
    /** English data; still {@code null} while the English data itself is being loaded. */
    private static final AnnotationSet ENGLISH_DATA;

    private final Set<String> annotations;
    private final String tts;

    static {
        ANNOTATIONS_FACTORY = CLDRConfig.getInstance().getAnnotationsFactory();
        ALL_LOCALES = ANNOTATIONS_FACTORY.getAvailable();
        // Keep only the locales that live in the "common" source tree.
        final Set<String> commonList = new HashSet<>();
        for (final String loc : ALL_LOCALES) {
            final File dir = getDirForLocale(loc);
            if (SimpleFactory.getSourceTreeType(dir) == SourceTreeType.common) {
                commonList.add(loc);
            }
        }
        LOCALES = Collections.unmodifiableSet(commonList);
        ENGLISH_DATA = getDataSet("en");
    }

    /**
     * XML handler that collects the annotations of one locale file and, in {@link #cleanup()},
     * merges them with the parent locale's data.
     */
    static class MyHandler extends SimpleHandler {
        static final Pattern SPACES = Pattern.compile("\\s+");

        private final String locale;
        private final UnicodeMap<Annotations> localeData = new UnicodeMap<>();
        private final AnnotationSet parentData;
        private final Map<String, AnnotationSet> dirCache;

        /**
         * @param dirCache   per-directory cache that receives the finished set in {@link #cleanup()}
         * @param locale     locale being read
         * @param parentData already-loaded parent data, or {@code null} if there is none
         */
        public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) {
            this.locale = locale;
            this.parentData = parentData;
            this.dirCache = dirCache;
        }

        /**
         * Builds the {@link AnnotationSet} for this locale, stores it in the directory cache and
         * returns it. When parent data exists, a resolved map is produced in which this locale's
         * tts and keywords override the parent's, each falling back to the parent individually.
         *
         * @return the finished annotation set
         */
        public AnnotationSet cleanup() {
            UnicodeMap<Annotations> resolved = null;
            if (parentData != null) {
                resolved = new UnicodeMap<>();
                final UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet());
                for (final String key : keys) {
                    final Annotations parentValue = parentData.baseData.get(key);
                    final Annotations myValue = localeData.get(key);
                    if (parentValue == null) {
                        resolved.put(key, myValue);
                    } else if (myValue == null) {
                        resolved.put(key, parentValue);
                    } else {
                        // Both present: take each part from this locale when it has one.
                        final String mergedTts = myValue.tts == null ? parentValue.tts : myValue.tts;
                        final boolean noOwnKeywords = myValue.annotations == null || myValue.annotations.isEmpty();
                        final Set<String> mergedKeywords = noOwnKeywords ? parentValue.annotations : myValue.annotations;
                        resolved.put(key, new Annotations(mergedKeywords, mergedTts));
                    }
                }
            }

            final AnnotationSet result = new AnnotationSet(locale, localeData, resolved);
            dirCache.put(locale, result);
            return result;
        }

        /**
         * Records one {@code annotation} element. Values containing the inheritance marker and
         * elements with an {@code alt} attribute are skipped; the emoji variant selector is
         * removed from every key.
         *
         * @throws IllegalArgumentException if the path is neither an annotation nor under
         *         {@code identity}
         */
        @Override
        public void handlePathValue(String path, String value) {
            if (value.contains(CldrUtility.INHERITANCE_MARKER)) {
                return; // skip all inherited values
            }
            final XPathParts parts = XPathParts.getFrozenInstance(path);
            if (!parts.getElement(-1).equals("annotation")) {
                if (!"identity".equals(parts.getElement(1))) {
                    throw new IllegalArgumentException("Unexpected path");
                }
                return;
            }
            final String usString = parts.getAttributeValue(-1, "cp");
            // A bracketed cp attribute is a UnicodeSet pattern; anything else is one string.
            final UnicodeSet rawKeys = usString.startsWith("[") && usString.endsWith("]")
                ? new UnicodeSet(usString)
                : new UnicodeSet().add(usString);
            final UnicodeSet keys = new UnicodeSet();
            for (final String s : rawKeys) {
                keys.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""));
            }
            final String ttsAttribute = parts.getAttributeValue(-1, "tts");
            final String type = parts.getAttributeValue(-1, "type");
            final String alt = parts.getAttributeValue(-1, "alt");

            // Collapse whitespace runs to single spaces and trim.
            final String cleaned = SPACES.matcher(value).replaceAll(" ").trim();

            if (alt != null) {
                return; // alt values are ignored for now
            }
            if ("tts".equals(type)) {
                addItems(localeData, keys, Collections.<String> emptySet(), cleaned);
            } else {
                final Set<String> attributes = new TreeSet<>(splitter.splitToList(cleaned));
                addItems(localeData, keys, attributes, ttsAttribute);
            }
        }

        /** Adds the same keywords/tts to every string in {@code us}. */
        private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) {
            for (final String entry : us) {
                addItems(unicodeMap, entry, attributes, tts);
            }
        }

        /** Adds keywords/tts for one key, merging with an existing entry if there is one. */
        private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) {
            final Annotations existing = unicodeMap.get(entry);
            final Annotations updated = existing == null
                ? new Annotations(attributes, tts)
                : existing.add(attributes, tts); // creates new item
            unicodeMap.put(entry, updated);
        }
    }

    /**
     * @param attributes keywords; {@code null} is treated as empty, otherwise copied immutably
     * @param tts2       short name, may be {@code null}
     * @throws IllegalArgumentException if any keyword or the short name contains the
     *         inheritance marker
     */
    public Annotations(Set<String> attributes, String tts2) {
        annotations = attributes == null ? Collections.<String> emptySet() : ImmutableSet.copyOf(attributes);
        for (final String attr : annotations) {
            if (attr.contains(CldrUtility.INHERITANCE_MARKER)) {
                throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
            }
        }
        tts = tts2;
        if (tts != null && tts.contains(CldrUtility.INHERITANCE_MARKER)) {
            throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
        }
    }

    /**
     * Returns a new instance combining this entry with more keywords and/or a short name.
     *
     * @throws IllegalArgumentException if both this entry and {@code tts2} supply a short name
     */
    public Annotations add(Set<String> attributes, String tts2) {
        final Set<String> own = getKeywords();
        final Set<String> mergedKeywords;
        if (own == null) {
            mergedKeywords = attributes;
        } else if (attributes == null) {
            mergedKeywords = own;
        } else {
            mergedKeywords = union(attributes, own);
        }

        final String ownName = getShortName();
        final String mergedName;
        if (ownName == null) {
            mergedName = tts2;
        } else if (tts2 == null) {
            mergedName = ownName;
        } else {
            mergedName = throwDup();
        }
        return new Annotations(mergedKeywords, mergedName);
    }

    /** Always throws; typed as String so it can sit in an expression. */
    private String throwDup() {
        throw new IllegalArgumentException("Duplicate tts");
    }

    /** Sorted union of two keyword sets. */
    private Set<String> union(Set<String> a, Set<String> b) {
        final TreeSet<String> result = new TreeSet<>(a);
        result.addAll(b);
        return result;
    }

    /**
     * Joins keywords and short name into the display form shared by
     * {@link #toString(boolean)} and
     * {@link AnnotationSet#toString(String, boolean, AnnotationSet)}.
     */
    private static String formatEntry(String shortName, Set<String> keywords, boolean html) {
        final String joined = Joiner.on(" |\u00a0").join(keywords);
        if (shortName == null) {
            return joined;
        }
        final String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*");
        if (joined.isEmpty()) {
            return ttsString;
        }
        return ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + joined;
    }

    /**
     * @return all common locales
     */
    public static Set<String> getAvailable() {
        return LOCALES;
    }

    /**
     * @return all common locales
     */
    public static Set<String> getAvailableLocales() {
        return LOCALES;
    }

    /**
     * @return all locales, including seed
     */
    public static Set<String> getAllAvailable() {
        return ALL_LOCALES;
    }

    /**
     * The annotations of one locale: the explicit data read from XML (resolved against the
     * parent locale) plus names and keywords synthesized on demand for flags, keycaps and
     * composed sequences.
     */
    public static final class AnnotationSet {

        private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

        static final Factory factory = CONFIG.getCldrFactory();
        static final CLDRFile ENGLISH = CONFIG.getEnglish();
        static final CLDRFile ENGLISH_ANNOTATIONS = null;
        static final SubdivisionNames englishSubdivisionIdToName = new SubdivisionNames("en", "main");

        private static final String CATEGORY_LIST_PATH =
            "//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]";

        // NOTE(review): the literals below had lost their supplementary-plane characters (they
        // read as empty strings), which made the branches using them look up the empty string.
        // They are restored with Unicode escapes. KEYCAP_TEN is grounded by the "10" result it
        // produces; the others are the generic emoji for each group and should be confirmed
        // against the upstream file.
        /** U+1F51F KEYCAP TEN. */
        private static final String KEYCAP_TEN = "\uD83D\uDD1F";
        /** U+1F48F KISS, base for composed kiss sequences. */
        private static final String GENERIC_KISS = "\uD83D\uDC8F";
        /** U+1F491 COUPLE WITH HEART, base for composed couple sequences. */
        private static final String GENERIC_COUPLE_WITH_HEART = "\uD83D\uDC91";
        /** U+1F46A FAMILY, base for composed family sequences. */
        private static final String GENERIC_FAMILY = "\uD83D\uDC6A";
        /** U+1F46C TWO MEN HOLDING HANDS. */
        private static final String MEN_HOLDING_HANDS = "\uD83D\uDC6C";
        /** U+1F46B MAN AND WOMAN HOLDING HANDS. */
        private static final String WOMAN_AND_MAN_HOLDING_HANDS = "\uD83D\uDC6B";
        /** U+1F46D TWO WOMEN HOLDING HANDS. */
        private static final String WOMEN_HOLDING_HANDS = "\uD83D\uDC6D";

        private final String locale;
        private final UnicodeMap<Annotations> baseData;
        private final UnicodeMap<Annotations> unresolvedData;
        private final CLDRFile cldrFile;
        private final SubdivisionNames subdivisionIdToName;
        private final SimpleFormatter initialPattern;
        private final Pattern initialRegexPattern;
        private final XListFormatter listPattern;
        private final Set<String> flagLabelSet;
        private final Set<String> keycapLabelSet;
        private final String keycapLabel;
        private final String flagLabel;
        /** Synthesized entries, keyed by code with the emoji variant selector removed. */
        private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>();

        static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed

        /**
         * @param locale         locale ID
         * @param source         data read directly from this locale's file; frozen here
         * @param resolvedSource data merged with the parent, or {@code null} to use
         *                       {@code source} as the resolved data too
         */
        private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) {
            this.locale = locale;
            unresolvedData = source.freeze();
            this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze();
            cldrFile = factory.make(locale, true);
            subdivisionIdToName = new SubdivisionNames(locale, "main", "subdivisions");
            listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST);
            final String initialPatternString = getStringValue(CATEGORY_LIST_PATH);
            initialPattern = SimpleFormatter.compile(initialPatternString);
            // HACK to detect use of prefix pattern: quote the literal parts of the pattern and
            // let each placeholder match anything.
            final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E")
                .replace("\\Q\\E", "");
            initialRegexPattern = Pattern.compile(regexPattern);
            flagLabelSet = getLabelSet("flag");
            flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next();
            keycapLabelSet = getLabelSet("keycap");
            keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next();
        }

        /**
         * @deprecated Use {@link #getLabelSet(String)} instead
         */
        @Deprecated
        private Set<String> getLabelSet() {
            return getLabelSet("flag");
        }

        /** Returns the character label of the given type as a singleton set, or empty if null. */
        private Set<String> getLabelSet(String typeAttributeValue) {
            final String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]");
            return label == null ? Collections.<String> emptySet() : Collections.singleton(label);
        }

        private String getStringValue(String xpath) {
            return getStringValue(xpath, cldrFile, ENGLISH);
        }

        /**
         * Looks up {@code xpath} in {@code cldrFile2}. A missing value is replaced by the English
         * value prefixed with {@link #ENGLISH_MARKER}; a value that comes from root or code
         * fallback is prefixed with {@link #MISSING_MARKER}, except for the category-list
         * pattern, which is returned as is.
         */
        private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) {
            final String result = cldrFile2.getStringValueWithBailey(xpath);
            if (result == null) {
                return ENGLISH_MARKER + english.getStringValueWithBailey(xpath);
            }
            final String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null);
            final boolean fromFallback = sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)
                || sourceLocale.equals(XMLSource.ROOT_ID);
            if (fromFallback && !xpath.equals(CATEGORY_LIST_PATH)) {
                return MISSING_MARKER + result;
            }
            return result;
        }

        /** Same as {@link #getShortName(String, Transform)} with no other source. */
        public String getShortName(String code) {
            return getShortName(code, null);
        }

        /**
         * Returns the short name for {@code code}: the explicit one if present, otherwise a
         * previously synthesized one, otherwise a newly synthesized one (which is then cached).
         *
         * @param otherSource optional fallback that maps a code to a name; may be {@code null}
         * @return the short name, or {@code null} if none could be produced
         */
        public String getShortName(String code, Transform<String, String> otherSource) {
            code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
            Annotations stock = baseData.get(code);
            if (stock != null && stock.tts != null) {
                return stock.tts;
            }
            stock = localeCache.get(code);
            if (stock != null) {
                return stock.tts;
            }
            stock = synthesize(code, otherSource);
            if (stock != null) {
                localeCache.put(code, stock);
                return stock.tts;
            }
            return null;
        }

        /**
         * Returns the keywords for {@code code}, using explicit data, then the synthesized
         * cache, then fresh synthesis; never {@code null}.
         */
        public Set<String> getKeywords(String code) {
            code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
            Annotations stock = baseData.get(code);
            if (stock != null && stock.annotations != null) {
                return stock.annotations;
            }
            stock = localeCache.get(code);
            if (stock != null) {
                return stock.annotations;
            }
            stock = synthesize(code, null);
            if (stock != null) {
                localeCache.put(code, stock);
                return stock.annotations;
            }
            return Collections.<String> emptySet();
        }

        /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed!
         */
        public UnicodeSet keySet() {
            return baseData.keySet();
        }

        /**
         * Builds an entry for a code that has no explicit data: single code points, regional
         * indicator flags, subdivision (tag) flags, keycaps, and composed sequences.
         *
         * @return the synthesized entry, or {@code null} if no name could be produced
         */
        private Annotations synthesize(String code, Transform<String, String> otherSource) {
            final int len = code.codePointCount(0, code.length());
            final boolean isKeycap10 = code.equals(KEYCAP_TEN);
            if (len == 1 && !isKeycap10) {
                if (locale.equals("en")) {
                    final String name = otherSource == null ? null : otherSource.transform(code);
                    return name == null ? null : new Annotations(Collections.<String> emptySet(), name);
                }
                // fall back to English if possible, but mark it.
                final String englishName = getDataSet("en").getShortName(code);
                return englishName == null
                    ? null
                    : new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + englishName);
            } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) {
                final String countryCode = EmojiConstants.getFlagCode(code);
                final String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode);
                String regionName = getStringValue(path);
                if (regionName == null) {
                    regionName = ENGLISH_MARKER + ENGLISH.getStringValueWithBailey(path);
                }
                final String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName);
                return new Annotations(flagLabelSet, flagName);
            } else if (code.startsWith(EmojiConstants.BLACK_FLAG)
                && code.endsWith(EmojiConstants.TAG_TERM)) {
                final String subdivisionCode = EmojiConstants.getTagSpec(code);
                String subdivisionName = subdivisionIdToName.get(subdivisionCode);
                if (subdivisionName == null) {
                    subdivisionName = MISSING_MARKER + subdivisionCode;
                }
                final String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName);
                return new Annotations(flagLabelSet, flagName);
            } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) {
                final String keyText = isKeycap10 ? "10" : UTF16.valueOf(code.charAt(0));
                final String keycapName = initialPattern.format(keycapLabel, keyText);
                return new Annotations(keycapLabelSet, keycapName);
            }

            UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET;
            String rem = "";
            final SimpleFormatter startPattern = initialPattern;
            if (EmojiConstants.COMPONENTS.containsSome(code)) {
                // Split the code into its component characters (rem) and everything else (code).
                synchronized (uss) {
                    rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED);
                    code = uss.deleteFrom(code, SpanCondition.CONTAINED);
                }
            }
            if (code.contains(EmojiConstants.JOINER_STRING)) {
                if (code.contains(EmojiConstants.KISS)) {
                    rem = code + rem;
                    code = GENERIC_KISS;
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                } else if (code.contains(EmojiConstants.HEART) && !code.startsWith(EmojiConstants.HEART)) {
                    rem = code + rem;
                    code = GENERIC_COUPLE_WITH_HEART;
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                } else if (code.equals(EmojiConstants.COMPOSED_HANDSHAKE)) {
                    code = EmojiConstants.HANDSHAKE;
                } else if (code.contains(EmojiConstants.HANDSHAKE)) {
                    if (code.startsWith(EmojiConstants.MAN)) {
                        code = MEN_HOLDING_HANDS;
                    } else if (code.endsWith(EmojiConstants.MAN)) {
                        code = WOMAN_AND_MAN_HOLDING_HANDS;
                    } else if (code.startsWith(EmojiConstants.WOMAN)) {
                        code = WOMEN_HOLDING_HANDS;
                    } else {
                        code = NEUTRAL_HOLDING;
                    }
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) {
                    rem = code + rem;
                    code = GENERIC_FAMILY;
                    skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
                }
                // Any other joiner sequence (e.g. a profession) keeps code as its base.
            }
            return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource);
        }

        /** Returns whether the explicit short name for {@code code} matches the category-list pattern. */
        private boolean matchesInitialPattern(String code) {
            final Annotations baseAnnotation = baseData.get(code);
            final String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName();
            return baseName != null && initialRegexPattern.matcher(baseName).matches();
        }

        /**
         * Composes a name of the form "base: modifier, modifier" from a base code and the code
         * points in {@code rem}, collecting the base's keywords plus each modifier name.
         *
         * @param cldrFile    unused
         * @param base        base code whose name starts the result; may be {@code null}
         * @param rem         remaining code points, each contributing one modifier name
         * @param ignore      code points in {@code rem} to skip
         * @param pattern     pattern combining base name and modifier list
         * @param otherSource optional fallback name source; may be {@code null}
         * @return the composed entry, or {@code null} if the base has no name
         */
        private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern,
            Transform<String, String> otherSource) {
            String shortName = null;
            final Set<String> annotations = new LinkedHashSet<>();
            boolean needMarker = true;

            if (base != null) {
                needMarker = false;
                final Annotations stock = baseData.get(base);
                if (stock != null) {
                    shortName = stock.getShortName();
                    annotations.addAll(stock.getKeywords());
                } else if (otherSource != null) {
                    shortName = otherSource.transform(base);
                } else {
                    return null;
                }
                if (shortName == null) {
                    return null;
                }
            }

            // Guarded: base is allowed to be null above, and codePointAt(0) throws on "".
            boolean hackBlond = base != null && !base.isEmpty()
                && EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0));
            final Collection<String> arguments = new ArrayList<>();
            int lastSkin = -1;

            for (final int mod : CharSequences.codePoints(rem)) {
                if (ignore.contains(mod)) {
                    continue;
                }
                if (EmojiConstants.MODIFIERS.contains(mod)) {
                    if (lastSkin == mod) {
                        continue;
                    }
                    lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families
                }
                final Annotations stock = baseData.get(mod);
                String modName = null;
                if (stock != null) {
                    modName = stock.getShortName();
                } else if (otherSource != null) {
                    // Look up the modifier itself; this previously passed the base, which
                    // produced the base's name in place of the modifier's.
                    modName = otherSource.transform(UTF16.valueOf(mod));
                }
                if (modName == null) {
                    needMarker = true;
                    if (ENGLISH_DATA != null) {
                        final Annotations engName = ENGLISH_DATA.baseData.get(mod);
                        if (engName != null) {
                            modName = engName.getShortName();
                        }
                    }
                    if (modName == null) {
                        modName = Utility.hex(mod); // ultimate fallback
                    }
                }
                if (hackBlond && shortName != null) {
                    // HACK: make the blond names look like the other hair names.
                    // Split the short name into pieces, if possible, and insert the modName first.
                    final String sep = initialPattern.format("", "");
                    final int splitPoint = shortName.indexOf(sep);
                    if (splitPoint >= 0) {
                        final String tail = shortName.substring(splitPoint + sep.length());
                        shortName = shortName.substring(0, splitPoint);
                        arguments.add(modName);
                        annotations.add(modName);
                        modName = tail;
                    }
                    hackBlond = false;
                }

                // modName is never null here: the fallback chain above ends in a hex string.
                arguments.add(modName);
                annotations.add(modName);
            }
            if (!arguments.isEmpty()) {
                shortName = pattern.format(shortName, listPattern.format(arguments));
            }
            return new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName);
        }

        /**
         * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead
         */
        @Deprecated
        public String toString(String code, boolean html) {
            return toString(code, html, null);
        }

        /**
         * Formats the short name and keywords of {@code code} for display. Returns
         * {@link #MISSING_MARKER} when there is no short name or it starts with
         * {@link #BAD_MARKER} or {@link #ENGLISH_MARKER}. A short name or non-empty keyword set
         * equal to the parent's is shown as {@link #EQUIVALENT}.
         *
         * @param html              whether to use HTML markup for emphasis and line breaks
         * @param parentAnnotations parent data to compare against, or {@code null}
         */
        public String toString(String code, boolean html, AnnotationSet parentAnnotations) {
            String shortName = getShortName(code);
            if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) {
                return MISSING_MARKER;
            }

            final String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code);
            if (Objects.equal(shortName, parentShortName)) {
                shortName = EQUIVALENT;
            }

            Set<String> keywords = getKeywordsMinus(code);
            final Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code);
            if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) {
                keywords = Collections.singleton(EQUIVALENT);
            }

            return formatEntry(shortName, keywords, html);
        }

        /** Returns the resolved explicit data (this locale merged with its parent). */
        public UnicodeMap<Annotations> getExplicitValues() {
            return baseData;
        }

        /** Returns the data read directly from this locale's file. */
        public UnicodeMap<Annotations> getUnresolvedExplicitValues() {
            return unresolvedData;
        }

        /** Returns the keywords for {@code code} with the short name itself removed. */
        public Set<String> getKeywordsMinus(String code) {
            final String shortName = getShortName(code);
            Set<String> keywords = getKeywords(code);
            if (shortName != null && keywords.contains(shortName)) {
                keywords = new LinkedHashSet<>(keywords);
                keywords.remove(shortName);
            }
            return keywords;
        }
    }

    /**
     * Loads (or returns the cached) data for {@code locale} from its source directory.
     *
     * @throws IllegalArgumentException if the locale does not have exactly one source directory
     */
    public static AnnotationSet getDataSet(String locale) {
        final File theDir = getDirForLocale(locale);
        return getDataSet(theDir.getAbsolutePath(), locale);
    }

    /**
     * Uses the annotations factory to find the single directory holding the locale's XML file.
     *
     * @throws IllegalArgumentException if there is no such directory or more than one
     */
    private static File getDirForLocale(String locale) {
        final List<File> dirs = ANNOTATIONS_FACTORY.getSourceDirectoriesForLocale(locale);
        if (dirs == null || dirs.isEmpty()) {
            throw new IllegalArgumentException("Cannot find source annotation directory for locale " + locale);
        }
        if (dirs.size() != 1) {
            throw new IllegalArgumentException(
                "Did not find exactly one source directory for locale " + locale + " - " + dirs);
        }
        return dirs.get(0);
    }

    /**
     * Loads (or returns the cached) data for {@code locale} from {@code dir}, loading parent
     * locales other than root first.
     *
     * @return the data, or {@code null} if the locale is neither cached nor a common locale
     */
    public static AnnotationSet getDataSet(String dir, String locale) {
        // computeIfAbsent keeps exactly one per-directory map even if two threads arrive together.
        final Map<String, AnnotationSet> dirCache =
            cache.computeIfAbsent(dir, unused -> new ConcurrentHashMap<String, AnnotationSet>());
        final AnnotationSet cached = dirCache.get(locale);
        if (cached != null) {
            return cached;
        }
        if (!LOCALES.contains(locale)) {
            return null;
        }
        final String parentString = LocaleIDParser.getParent(locale);
        AnnotationSet parentData = null;
        if (parentString != null && !parentString.equals("root")) {
            parentData = getDataSet(dir, parentString);
        }
        final MyHandler myHandler = new MyHandler(dirCache, locale, parentData);
        final XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
        xfr.read(dir + "/" + locale + ".xml", -1, true);
        return myHandler.cleanup();
    }

    /** Returns the resolved explicit data for {@code locale}; see {@link #getData(String, String)}. */
    public static UnicodeMap<Annotations> getData(String locale) {
        final File theDir = getDirForLocale(locale);
        return getData(theDir.getAbsolutePath(), locale);
    }

    /**
     * @return the resolved explicit data for {@code locale} in {@code dir}, or {@code null} if
     *         {@link #getDataSet(String, String)} returns {@code null}
     */
    public static UnicodeMap<Annotations> getData(String dir, String locale) {
        final AnnotationSet result = getDataSet(dir, locale);
        return result == null ? null : result.baseData;
    }

    @Override
    public String toString() {
        return toString(false);
    }

    /**
     * Formats this entry as the emphasized short name followed by the keywords, with the short
     * name removed from the keyword list.
     *
     * @param html whether to use HTML markup for emphasis and line breaks
     */
    public String toString(boolean html) {
        Set<String> keywords = getKeywords();
        if (getShortName() != null && keywords.contains(getShortName())) {
            keywords = new LinkedHashSet<>(getKeywords());
            keywords.remove(getShortName());
        }
        return formatEntry(getShortName(), keywords, html);
    }

    /**
     * @return the annotations
     */
    public Set<String> getKeywords() {
        return annotations;
    }

    /**
     * @return the tts
     */
    public String getShortName() {
        return tts;
    }

    /** Debugging entry point; currently always runs {@link #writeList()}. */
    public static void main(String[] args) {
        if (true) {
            writeList();
        } else {
            writeEnglish();
        }
    }

    /** Builds a string from code points, so sample data does not depend on file encoding. */
    private static String seq(int... codePoints) {
        return new String(codePoints, 0, codePoints.length);
    }

    /**
     * Prints every explicit English entry, then the synthesized name and keywords for a fixed
     * list of sample sequences.
     */
    private static void writeList() {
        final AnnotationSet eng = Annotations.getDataSet("en");
        final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
        final Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
        map.keySet().addAllTo(keys);
        for (final String key : keys) {
            System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT)
                + "\t" + key
                + "\t" + map.get(key).getShortName()
                + "\t" + Joiner.on(" | ").join(map.get(key).getKeywords()));
        }

        // NOTE(review): the sample literals had lost their supplementary-plane characters; only
        // the heart, scales and gender signs were still visible. They are rebuilt from code
        // points to match that shape (23 entries). The specific people and skin tones are
        // representative choices; confirm against the upstream file.
        final int zwj = 0x200D;
        final int vs16 = 0xFE0F;
        final int heart = 0x2764;
        final int scales = 0x2696;
        final int male = 0x2642;
        final int female = 0x2640;
        final int man = 0x1F468;
        final int woman = 0x1F469;
        final int police = 0x1F46E;
        final int biker = 0x1F6B4;
        final int light = 0x1F3FB;
        final int mediumLight = 0x1F3FC;
        final int dark = 0x1F3FF;
        final List<String> samples = Arrays.asList(
            seq(0x1F48F), seq(woman, zwj, heart, vs16, zwj, 0x1F48B, zwj, woman),
            seq(0x1F491), seq(woman, zwj, heart, vs16, zwj, woman),
            seq(0x1F46A), seq(woman, zwj, woman, zwj, 0x1F467),
            seq(0x1F466, light), seq(woman, dark),
            seq(man, zwj, scales), seq(man, dark, zwj, scales),
            seq(woman, zwj, scales), seq(woman, mediumLight, zwj, scales),
            seq(police), seq(police, zwj, male, vs16), seq(police, mediumLight, zwj, male, vs16),
            seq(police, zwj, female, vs16), seq(police, dark, zwj, female, vs16),
            seq(biker), seq(biker, dark), seq(biker, zwj, male, vs16), seq(biker, dark, zwj, male, vs16),
            seq(biker, zwj, female, vs16), seq(biker, dark, zwj, female, vs16));
        for (final String s : samples) {
            final String shortName = eng.getShortName(s);
            final Set<String> keywords = eng.getKeywords(s);
            System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + Joiner.on("|")
                .join(keywords) + "\"},");
        }
    }

    /** Prints the explicit English entries side by side with their {@code en_001} overrides. */
    private static void writeEnglish() {
        final AnnotationSet eng = Annotations.getDataSet("en");
        System.out.println(Annotations.getAvailable());
        final AnnotationSet eng100 = Annotations.getDataSet("en_001");
        final UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues();
        final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
        final Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
        map.keySet().addAllTo(keys);
        for (final String key : keys) {
            final Annotations value = map.get(key);
            final Annotations value100 = map100.get(key);
            final Set<String> keywords100 = (value100 == null ? null : value100.getKeywords());
            System.out.println(key + "\tname\t"
                + "\t" + value.getShortName()
                + "\t" + (value100 == null ? "" : value100.getShortName())
                + "\t" + Joiner.on(" | ").join(value.getKeywords())
                + "\t" + (keywords100 == null ? "" : Joiner.on(" | ").join(keywords100)));
        }
    }
}