• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collection;
7 import java.util.Collections;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Locale;
12 import java.util.Map;
13 import java.util.Set;
14 import java.util.TreeSet;
15 import java.util.concurrent.ConcurrentHashMap;
16 import java.util.regex.Pattern;
17 
18 import org.unicode.cldr.tool.ChartAnnotations;
19 import org.unicode.cldr.tool.SubdivisionNames;
20 import org.unicode.cldr.util.Factory.SourceTreeType;
21 import org.unicode.cldr.util.XMLFileReader.SimpleHandler;
22 
23 import com.google.common.base.Joiner;
24 import com.google.common.base.Objects;
25 import com.google.common.base.Splitter;
26 import com.google.common.collect.ImmutableSet;
27 import com.ibm.icu.dev.util.UnicodeMap;
28 import com.ibm.icu.impl.Utility;
29 import com.ibm.icu.lang.CharSequences;
30 import com.ibm.icu.text.SimpleFormatter;
31 import com.ibm.icu.text.Transform;
32 import com.ibm.icu.text.UTF16;
33 import com.ibm.icu.text.UnicodeSet;
34 import com.ibm.icu.text.UnicodeSet.SpanCondition;
35 import com.ibm.icu.text.UnicodeSetSpanner;
36 
37 public class Annotations {
38     private static final boolean DEBUG = false;
39 
40     public static final String BAD_MARKER = "⊗";
41     public static final String MISSING_MARKER = "⊖";
42     public static final String ENGLISH_MARKER = "⊕";
43     public static final String EQUIVALENT = "≣";
44     public static final String NEUTRAL_HOLDING = "��‍��‍��";
45 
46     public static final Splitter splitter = Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings();
47     static final Splitter dotSplitter = Splitter.on(".").trimResults();
48 
49     static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>();
50     static final Set<String> LOCALES;
51     static final Set<String> ALL_LOCALES;
52     static final Factory ANNOTATIONS_FACTORY;
53     private static final AnnotationSet ENGLISH_DATA;
54 
55     private final Set<String> annotations;
56     private final String tts;
57 
58     static {
59         ANNOTATIONS_FACTORY = CLDRConfig.getInstance().getAnnotationsFactory();
60         ALL_LOCALES = ANNOTATIONS_FACTORY.getAvailable();
61         final Set<String> commonList = new HashSet<String>();
62         // calculate those in common
63         for(final String loc : ALL_LOCALES) {
64             final File f = getDirForLocale(loc);
65             if (SimpleFactory.getSourceTreeType(f) == SourceTreeType.common) {
66                 commonList.add(loc);
67             }
68         }
69         LOCALES = Collections.unmodifiableSet(commonList);
70         ENGLISH_DATA = getDataSet("en");
71     }
72 
73     static class MyHandler extends SimpleHandler {
74         private final String locale;
75         private final UnicodeMap<Annotations> localeData = new UnicodeMap<>();
76         private final AnnotationSet parentData;
77         private final Map<String, AnnotationSet> dirCache;
78 
MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)79         public MyHandler(Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) {
80             this.locale = locale;
81             this.parentData = parentData;
82             this.dirCache = dirCache;
83         }
84 
cleanup()85         public AnnotationSet cleanup() {
86             // add parent data (may be overridden)
87             UnicodeMap<Annotations> templocaleData = null;
88             if (parentData != null) {
89                 templocaleData = new UnicodeMap<>();
90                 UnicodeSet keys = new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet());
91                 for (String key : keys) {
92                     Annotations parentValue = parentData.baseData.get(key);
93                     Annotations myValue = localeData.get(key);
94                     if (parentValue == null) {
95                         templocaleData.put(key, myValue);
96                     } else if (myValue == null) {
97                         templocaleData.put(key, parentValue);
98                     } else { // need to combine
99                         String tts = myValue.tts == null
100                             ? parentValue.tts : myValue.tts;
101                         Set<String> annotations = myValue.annotations == null || myValue.annotations.isEmpty()
102                             ? parentValue.annotations : myValue.annotations;
103                         templocaleData.put(key, new Annotations(annotations, tts));
104                     }
105                 }
106             }
107 
108             final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData);
109             dirCache.put(locale, result);
110             return result;
111         }
112 
113         static final Pattern SPACES = Pattern.compile("\\s+");
114 
115         @Override
handlePathValue(String path, String value)116         public void handlePathValue(String path, String value) {
117             if (value.contains(CldrUtility.INHERITANCE_MARKER)) {
118                 return; // skip all ^^^
119             }
120             XPathParts parts = XPathParts.getFrozenInstance(path);
121             String lastElement = parts.getElement(-1);
122             if (!lastElement.equals("annotation")) {
123                 if (!"identity".equals(parts.getElement(1))) {
124                     throw new IllegalArgumentException("Unexpected path");
125                 }
126                 return;
127             }
128             String usString = parts.getAttributeValue(-1, "cp");
129             UnicodeSet us1 = usString.startsWith("[") && usString.endsWith("]") ? new UnicodeSet(usString) : new UnicodeSet().add(usString);
130             UnicodeSet us = new UnicodeSet();
131             for (String s : us1) {
132                 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""));
133             }
134             String tts = parts.getAttributeValue(-1, "tts");
135             String type = parts.getAttributeValue(-1, "type");
136             String alt = parts.getAttributeValue(-1, "alt");
137 
138             // clean up value
139             String value2 = SPACES.matcher(value).replaceAll(" ").trim();
140             if (!value2.equals(value)) {
141                 value = value2;
142             }
143             if (alt != null) {
144                 // do nothing for now
145             } else if ("tts".equals(type)) {
146                 addItems(localeData, us, Collections.<String> emptySet(), value);
147             } else {
148                 Set<String> attributes = new TreeSet<>(splitter.splitToList(value));
149                 addItems(localeData, us, attributes, tts);
150             }
151         }
152 
addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)153         private void addItems(UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts) {
154             for (String entry : us) {
155                 addItems(unicodeMap, entry, attributes, tts);
156             }
157         }
158 
addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)159         private void addItems(UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts) {
160             Annotations annotations = unicodeMap.get(entry);
161             if (annotations == null) {
162                 unicodeMap.put(entry, new Annotations(attributes, tts));
163             } else {
164                 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item
165             }
166         }
167     }
168 
Annotations(Set<String> attributes, String tts2)169     public Annotations(Set<String> attributes, String tts2) {
170         annotations = attributes == null ? Collections.<String> emptySet() : ImmutableSet.copyOf(attributes);
171         for (String attr : annotations) {
172             if (attr.contains(CldrUtility.INHERITANCE_MARKER)) {
173                 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
174             }
175 
176         }
177         tts = tts2;
178         if (tts != null && tts.contains(CldrUtility.INHERITANCE_MARKER)) {
179             throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
180         }
181     }
182 
add(Set<String> attributes, String tts2)183     public Annotations add(Set<String> attributes, String tts2) {
184         return new Annotations(getKeywords() == null ? attributes : attributes == null ? getKeywords() : union(attributes, getKeywords()),
185             getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup());
186     }
187 
throwDup()188     private String throwDup() {
189         throw new IllegalArgumentException("Duplicate tts");
190     }
191 
union(Set<String> a, Set<String> b)192     private Set<String> union(Set<String> a, Set<String> b) {
193         TreeSet<String> result = new TreeSet<>(a);
194         result.addAll(b);
195         return result;
196     }
197 
198     /**
199      * @return all common locales
200      */
getAvailable()201     public static Set<String> getAvailable() {
202         return LOCALES;
203     }
204 
205     /**
206      * @return all common locales
207      */
getAvailableLocales()208     public static Set<String> getAvailableLocales() {
209         return LOCALES;
210     }
211 
212     /**
213      * @return all locales, including seed
214      */
getAllAvailable()215     public static Set<String> getAllAvailable() {
216         return ALL_LOCALES;
217     }
218 
219     public static final class AnnotationSet {
220 
221         private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
222 
223         static final Factory factory = CONFIG.getCldrFactory();
224         static final CLDRFile ENGLISH = CONFIG.getEnglish();
225         static final CLDRFile ENGLISH_ANNOTATIONS = null;
226         static final SubdivisionNames englishSubdivisionIdToName = new SubdivisionNames("en", "main");
227         //CLDRConfig.getInstance().getAnnotationsFactory().make("en", false);
228 
229         private final String locale;
230         private final UnicodeMap<Annotations> baseData;
231         private final UnicodeMap<Annotations> unresolvedData;
232         private final CLDRFile cldrFile;
233         private final SubdivisionNames subdivisionIdToName;
234         private final SimpleFormatter initialPattern;
235         private final Pattern initialRegexPattern;
236         private final XListFormatter listPattern;
237         private final Set<String> flagLabelSet;
238         private final Set<String> keycapLabelSet;
239         private final String keycapLabel;
240         private final String flagLabel;
241 //        private final String maleLabel;
242 //        private final String femaleLabel;
243         private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>();
244 
245         static UnicodeSetSpanner uss = new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed
246 
AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)247         private AnnotationSet(String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource) {
248             this.locale = locale;
249             unresolvedData = source.freeze();
250             this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze();
251             cldrFile = factory.make(locale, true);
252             subdivisionIdToName = new SubdivisionNames(locale, "main", "subdivisions");
253 // EmojiSubdivisionNames.getSubdivisionIdToName(locale);
254             listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST);
255             final String initialPatternString = getStringValue("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]");
256             initialPattern = SimpleFormatter.compile(initialPatternString);
257             final String regexPattern = ("\\Q" + initialPatternString.replace("{0}", "\\E.*\\Q").replace("{1}", "\\E.*\\Q") + "\\E")
258                 .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern
259             initialRegexPattern = Pattern.compile(regexPattern);
260             flagLabelSet = getLabelSet("flag");
261             flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next();
262             keycapLabelSet = getLabelSet("keycap");
263             keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next();
264 //            maleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]");
265 //            femaleLabel = getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]");
266         }
267 
268         /**
269          * @deprecated Use {@link #getLabelSet(String)} instead
270          */
271         @Deprecated
getLabelSet()272         private Set<String> getLabelSet() {
273             return getLabelSet("flag");
274         }
275 
getLabelSet(String typeAttributeValue)276         private Set<String> getLabelSet(String typeAttributeValue) {
277             String label = getStringValue("//ldml/characterLabels/characterLabel[@type=\"" + typeAttributeValue + "\"]");
278             return label == null ? Collections.<String> emptySet() : Collections.singleton(label);
279         }
280 
getStringValue(String xpath)281         private String getStringValue(String xpath) {
282             return getStringValue(xpath, cldrFile, ENGLISH);
283         }
284 
getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)285         private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) {
286             String result = cldrFile2.getStringValueWithBailey(xpath);
287             if (result == null) {
288                 return ENGLISH_MARKER + english.getStringValueWithBailey(xpath);
289             }
290             String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null);
291             if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID) || sourceLocale.equals(XMLSource.ROOT_ID)) {
292                 if (!xpath.equals("//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]")) {
293                     return MISSING_MARKER + result;
294                 }
295             }
296             return result;
297         }
298 
getShortName(String code)299         public String getShortName(String code) {
300             return getShortName(code, null);
301         }
302 
getShortName(String code, Transform<String, String> otherSource)303         public String getShortName(String code, Transform<String, String> otherSource) {
304             if (code.equals("��‍♀️")) {
305                 int debug = 0;
306             }
307 
308             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
309             Annotations stock = baseData.get(code);
310             if (stock != null && stock.tts != null) {
311                 return stock.tts;
312             }
313             stock = localeCache.get(code);
314             if (stock != null) {
315                 return stock.tts;
316             }
317             stock = synthesize(code, otherSource);
318             if (stock != null) {
319                 localeCache.put(code, stock);
320                 return stock.tts;
321             }
322             return null;
323         }
324 
getKeywords(String code)325         public Set<String> getKeywords(String code) {
326             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
327             Annotations stock = baseData.get(code);
328             if (stock != null && stock.annotations != null) {
329                 return stock.annotations;
330             }
331             stock = localeCache.get(code);
332             if (stock != null) {
333                 return stock.annotations;
334             }
335             stock = synthesize(code, null);
336             if (stock != null) {
337                 localeCache.put(code, stock);
338                 return stock.annotations;
339             }
340             return Collections.<String> emptySet();
341         }
342 
343         /** Returns the set of all keys for which annotations are available. WARNING: keys have the Emoji Presentation Selector removed!
344          */
keySet()345         public UnicodeSet keySet() {
346             return baseData.keySet();
347         }
348 
synthesize(String code, Transform<String, String> otherSource)349         private Annotations synthesize(String code, Transform<String, String> otherSource) {
350             if (code.equals("����‍♂")) {
351                 int debug = 0;
352             }
353             String shortName = null;
354             int len = code.codePointCount(0, code.length());
355             boolean isKeycap10 = code.equals("��");
356             if (len == 1 && !isKeycap10) {
357                 String tempName = null;
358                 if (locale.equals("en")) {
359                     if (otherSource != null) {
360                         tempName = otherSource.transform(code);
361                     }
362                     if (tempName == null) {
363                         return null;
364                     }
365                     return new Annotations(Collections.<String> emptySet(), tempName);
366                 } else { // fall back to English if possible, but mark it.
367                     tempName = getDataSet("en").getShortName(code);
368                     if (tempName == null) {
369                         return null;
370                     }
371                     return new Annotations(Collections.<String> emptySet(), ENGLISH_MARKER + tempName);
372                 }
373             } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) {
374                 String countryCode = EmojiConstants.getFlagCode(code);
375                 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode);
376                 String regionName = getStringValue(path);
377                 if (regionName == null) {
378                     regionName = ENGLISH_MARKER + ENGLISH.getStringValueWithBailey(path);
379                 }
380                 String flagName = flagLabel == null ? regionName : initialPattern.format(flagLabel, regionName);
381                 return new Annotations(flagLabelSet, flagName);
382             } else if (code.startsWith(EmojiConstants.BLACK_FLAG)
383                 && code.endsWith(EmojiConstants.TAG_TERM)) {
384                 String subdivisionCode = EmojiConstants.getTagSpec(code);
385                 String subdivisionName = subdivisionIdToName.get(subdivisionCode);
386                 if (subdivisionName == null) {
387 //                    subdivisionName = englishSubdivisionIdToName.get(subdivisionCode);
388 //                    if (subdivisionName != null) {
389 //                        subdivisionName = ENGLISH_MARKER + subdivisionCode;
390 //                    } else {
391                         subdivisionName = MISSING_MARKER + subdivisionCode;
392 //                    }
393                 }
394                 String flagName = flagLabel == null ? subdivisionName : initialPattern.format(flagLabel, subdivisionName);
395                 return new Annotations(flagLabelSet, flagName);
396             } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) {
397                 final String rem = code.equals("��") ? "10" : UTF16.valueOf(code.charAt(0));
398                 shortName = initialPattern.format(keycapLabel, rem);
399                 return new Annotations(keycapLabelSet, shortName);
400             }
401             UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET;
402             String rem = "";
403             SimpleFormatter startPattern = initialPattern;
404             if (EmojiConstants.COMPONENTS.containsSome(code)) {
405                 synchronized (uss) {
406                     rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED);
407                     code = uss.deleteFrom(code, SpanCondition.CONTAINED);
408                 }
409             }
410             if (code.contains(EmojiConstants.JOINER_STRING)) {
411 //                if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){
412 //                    if (matchesInitialPattern(code)) { // "����‍♂️","police officer: man, medium-light skin tone"
413 //                        rem = EmojiConstants.MAN + rem;
414 //                        code = code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length());
415 //                    } // otherwise "����‍♂️","man biking: dark skin tone"
416 //                } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){
417 //                    if (matchesInitialPattern(code)) { //
418 //                        rem = EmojiConstants.WOMAN + rem;
419 //                        code = code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length());
420 //                    }
421 //                } else
422                 if (code.contains(EmojiConstants.KISS)) {
423                     rem = code + rem;
424                     code = "��";
425                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
426                 } else if (code.contains(EmojiConstants.HEART) && !code.startsWith(EmojiConstants.HEART)) {
427                     rem = code + rem;
428                     code = "��";
429                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
430                 } else if (code.equals(EmojiConstants.COMPOSED_HANDSHAKE)) {
431                     code = EmojiConstants.HANDSHAKE;
432                 } else if (code.contains(EmojiConstants.HANDSHAKE)) {
433                     code = code.startsWith(EmojiConstants.MAN) ? "��"
434                         : code.endsWith(EmojiConstants.MAN) ? "��"
435                             : code.startsWith(EmojiConstants.WOMAN) ? "��"
436                             : NEUTRAL_HOLDING;
437                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
438                 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) {
439                     rem = code + rem;
440                     code = "��";
441                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
442 //                } else {
443 //                    startPattern = listPattern;
444                 }
445                 // left over is "����‍⚖","judge: man, dark skin tone"
446             }
447             return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource);
448         }
449 
matchesInitialPattern(String code)450         private boolean matchesInitialPattern(String code) {
451             Annotations baseAnnotation = baseData.get(code);
452             String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName();
453             return baseName != null && initialRegexPattern.matcher(baseName).matches();
454         }
455 
getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)456         private Annotations getBasePlusRemainder(CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern,
457             Transform<String, String> otherSource) {
458             String shortName = null;
459             Set<String> annotations = new LinkedHashSet<>();
460             boolean needMarker = true;
461 
462             if (base != null) {
463                 needMarker = false;
464                 Annotations stock = baseData.get(base);
465                 if (stock != null) {
466                     shortName = stock.getShortName();
467                     annotations.addAll(stock.getKeywords());
468                 } else if (otherSource != null) {
469                     shortName = otherSource.transform(base);
470                 } else {
471                     return null;
472                 }
473                 if (shortName == null) {
474                     return null;
475                 }
476             }
477 
478             boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0));
479             Collection<String> arguments = new ArrayList<>();
480             int lastSkin = -1;
481 
482             for (int mod : CharSequences.codePoints(rem)) {
483                 if (ignore.contains(mod)) {
484                     continue;
485                 }
486                 if (EmojiConstants.MODIFIERS.contains(mod)) {
487                     if (lastSkin == mod) {
488                         continue;
489                     }
490                     lastSkin = mod; // collapse skin tones. TODO fix if we ever do multi-skin families
491                 }
492                 Annotations stock = baseData.get(mod);
493                 String modName = null;
494                 if (stock != null) {
495                     modName = stock.getShortName();
496                 } else if (otherSource != null) {
497                     modName = otherSource.transform(base);
498                 }
499                 if (modName == null) {
500                     needMarker = true;
501                     if (ENGLISH_DATA != null) {
502                         Annotations engName = ENGLISH_DATA.baseData.get(mod);
503                         if (engName != null) {
504                             modName = engName.getShortName();
505                         }
506                     }
507                     if (modName == null) {
508                         modName = Utility.hex(mod); // ultimate fallback
509                     }
510                 }
511                 if (hackBlond && shortName != null) {
512                     // HACK: make the blond names look like the other hair names
513                     // Split the short name into pieces, if possible, and insert the modName first
514                     String sep = initialPattern.format("", "");
515                     int splitPoint = shortName.indexOf(sep);
516                     if (splitPoint >= 0) {
517                         String modName0 = shortName.substring(splitPoint+sep.length());
518                         shortName = shortName.substring(0, splitPoint);
519                         if (modName != null) {
520                             arguments.add(modName);
521                             annotations.add(modName);
522                         }
523                         modName = modName0;
524                     }
525                     hackBlond = false;
526                 }
527 
528                 if (modName != null) {
529                     arguments.add(modName);
530                     annotations.add(modName);
531                 }
532             }
533             if (!arguments.isEmpty()) {
534                 shortName = pattern.format(shortName, listPattern.format(arguments));
535             }
536             Annotations result = new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName);
537             return result;
538         }
539 
540         /**
541          * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead
542          */
543         @Deprecated
toString(String code, boolean html)544         public String toString(String code, boolean html) {
545             return toString(code, html, null);
546         }
547 
toString(String code, boolean html, AnnotationSet parentAnnotations)548         public String toString(String code, boolean html, AnnotationSet parentAnnotations) {
549             if (locale.equals("be") && code.equals("��")) {
550                 int debug = 0;
551             }
552             String shortName = getShortName(code);
553             if (shortName == null || shortName.startsWith(BAD_MARKER) || shortName.startsWith(ENGLISH_MARKER)) {
554                 return MISSING_MARKER;
555             }
556 
557             String parentShortName = parentAnnotations == null ? null : parentAnnotations.getShortName(code);
558             if (shortName != null && Objects.equal(shortName, parentShortName)) {
559                 shortName = EQUIVALENT;
560             }
561 
562             Set<String> keywords = getKeywordsMinus(code);
563             Set<String> parentKeywords = parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code);
564             if (keywords != null && !keywords.isEmpty() && Objects.equal(keywords, parentKeywords)) {
565                 keywords = Collections.singleton(EQUIVALENT);
566             }
567 
568             String result = Joiner.on(" |\u00a0").join(keywords);
569             if (shortName != null) {
570                 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*");
571                 if (result.isEmpty()) {
572                     result = ttsString;
573                 } else {
574                     result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
575                 }
576             }
577             return result;
578         }
579 
getExplicitValues()580         public UnicodeMap<Annotations> getExplicitValues() {
581             return baseData;
582         }
583 
getUnresolvedExplicitValues()584         public UnicodeMap<Annotations> getUnresolvedExplicitValues() {
585             return unresolvedData;
586         }
587 
getKeywordsMinus(String code)588         public Set<String> getKeywordsMinus(String code) {
589             String shortName = getShortName(code);
590             Set<String> keywords = getKeywords(code);
591             if (shortName != null && keywords.contains(shortName)) {
592                 keywords = new LinkedHashSet<>(keywords);
593                 keywords.remove(shortName);
594             }
595             return keywords;
596         }
597     }
598 
getDataSet(String locale)599     public static AnnotationSet getDataSet(String locale) {
600         final File theDir = getDirForLocale(locale);
601         return getDataSet(theDir.getAbsolutePath(), locale);
602     }
603 
getDirForLocale(String locale)604     private static File getDirForLocale(String locale) {
605         // use the annotations Factory to find the XML file
606         List<File> dirs = ANNOTATIONS_FACTORY.getSourceDirectoriesForLocale(locale);
607         if (dirs == null || dirs.isEmpty()) {
608             throw new IllegalArgumentException("Cannot find source annotation directory for locale " + locale);
609         } else if (dirs.size() != 1) {
610             throw new IllegalArgumentException(
611                 "Did not find exactly one source directory for locale " + locale + " - " + dirs);
612         }
613         final File theDir = dirs.get(0);
614         return theDir;
615     }
616 
getDataSet(String dir, String locale)617     public static AnnotationSet getDataSet(String dir, String locale) {
618         Map<String, AnnotationSet> dirCache = cache.get(dir);
619         if (dirCache == null) {
620             cache.put(dir, dirCache = new ConcurrentHashMap<>());
621         }
622         AnnotationSet result = dirCache.get(locale);
623         if (result != null) {
624             return result;
625         }
626         if (!LOCALES.contains(locale)) {
627             return null;
628         }
629         String parentString = LocaleIDParser.getParent(locale);
630         AnnotationSet parentData = null;
631         if (parentString != null && !parentString.equals("root")) {
632             parentData = getDataSet(dir, parentString);
633         }
634         MyHandler myHandler = new MyHandler(dirCache, locale, parentData);
635         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
636         xfr.read(dir + "/" + locale + ".xml", -1, true);
637         return myHandler.cleanup();
638     }
639 
getData(String locale)640     public static UnicodeMap<Annotations> getData(String locale) {
641         final File theDir = getDirForLocale(locale);
642         return getData(theDir.getAbsolutePath(), locale);
643     }
644 
getData(String dir, String locale)645     public static UnicodeMap<Annotations> getData(String dir, String locale) {
646         AnnotationSet result = getDataSet(dir, locale);
647         return result == null ? null : result.baseData;
648     }
649 
650     @Override
toString()651     public String toString() {
652         return toString(false);
653     }
654 
toString(boolean html)655     public String toString(boolean html) {
656         Set<String> annotations2 = getKeywords();
657         if (getShortName() != null && annotations2.contains(getShortName())) {
658             annotations2 = new LinkedHashSet<>(getKeywords());
659             annotations2.remove(getShortName());
660         }
661         String result = Joiner.on(" |\u00a0").join(annotations2);
662         if (getShortName() != null) {
663             String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*");
664             if (result.isEmpty()) {
665                 result = ttsString;
666             } else {
667                 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
668             }
669         }
670         return result;
671     }
672 
673     /**
674      * @return the annotations
675      */
getKeywords()676     public Set<String> getKeywords() {
677         return annotations;
678     }
679 
680     /**
681      * @return the tts
682      */
getShortName()683     public String getShortName() {
684         return tts;
685     }
686 
main(String[] args)687     public static void main(String[] args) {
688         if (true) {
689             writeList();
690         } else {
691             writeEnglish();
692         }
693     }
694 
writeList()695     private static void writeList() {
696         AnnotationSet eng = Annotations.getDataSet("en");
697         Annotations an = eng.baseData.get("❤");
698         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
699         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
700         map.keySet().addAllTo(keys);
701 //        keys.add("����‍⚖");
702         for (String key : keys) {
703             System.out.println(Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT)
704                 + "\t" + key
705                 + "\t" + map.get(key).getShortName()
706                 + "\t" + Joiner.on(" | ").join(map.get(key).getKeywords()));
707         }
708         for (String s : Arrays.asList(
709             "��", "��‍❤️‍��‍��",
710             "��", "��‍❤️‍��",
711             "��", "��‍��‍��",
712             "����", "����",
713             "��‍⚖", "����‍⚖", "��‍⚖", "����‍⚖",
714             "��", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️",
715             "��", "����", "��‍♂️", "����‍♂️", "��‍♀️", "����‍♀️")) {
716             final String shortName = eng.getShortName(s);
717             final Set<String> keywords = eng.getKeywords(s);
718             System.out.println("{\"" + s + "\",\"" + shortName + "\",\"" + Joiner.on("|")
719                 .join(keywords) + "\"},");
720         }
721     }
722 
writeEnglish()723     private static void writeEnglish() {
724         AnnotationSet eng = Annotations.getDataSet("en");
725         System.out.println(Annotations.getAvailable());
726         AnnotationSet eng100 = Annotations.getDataSet("en_001");
727         UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues();
728         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
729         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
730         map.keySet().addAllTo(keys);
731         for (String key : keys) {
732             Annotations value = map.get(key);
733             Annotations value100 = map100.get(key);
734             Set<String> keywords100 = (value100 == null ? null : value100.getKeywords());
735             System.out.println(key + "\tname\t"
736                 + "\t" + value.getShortName()
737                 + "\t" + (value100 == null ? "" : value100.getShortName())
738                 + "\t" + Joiner.on(" | ").join(value.getKeywords())
739                 + "\t" + (keywords100 == null ? "" : Joiner.on(" | ").join(keywords100)));
740         }
741     }
742 }
743