• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Objects;
5 import com.google.common.base.Splitter;
6 import com.google.common.collect.ImmutableSet;
7 import com.ibm.icu.impl.UnicodeMap;
8 import com.ibm.icu.impl.Utility;
9 import com.ibm.icu.lang.CharSequences;
10 import com.ibm.icu.text.SimpleFormatter;
11 import com.ibm.icu.text.Transform;
12 import com.ibm.icu.text.UTF16;
13 import com.ibm.icu.text.UnicodeSet;
14 import com.ibm.icu.text.UnicodeSet.SpanCondition;
15 import com.ibm.icu.text.UnicodeSetSpanner;
16 import java.io.File;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.Collection;
20 import java.util.Collections;
21 import java.util.HashSet;
22 import java.util.LinkedHashSet;
23 import java.util.List;
24 import java.util.Locale;
25 import java.util.Map;
26 import java.util.Set;
27 import java.util.TreeSet;
28 import java.util.concurrent.ConcurrentHashMap;
29 import java.util.regex.Pattern;
30 import org.unicode.cldr.tool.ChartAnnotations;
31 import org.unicode.cldr.tool.SubdivisionNames;
32 import org.unicode.cldr.util.Factory.SourceTreeType;
33 import org.unicode.cldr.util.XMLFileReader.SimpleHandler;
34 
35 public class Annotations {
36     private static final boolean DEBUG = false;
37 
38     public static final String BAD_MARKER = "⊗";
39     public static final String MISSING_MARKER = "⊖";
40     public static final String ENGLISH_MARKER = "⊕";
41     public static final String EQUIVALENT = "≣";
42     public static final String NEUTRAL_HOLDING = "��‍��‍��";
43 
44     public static final Splitter splitter =
45             Splitter.on(Pattern.compile("[|;]")).trimResults().omitEmptyStrings();
46     static final Splitter dotSplitter = Splitter.on(".").trimResults();
47 
48     static final Map<String, Map<String, AnnotationSet>> cache = new ConcurrentHashMap<>();
49     static final Set<String> LOCALES;
50     static final Set<String> ALL_LOCALES;
51     static final Factory ANNOTATIONS_FACTORY;
52     private static final AnnotationSet ENGLISH_DATA;
53 
54     private final Set<String> annotations;
55     private final String tts;
56 
// Static setup: resolve the factory, list all locales, keep the ones stored in the "common"
// source tree as LOCALES, and finally load the English data set.
static {
    ANNOTATIONS_FACTORY = CLDRConfig.getInstance().getAnnotationsFactory();
    ALL_LOCALES = ANNOTATIONS_FACTORY.getAvailable();

    final Set<String> inCommonTree = new HashSet<>();
    for (final String localeId : ALL_LOCALES) {
        final File localeDir = getDirForLocale(localeId);
        final boolean isCommon =
                SimpleFactory.getSourceTreeType(localeDir) == SourceTreeType.common;
        if (isCommon) {
            inCommonTree.add(localeId);
        }
    }
    LOCALES = Collections.unmodifiableSet(inCommonTree);

    // Loaded last: getDataSet is defined elsewhere in this file and may rely on the fields above.
    ENGLISH_DATA = getDataSet("en");
}
71 
72     static class MyHandler extends SimpleHandler {
73         private final String locale;
74         private final UnicodeMap<Annotations> localeData = new UnicodeMap<>();
75         private final AnnotationSet parentData;
76         private final Map<String, AnnotationSet> dirCache;
77 
MyHandler( Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData)78         public MyHandler(
79                 Map<String, AnnotationSet> dirCache, String locale, AnnotationSet parentData) {
80             this.locale = locale;
81             this.parentData = parentData;
82             this.dirCache = dirCache;
83         }
84 
cleanup()85         public AnnotationSet cleanup() {
86             // add parent data (may be overridden)
87             UnicodeMap<Annotations> templocaleData = null;
88             if (parentData != null) {
89                 templocaleData = new UnicodeMap<>();
90                 UnicodeSet keys =
91                         new UnicodeSet(parentData.baseData.keySet()).addAll(localeData.keySet());
92                 for (String key : keys) {
93                     Annotations parentValue = parentData.baseData.get(key);
94                     Annotations myValue = localeData.get(key);
95                     if (parentValue == null) {
96                         templocaleData.put(key, myValue);
97                     } else if (myValue == null) {
98                         templocaleData.put(key, parentValue);
99                     } else { // need to combine
100                         String tts = myValue.tts == null ? parentValue.tts : myValue.tts;
101                         Set<String> annotations =
102                                 myValue.annotations == null || myValue.annotations.isEmpty()
103                                         ? parentValue.annotations
104                                         : myValue.annotations;
105                         templocaleData.put(key, new Annotations(annotations, tts));
106                     }
107                 }
108             }
109 
110             final AnnotationSet result = new AnnotationSet(locale, localeData, templocaleData);
111             dirCache.put(locale, result);
112             return result;
113         }
114 
115         static final Pattern SPACES = Pattern.compile("\\s+");
116 
117         @Override
handlePathValue(String path, String value)118         public void handlePathValue(String path, String value) {
119             if (value.contains(CldrUtility.INHERITANCE_MARKER)) {
120                 return; // skip all ^^^
121             }
122             XPathParts parts = XPathParts.getFrozenInstance(path);
123             String lastElement = parts.getElement(-1);
124             if (!lastElement.equals("annotation")) {
125                 if (!"identity".equals(parts.getElement(1))) {
126                     throw new IllegalArgumentException("Unexpected path");
127                 }
128                 return;
129             }
130             String usString = parts.getAttributeValue(-1, "cp");
131             UnicodeSet us1 =
132                     usString.startsWith("[") && usString.endsWith("]")
133                             ? new UnicodeSet(usString)
134                             : new UnicodeSet().add(usString);
135             UnicodeSet us = new UnicodeSet();
136             for (String s : us1) {
137                 us.add(s.replace(EmojiConstants.EMOJI_VARIANT_STRING, ""));
138             }
139             String tts = parts.getAttributeValue(-1, "tts");
140             String type = parts.getAttributeValue(-1, "type");
141             String alt = parts.getAttributeValue(-1, "alt");
142 
143             // clean up value
144             String value2 = SPACES.matcher(value).replaceAll(" ").trim();
145             if (!value2.equals(value)) {
146                 value = value2;
147             }
148             if (alt != null) {
149                 // do nothing for now
150             } else if ("tts".equals(type)) {
151                 addItems(localeData, us, Collections.<String>emptySet(), value);
152             } else {
153                 Set<String> attributes = new TreeSet<>(splitter.splitToList(value));
154                 addItems(localeData, us, attributes, tts);
155             }
156         }
157 
addItems( UnicodeMap<Annotations> unicodeMap, UnicodeSet us, Set<String> attributes, String tts)158         private void addItems(
159                 UnicodeMap<Annotations> unicodeMap,
160                 UnicodeSet us,
161                 Set<String> attributes,
162                 String tts) {
163             for (String entry : us) {
164                 addItems(unicodeMap, entry, attributes, tts);
165             }
166         }
167 
addItems( UnicodeMap<Annotations> unicodeMap, String entry, Set<String> attributes, String tts)168         private void addItems(
169                 UnicodeMap<Annotations> unicodeMap,
170                 String entry,
171                 Set<String> attributes,
172                 String tts) {
173             Annotations annotations = unicodeMap.get(entry);
174             if (annotations == null) {
175                 unicodeMap.put(entry, new Annotations(attributes, tts));
176             } else {
177                 unicodeMap.put(entry, annotations.add(attributes, tts)); // creates new item
178             }
179         }
180     }
181 
Annotations(Set<String> attributes, String tts2)182     public Annotations(Set<String> attributes, String tts2) {
183         annotations =
184                 attributes == null
185                         ? Collections.<String>emptySet()
186                         : ImmutableSet.copyOf(attributes);
187         for (String attr : annotations) {
188             if (attr.contains(CldrUtility.INHERITANCE_MARKER)) {
189                 throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
190             }
191         }
192         tts = tts2;
193         if (tts != null && tts.contains(CldrUtility.INHERITANCE_MARKER)) {
194             throw new IllegalArgumentException(CldrUtility.INHERITANCE_MARKER);
195         }
196     }
197 
add(Set<String> attributes, String tts2)198     public Annotations add(Set<String> attributes, String tts2) {
199         return new Annotations(
200                 getKeywords() == null
201                         ? attributes
202                         : attributes == null ? getKeywords() : union(attributes, getKeywords()),
203                 getShortName() == null ? tts2 : tts2 == null ? getShortName() : throwDup());
204     }
205 
throwDup()206     private String throwDup() {
207         throw new IllegalArgumentException("Duplicate tts");
208     }
209 
union(Set<String> a, Set<String> b)210     private Set<String> union(Set<String> a, Set<String> b) {
211         TreeSet<String> result = new TreeSet<>(a);
212         result.addAll(b);
213         return result;
214     }
215 
216     /**
217      * @return all common locales
218      */
getAvailable()219     public static Set<String> getAvailable() {
220         return LOCALES;
221     }
222 
223     /**
224      * @return all common locales
225      */
getAvailableLocales()226     public static Set<String> getAvailableLocales() {
227         return LOCALES;
228     }
229 
230     /**
231      * @return all locales, including seed
232      */
getAllAvailable()233     public static Set<String> getAllAvailable() {
234         return ALL_LOCALES;
235     }
236 
237     public static final class AnnotationSet {
238 
239         private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
240 
241         static final Factory factory = CONFIG.getCldrFactory();
242         static final CLDRFile ENGLISH = CONFIG.getEnglish();
243         static final CLDRFile ENGLISH_ANNOTATIONS = null;
244         static final SubdivisionNames englishSubdivisionIdToName =
245                 new SubdivisionNames("en", "main");
246 
247         private static final String BLACK_RIGHTWARDS_ARROW = "\u27A1";
248         private static final String BLACK_LEFTWARDS_ARROW = "\u2B05";
249         // CLDRConfig.getInstance().getAnnotationsFactory().make("en", false);
250 
251         private final String locale;
252         private final UnicodeMap<Annotations> baseData;
253         private final UnicodeMap<Annotations> unresolvedData;
254         private final CLDRFile cldrFile;
255         private final SubdivisionNames subdivisionIdToName;
256         private final SimpleFormatter initialPattern;
257         private final SimpleFormatter rightwardsArrowPattern;
258         private final Pattern initialRegexPattern;
259         private final XListFormatter listPattern;
260         private final Set<String> flagLabelSet;
261         private final Set<String> keycapLabelSet;
262         private final String keycapLabel;
263         private final String flagLabel;
264         //        private final String maleLabel;
265         //        private final String femaleLabel;
266         private final Map<String, Annotations> localeCache = new ConcurrentHashMap<>();
267 
268         static UnicodeSetSpanner uss =
269                 new UnicodeSetSpanner(EmojiConstants.COMPONENTS); // must be sync'ed
270 
AnnotationSet( String locale, UnicodeMap<Annotations> source, UnicodeMap<Annotations> resolvedSource)271         private AnnotationSet(
272                 String locale,
273                 UnicodeMap<Annotations> source,
274                 UnicodeMap<Annotations> resolvedSource) {
275             this.locale = locale;
276             unresolvedData = source.freeze();
277             this.baseData = resolvedSource == null ? unresolvedData : resolvedSource.freeze();
278             cldrFile = factory.make(locale, true);
279             subdivisionIdToName = new SubdivisionNames(locale, "main", "subdivisions");
280             // EmojiSubdivisionNames.getSubdivisionIdToName(locale);
281             listPattern = new XListFormatter(cldrFile, EmojiConstants.COMPOSED_NAME_LIST);
282             final String initialPatternString =
283                     getStringValue(
284                             "//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]");
285             initialPattern = SimpleFormatter.compile(initialPatternString);
286             //      <characterLabelPattern type="facing-right">{0} facing
287             // right</characterLabelPattern>
288             final String facingRightPatternString =
289                     getStringValue(
290                             "//ldml/characterLabels/characterLabelPattern[@type=\"facing-right\"]");
291 
292             rightwardsArrowPattern =
293                     facingRightPatternString == null
294                             ? null
295                             : SimpleFormatter.compile(facingRightPatternString);
296             final String regexPattern =
297                     ("\\Q"
298                                     + initialPatternString
299                                             .replace("{0}", "\\E.*\\Q")
300                                             .replace("{1}", "\\E.*\\Q")
301                                     + "\\E")
302                             .replace("\\Q\\E", ""); // HACK to detect use of prefix pattern
303             initialRegexPattern = Pattern.compile(regexPattern);
304             flagLabelSet = getLabelSet("flag");
305             flagLabel = flagLabelSet.isEmpty() ? null : flagLabelSet.iterator().next();
306             keycapLabelSet = getLabelSet("keycap");
307             keycapLabel = keycapLabelSet.isEmpty() ? null : keycapLabelSet.iterator().next();
308             //            maleLabel =
309             // getStringValue("//ldml/characterLabels/characterLabel[@type=\"male\"]");
310             //            femaleLabel =
311             // getStringValue("//ldml/characterLabels/characterLabel[@type=\"female\"]");
312         }
313 
314         /**
315          * @deprecated Use {@link #getLabelSet(String)} instead
316          */
317         @Deprecated
getLabelSet()318         private Set<String> getLabelSet() {
319             return getLabelSet("flag");
320         }
321 
getLabelSet(String typeAttributeValue)322         private Set<String> getLabelSet(String typeAttributeValue) {
323             String label =
324                     getStringValue(
325                             "//ldml/characterLabels/characterLabel[@type=\""
326                                     + typeAttributeValue
327                                     + "\"]");
328             return label == null ? Collections.<String>emptySet() : Collections.singleton(label);
329         }
330 
getStringValue(String xpath)331         private String getStringValue(String xpath) {
332             return getStringValue(xpath, cldrFile, ENGLISH);
333         }
334 
getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english)335         private String getStringValue(String xpath, CLDRFile cldrFile2, CLDRFile english) {
336             String result = cldrFile2.getStringValueWithBailey(xpath);
337             if (result == null) {
338                 return ENGLISH_MARKER + english.getStringValueWithBailey(xpath);
339             }
340             String sourceLocale = cldrFile2.getSourceLocaleID(xpath, null);
341             if (sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)
342                     || sourceLocale.equals(XMLSource.ROOT_ID)) {
343                 if (!xpath.equals(
344                         "//ldml/characterLabels/characterLabelPattern[@type=\"category-list\"]")) {
345                     return MISSING_MARKER + result;
346                 }
347             }
348             return result;
349         }
350 
getShortName(String code)351         public String getShortName(String code) {
352             return getShortName(code, null);
353         }
354 
getShortName(String code, Transform<String, String> otherSource)355         public String getShortName(String code, Transform<String, String> otherSource) {
356             if (code.equals("��‍♀️")) {
357                 int debug = 0;
358             }
359 
360             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
361             Annotations stock = baseData.get(code);
362             if (stock != null && stock.tts != null) {
363                 return stock.tts;
364             }
365             stock = localeCache.get(code);
366             if (stock != null) {
367                 return stock.tts;
368             }
369             stock = synthesize(code, otherSource);
370             if (stock != null) {
371                 localeCache.put(code, stock);
372                 return stock.tts;
373             }
374             return null;
375         }
376 
getKeywords(String code)377         public Set<String> getKeywords(String code) {
378             code = code.replace(EmojiConstants.EMOJI_VARIANT_STRING, "");
379             Annotations stock = baseData.get(code);
380             if (stock != null && stock.annotations != null) {
381                 return stock.annotations;
382             }
383             stock = localeCache.get(code);
384             if (stock != null) {
385                 return stock.annotations;
386             }
387             stock = synthesize(code, null);
388             if (stock != null) {
389                 localeCache.put(code, stock);
390                 return stock.annotations;
391             }
392             return Collections.<String>emptySet();
393         }
394 
395         /**
396          * Returns the set of all keys for which annotations are available. WARNING: keys have the
397          * Emoji Presentation Selector removed!
398          */
keySet()399         public UnicodeSet keySet() {
400             return baseData.keySet();
401         }
402 
synthesize(String code, Transform<String, String> otherSource)403         private Annotations synthesize(String code, Transform<String, String> otherSource) {
404             if (code.equals("����‍♂")) {
405                 int debug = 0;
406             }
407             String shortName = null;
408             int len = code.codePointCount(0, code.length());
409             boolean isKeycap10 = code.equals("��");
410             if (len == 1 && !isKeycap10) {
411                 String tempName = null;
412                 if (locale.equals("en")) {
413                     if (otherSource != null) {
414                         tempName = otherSource.transform(code);
415                     }
416                     if (tempName == null) {
417                         return null;
418                     }
419                     return new Annotations(Collections.<String>emptySet(), tempName);
420                 } else { // fall back to English if possible, but mark it.
421                     tempName = getDataSet("en").getShortName(code);
422                     if (tempName == null) {
423                         return null;
424                     }
425                     return new Annotations(
426                             Collections.<String>emptySet(), ENGLISH_MARKER + tempName);
427                 }
428             } else if (EmojiConstants.REGIONAL_INDICATORS.containsAll(code)) {
429                 String countryCode = EmojiConstants.getFlagCode(code);
430                 String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, countryCode);
431                 String regionName = getStringValue(path);
432                 if (regionName == null) {
433                     regionName = ENGLISH_MARKER + ENGLISH.getStringValueWithBailey(path);
434                 }
435                 String flagName =
436                         flagLabel == null
437                                 ? regionName
438                                 : initialPattern.format(flagLabel, regionName);
439                 return new Annotations(flagLabelSet, flagName);
440             } else if (code.startsWith(EmojiConstants.BLACK_FLAG)
441                     && code.endsWith(EmojiConstants.TAG_TERM)) {
442                 String subdivisionCode = EmojiConstants.getTagSpec(code);
443                 String subdivisionName = subdivisionIdToName.get(subdivisionCode);
444                 if (subdivisionName == null) {
445                     //                    subdivisionName =
446                     // englishSubdivisionIdToName.get(subdivisionCode);
447                     //                    if (subdivisionName != null) {
448                     //                        subdivisionName = ENGLISH_MARKER + subdivisionCode;
449                     //                    } else {
450                     subdivisionName = MISSING_MARKER + subdivisionCode;
451                     //                    }
452                 }
453                 String flagName =
454                         flagLabel == null
455                                 ? subdivisionName
456                                 : initialPattern.format(flagLabel, subdivisionName);
457                 return new Annotations(flagLabelSet, flagName);
458             } else if (isKeycap10 || code.contains(EmojiConstants.KEYCAP_MARK_STRING)) {
459                 final String rem = code.equals("��") ? "10" : UTF16.valueOf(code.charAt(0));
460                 shortName = initialPattern.format(keycapLabel, rem);
461                 return new Annotations(keycapLabelSet, shortName);
462             }
463             UnicodeSet skipSet = EmojiConstants.REM_SKIP_SET;
464             String rem = "";
465             SimpleFormatter startPattern = initialPattern;
466             if (EmojiConstants.COMPONENTS.containsSome(code)) {
467                 synchronized (uss) {
468                     rem = uss.deleteFrom(code, SpanCondition.NOT_CONTAINED);
469                     code = uss.deleteFrom(code, SpanCondition.CONTAINED);
470                 }
471             }
472             if (code.contains(EmojiConstants.JOINER_STRING)) {
473                 if (code.contains(BLACK_RIGHTWARDS_ARROW)) {
474                     String code2 =
475                             code.replace(EmojiConstants.JOINER_STRING + BLACK_RIGHTWARDS_ARROW, "");
476                     if (!Objects.equal(code2, code)) {
477                         Set<String> keywords = getKeywords(code2);
478                         String baseName = getShortName(code2);
479                         if (baseName == null
480                                 || keywords == null
481                                 || rightwardsArrowPattern == null) {
482                             return null;
483                         }
484                         shortName = rightwardsArrowPattern.format(baseName);
485                         return new Annotations(keywords, shortName);
486                     }
487                 } else if (code.contains(BLACK_LEFTWARDS_ARROW)) {
488                     throw new UnsupportedOperationException(
489                             "Implement if leftwards emoji are added");
490                 }
491                 //                if (code.endsWith(EmojiConstants.JOINER_MALE_SIGN)){
492                 //                    if (matchesInitialPattern(code)) { // "����‍♂️","police
493                 // officer: man, medium-light skin tone"
494                 //                        rem = EmojiConstants.MAN + rem;
495                 //                        code =
496                 // code.substring(0,code.length()-EmojiConstants.JOINER_MALE_SIGN.length());
497                 //                    } // otherwise "����‍♂️","man biking: dark skin tone"
498                 //                } else if (code.endsWith(EmojiConstants.JOINER_FEMALE_SIGN)){
499                 //                    if (matchesInitialPattern(code)) { //
500                 //                        rem = EmojiConstants.WOMAN + rem;
501                 //                        code =
502                 // code.substring(0,code.length()-EmojiConstants.JOINER_FEMALE_SIGN.length());
503                 //                    }
504                 //                } else
505                 if (code.contains(EmojiConstants.KISS)) {
506                     rem = code + rem;
507                     code = "��";
508                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
509                 } else if (code.contains(EmojiConstants.HEART)
510                         && !code.startsWith(EmojiConstants.HEART)) {
511                     rem = code + rem;
512                     code = "��";
513                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
514                 } else if (code.equals(EmojiConstants.COMPOSED_HANDSHAKE)) {
515                     code = EmojiConstants.HANDSHAKE;
516                 } else if (code.contains(EmojiConstants.HANDSHAKE)) {
517                     code =
518                             code.startsWith(EmojiConstants.MAN)
519                                     ? "��"
520                                     : code.endsWith(EmojiConstants.MAN)
521                                             ? "��"
522                                             : code.startsWith(EmojiConstants.WOMAN)
523                                                     ? "��"
524                                                     : NEUTRAL_HOLDING;
525                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
526                 } else if (EmojiConstants.FAMILY_MARKERS.containsAll(code)) {
527                     rem = code + rem;
528                     code = "��";
529                     skipSet = EmojiConstants.REM_GROUP_SKIP_SET;
530                     //                } else {
531                     //                    startPattern = listPattern;
532                 }
533                 // left over is "����‍⚖","judge: man, dark skin tone"
534             }
535             return getBasePlusRemainder(cldrFile, code, rem, skipSet, startPattern, otherSource);
536         }
537 
matchesInitialPattern(String code)538         private boolean matchesInitialPattern(String code) {
539             Annotations baseAnnotation = baseData.get(code);
540             String baseName = baseAnnotation == null ? null : baseAnnotation.getShortName();
541             return baseName != null && initialRegexPattern.matcher(baseName).matches();
542         }
543 
getBasePlusRemainder( CLDRFile cldrFile, String base, String rem, UnicodeSet ignore, SimpleFormatter pattern, Transform<String, String> otherSource)544         private Annotations getBasePlusRemainder(
545                 CLDRFile cldrFile,
546                 String base,
547                 String rem,
548                 UnicodeSet ignore,
549                 SimpleFormatter pattern,
550                 Transform<String, String> otherSource) {
551             String shortName = null;
552             Set<String> annotations = new LinkedHashSet<>();
553             boolean needMarker = true;
554 
555             if (base != null) {
556                 needMarker = false;
557                 Annotations stock = baseData.get(base);
558                 if (stock != null) {
559                     shortName = stock.getShortName();
560                     annotations.addAll(stock.getKeywords());
561                 } else if (otherSource != null) {
562                     shortName = otherSource.transform(base);
563                 } else {
564                     return null;
565                 }
566                 if (shortName == null) {
567                     return null;
568                 }
569             }
570 
571             boolean hackBlond = EmojiConstants.HAIR_EXPLICIT.contains(base.codePointAt(0));
572             Collection<String> arguments = new ArrayList<>();
573             int lastSkin = -1;
574 
575             for (int mod : CharSequences.codePoints(rem)) {
576                 if (ignore.contains(mod)) {
577                     continue;
578                 }
579                 if (EmojiConstants.MODIFIERS.contains(mod)) {
580                     if (lastSkin == mod) {
581                         continue;
582                     }
583                     lastSkin =
584                             mod; // collapse skin tones. TODO fix if we ever do multi-skin families
585                 }
586                 Annotations stock = baseData.get(mod);
587                 String modName = null;
588                 if (stock != null) {
589                     modName = stock.getShortName();
590                 } else if (otherSource != null) {
591                     modName = otherSource.transform(base);
592                 }
593                 if (modName == null) {
594                     needMarker = true;
595                     if (ENGLISH_DATA != null) {
596                         Annotations engName = ENGLISH_DATA.baseData.get(mod);
597                         if (engName != null) {
598                             modName = engName.getShortName();
599                         }
600                     }
601                     if (modName == null) {
602                         modName = Utility.hex(mod); // ultimate fallback
603                     }
604                 }
605                 if (hackBlond && shortName != null) {
606                     // HACK: make the blond names look like the other hair names
607                     // Split the short name into pieces, if possible, and insert the modName first
608                     String sep = initialPattern.format("", "");
609                     int splitPoint = shortName.indexOf(sep);
610                     if (splitPoint >= 0) {
611                         String modName0 = shortName.substring(splitPoint + sep.length());
612                         shortName = shortName.substring(0, splitPoint);
613                         if (modName != null) {
614                             arguments.add(modName);
615                             annotations.add(modName);
616                         }
617                         modName = modName0;
618                     }
619                     hackBlond = false;
620                 }
621 
622                 if (modName != null) {
623                     arguments.add(modName);
624                     annotations.add(modName);
625                 }
626             }
627             if (!arguments.isEmpty()) {
628                 shortName = pattern.format(shortName, listPattern.format(arguments));
629             }
630             Annotations result =
631                     new Annotations(annotations, (needMarker ? ENGLISH_MARKER : "") + shortName);
632             return result;
633         }
634 
635         /**
636          * @deprecated Use {@link #toString(String,boolean,AnnotationSet)} instead
637          */
638         @Deprecated
toString(String code, boolean html)639         public String toString(String code, boolean html) {
640             return toString(code, html, null);
641         }
642 
toString(String code, boolean html, AnnotationSet parentAnnotations)643         public String toString(String code, boolean html, AnnotationSet parentAnnotations) {
644             if (locale.equals("be") && code.equals("��")) {
645                 int debug = 0;
646             }
647             String shortName = getShortName(code);
648             if (shortName == null
649                     || shortName.startsWith(BAD_MARKER)
650                     || shortName.startsWith(ENGLISH_MARKER)) {
651                 return MISSING_MARKER;
652             }
653 
654             String parentShortName =
655                     parentAnnotations == null ? null : parentAnnotations.getShortName(code);
656             if (shortName != null && Objects.equal(shortName, parentShortName)) {
657                 shortName = EQUIVALENT;
658             }
659 
660             Set<String> keywords = getKeywordsMinus(code);
661             Set<String> parentKeywords =
662                     parentAnnotations == null ? null : parentAnnotations.getKeywordsMinus(code);
663             if (keywords != null
664                     && !keywords.isEmpty()
665                     && Objects.equal(keywords, parentKeywords)) {
666                 keywords = Collections.singleton(EQUIVALENT);
667             }
668 
669             String result = Joiner.on(" |\u00a0").join(keywords);
670             if (shortName != null) {
671                 String ttsString = (html ? "*<b>" : "*") + shortName + (html ? "</b>" : "*");
672                 if (result.isEmpty()) {
673                     result = ttsString;
674                 } else {
675                     result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
676                 }
677             }
678             return result;
679         }
680 
getExplicitValues()681         public UnicodeMap<Annotations> getExplicitValues() {
682             return baseData;
683         }
684 
getUnresolvedExplicitValues()685         public UnicodeMap<Annotations> getUnresolvedExplicitValues() {
686             return unresolvedData;
687         }
688 
getKeywordsMinus(String code)689         public Set<String> getKeywordsMinus(String code) {
690             String shortName = getShortName(code);
691             Set<String> keywords = getKeywords(code);
692             if (shortName != null && keywords.contains(shortName)) {
693                 keywords = new LinkedHashSet<>(keywords);
694                 keywords.remove(shortName);
695             }
696             return keywords;
697         }
698     }
699 
getDataSet(String locale)700     public static AnnotationSet getDataSet(String locale) {
701         final File theDir = getDirForLocale(locale);
702         return getDataSet(theDir.getAbsolutePath(), locale);
703     }
704 
getDirForLocale(String locale)705     private static File getDirForLocale(String locale) {
706         // use the annotations Factory to find the XML file
707         List<File> dirs = ANNOTATIONS_FACTORY.getSourceDirectoriesForLocale(locale);
708         if (dirs == null || dirs.isEmpty()) {
709             throw new IllegalArgumentException(
710                     "Cannot find source annotation directory for locale " + locale);
711         } else if (dirs.size() != 1) {
712             throw new IllegalArgumentException(
713                     "Did not find exactly one source directory for locale "
714                             + locale
715                             + " - "
716                             + dirs);
717         }
718         final File theDir = dirs.get(0);
719         return theDir;
720     }
721 
getDataSet(String dir, String locale)722     public static AnnotationSet getDataSet(String dir, String locale) {
723         Map<String, AnnotationSet> dirCache = cache.get(dir);
724         if (dirCache == null) {
725             cache.put(dir, dirCache = new ConcurrentHashMap<>());
726         }
727         AnnotationSet result = dirCache.get(locale);
728         if (result != null) {
729             return result;
730         }
731         if (!LOCALES.contains(locale)) {
732             return null;
733         }
734         String parentString = LocaleIDParser.getParent(locale);
735         AnnotationSet parentData = null;
736         if (parentString != null && !parentString.equals("root")) {
737             parentData = getDataSet(dir, parentString);
738         }
739         MyHandler myHandler = new MyHandler(dirCache, locale, parentData);
740         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
741         xfr.read(dir + "/" + locale + ".xml", -1, true);
742         return myHandler.cleanup();
743     }
744 
getData(String locale)745     public static UnicodeMap<Annotations> getData(String locale) {
746         final File theDir = getDirForLocale(locale);
747         return getData(theDir.getAbsolutePath(), locale);
748     }
749 
getData(String dir, String locale)750     public static UnicodeMap<Annotations> getData(String dir, String locale) {
751         AnnotationSet result = getDataSet(dir, locale);
752         return result == null ? null : result.baseData;
753     }
754 
755     @Override
toString()756     public String toString() {
757         return toString(false);
758     }
759 
toString(boolean html)760     public String toString(boolean html) {
761         Set<String> annotations2 = getKeywords();
762         if (getShortName() != null && annotations2.contains(getShortName())) {
763             annotations2 = new LinkedHashSet<>(getKeywords());
764             annotations2.remove(getShortName());
765         }
766         String result = Joiner.on(" |\u00a0").join(annotations2);
767         if (getShortName() != null) {
768             String ttsString = (html ? "*<b>" : "*") + getShortName() + (html ? "</b>" : "*");
769             if (result.isEmpty()) {
770                 result = ttsString;
771             } else {
772                 result = ttsString + (html ? "<br>|\u00a0" : " |\u00a0") + result;
773             }
774         }
775         return result;
776     }
777 
778     /**
779      * @return the annotations
780      */
getKeywords()781     public Set<String> getKeywords() {
782         return annotations;
783     }
784 
785     /**
786      * @return the tts
787      */
getShortName()788     public String getShortName() {
789         return tts;
790     }
791 
main(String[] args)792     public static void main(String[] args) {
793         if (true) {
794             writeList();
795         } else {
796             writeEnglish();
797         }
798     }
799 
writeList()800     private static void writeList() {
801         AnnotationSet eng = Annotations.getDataSet("en");
802         Annotations an = eng.baseData.get("❤");
803         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
804         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
805         map.keySet().addAllTo(keys);
806         //        keys.add("����‍⚖");
807         for (String key : keys) {
808             System.out.println(
809                     Utility.hex(key, 4, "_").toLowerCase(Locale.ROOT)
810                             + "\t"
811                             + key
812                             + "\t"
813                             + map.get(key).getShortName()
814                             + "\t"
815                             + Joiner.on(" | ").join(map.get(key).getKeywords()));
816         }
817         for (String s :
818                 Arrays.asList(
819                         "��",
820                         "��‍❤️‍��‍��",
821                         "��",
822                         "��‍❤️‍��",
823                         "��",
824                         "��‍��‍��",
825                         "����",
826                         "����",
827                         "��‍⚖",
828                         "����‍⚖",
829                         "��‍⚖",
830                         "����‍⚖",
831                         "��",
832                         "��‍♂️",
833                         "����‍♂️",
834                         "��‍♀️",
835                         "����‍♀️",
836                         "��",
837                         "����",
838                         "��‍♂️",
839                         "����‍♂️",
840                         "��‍♀️",
841                         "����‍♀️")) {
842             final String shortName = eng.getShortName(s);
843             final Set<String> keywords = eng.getKeywords(s);
844             System.out.println(
845                     "{\""
846                             + s
847                             + "\",\""
848                             + shortName
849                             + "\",\""
850                             + Joiner.on("|").join(keywords)
851                             + "\"},");
852         }
853     }
854 
writeEnglish()855     private static void writeEnglish() {
856         AnnotationSet eng = Annotations.getDataSet("en");
857         System.out.println(Annotations.getAvailable());
858         AnnotationSet eng100 = Annotations.getDataSet("en_001");
859         UnicodeMap<Annotations> map100 = eng100.getUnresolvedExplicitValues();
860         final UnicodeMap<Annotations> map = eng.getUnresolvedExplicitValues();
861         Set<String> keys = new TreeSet<>(ChartAnnotations.RBC);
862         map.keySet().addAllTo(keys);
863         for (String key : keys) {
864             Annotations value = map.get(key);
865             Annotations value100 = map100.get(key);
866             Set<String> keywords100 = (value100 == null ? null : value100.getKeywords());
867             System.out.println(
868                     key
869                             + "\tname\t"
870                             + "\t"
871                             + value.getShortName()
872                             + "\t"
873                             + (value100 == null ? "" : value100.getShortName())
874                             + "\t"
875                             + Joiner.on(" | ").join(value.getKeywords())
876                             + "\t"
877                             + (keywords100 == null ? "" : Joiner.on(" | ").join(keywords100)));
878         }
879     }
880 }
881