• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Splitter;
4 import com.google.common.collect.ImmutableMap;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.Multimap;
7 import com.google.common.collect.Multimaps;
8 import com.google.common.collect.Ordering;
9 import com.google.common.collect.TreeMultimap;
10 import com.ibm.icu.impl.UnicodeMap;
11 import com.ibm.icu.impl.Utility;
12 import com.ibm.icu.lang.CharSequences;
13 import com.ibm.icu.text.Collator;
14 import com.ibm.icu.text.Transliterator;
15 import com.ibm.icu.text.UTF16;
16 import com.ibm.icu.text.UnicodeSet;
17 import com.ibm.icu.util.ICUException;
18 import java.util.ArrayList;
19 import java.util.Collection;
20 import java.util.HashMap;
21 import java.util.Iterator;
22 import java.util.LinkedHashMap;
23 import java.util.LinkedHashSet;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Map.Entry;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.unicode.cldr.draft.FileUtilities;
33 import org.unicode.cldr.util.PathHeader.PageId;
34 
35 public class Emoji {
36     public static final Collator COLLATOR = CLDRConfig.getInstance().getCollator();
37     public static final String EMOJI_VARIANT = "\uFE0F";
38     public static final char JOINER = '\u200D';
39     public static final String JOINER_STR = "\u200D";
40 
41     public static final String FEMALE = "\u2640";
42     public static final String MALE = "\u2642";
43     public static final String TRANSGENDER = "\u26A7";
44 
45     public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
46     public static final String ZWJ = "\u200D";
47     public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
48     public static final UnicodeSet SKIN_MODIFIERS = new UnicodeSet("[��-��]").freeze();
49     public static final UnicodeSet HAIR_MODIFIERS = new UnicodeSet("[��������]").freeze();
50     public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
51     public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D ��-�� �� ❤]").freeze();
52     public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
53     public static final UnicodeSet SPECIALS =
54             new UnicodeSet(
55                             "["
56                                     + "{��‍⬛}{��‍❄}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��‍��}{��‍��} {��‍��} {��‍☠} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��}"
57                                     + "{��‍⚧}{��‍⚕}{��‍⚖}{��‍✈}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}"
58                                     + "{❤‍��}, {❤‍��}, {��‍��}, {��‍��}" // #E13.1
59                                     + "]")
60                     .freeze();
61     // May have to add from above, if there is a failure in testAnnotationPaths. Failure will be
62     // like:
63     // got java.util.TreeSet<[//ldml/annotations/annotation[@cp="��‍⚧"][@type="tts"],
64     // //ldml/annotations/annotation[@cp="��‍⚕"][@type="tts"], ...
65     // just extract the items in "...", and change into {...} for adding above.
66     // Example: //ldml/annotations/annotation[@cp="��‍⚕"] ==> {��‍⚕}
67     public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[�� ��]").freeze();
68     public static final UnicodeSet OBJECT =
69             new UnicodeSet("[�� �� �� �� �� �� �� �� ✈ �� �� �� �� �� �� ⚖ ⚕]").freeze();
70 
71     static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
72     static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
73     static final UnicodeMap<String> toName = new UnicodeMap<>();
74 
75     static final UnicodeSet NEUTRAL =
76             new UnicodeSet(
77                             "[⛷⛹��-������-��������-����������������������������-������������-����������������-��������-��]")
78                     .freeze();
79     public static final String ZWJ_HANDSHAKE_ZWJ = JOINER_STR + UTF16.valueOf(0x1F91D) + JOINER_STR;
80     public static final String ZWJ_HEART_ZWJ = JOINER_STR + UTF16.valueOf(0x2764) + JOINER_STR;
81     public static final UnicodeSet FULL_ZWJ_GENDER_MARKERS =
82             new UnicodeSet()
83                     .add(JOINER + FEMALE)
84                     .add(JOINER + MALE)
85                     .add(JOINER + FEMALE + EMOJI_VARIANT)
86                     .add(JOINER + MALE + EMOJI_VARIANT)
87                     .freeze();
88 
89     static final Transliterator NEUTER;
90 
91     static {
92         final UnicodeMap<String> TO_NEUTRAL =
93                 new UnicodeMap<String>()
94                         .put("��", "��")
95                         .put("��", "��")
96                         .put("��", "��")
97                         .put("��", "��")
98                         .put("��", "��")
99                         .put("��", "��")
100                         .put("��", "��\u200D��")
101                         .put("��", "��\u200D��")
102                         .put("��", "��\u200D��")
103                         .put("��", "��\u200D��")
104                         .put("��", "��\u200D��")
105                         .put("��", "��\u200D��")
106                         .put("��", "��" + ZWJ_HANDSHAKE_ZWJ + "��")
107                         .put("��", "��" + ZWJ_HANDSHAKE_ZWJ + "��")
108                         .put("��", "��" + ZWJ_HANDSHAKE_ZWJ + "��")
109                         .put(JOINER + FEMALE + EMOJI_VARIANT, "")
110                         .put(JOINER + MALE + EMOJI_VARIANT, "")
111                         .put(JOINER + FEMALE, "")
112                         .put(JOINER + MALE, "")
113                         .freeze();
114         Map<String, String> results =
115                 new TreeMap(Ordering.from(SupplementalDataInfo.LENGTH_FIRST).reversed());
116         for (Entry<String, String> entry : TO_NEUTRAL.entrySet()) {
entry.getKey()117             results.put(entry.getKey(), entry.getValue());
118         }
119         StringBuilder sb = new StringBuilder();
120         for (Entry<String, String> entry : results.entrySet()) {
121             sb.append(entry.getKey()).append('→').append(entry.getValue()).append(";\n");
122         }
123         NEUTER = Transliterator.createFromRules("foo", sb.toString(), Transliterator.FORWARD);
124     }
125 
126     static {
127         emojiToMajorCategory.setErrorOnReset(true);
128         emojiToMinorCategory.setErrorOnReset(true);
129         toName.setErrorOnReset(true);
130     }
131     /**
132      * A mapping from a majorCategory to a unique ordering number, based on the first time it is
133      * encountered.
134      */
135     static final Map<String, Long> majorToOrder = new HashMap<>();
136     /**
137      * A mapping from a minorCategory to a unique ordering number, based on the first time it is
138      * encountered.
139      */
140     static final Map<String, Long> minorToOrder = new HashMap<>();
141 
142     static final Map<String, Long> emojiToOrder = new LinkedHashMap<>();
143     static final UnicodeSet nonConstructed = new UnicodeSet();
144     static final UnicodeSet allRgi = new UnicodeSet();
145     static final UnicodeSet allRgiNoES = new UnicodeSet();
146 
147     static final UnicodeMap<String> restoreVariants = new UnicodeMap<>();
148     static final Set<Set<String>> genderSets;
149     // ߘ€ E1.0 grinning face
150     static {
151         /*
152          * Example from emoji-test.txt:
153          *   # group: Smileys & Emotion
154          *   # subgroup: face-smiling
155          *   1F600 ; fully-qualified # �� grinning face
156          */
157         Splitter semi = Splitter.on(';').trimResults();
158         String majorCategory = null;
159         String minorCategory = null;
160         final Matcher commentMatcher =
161                 Pattern.compile("\\s*[\\S]+\\s+(?:E\\d*.\\d+\\s+)(.*)").matcher("");
162 
163         Map<String, String> neutralAndGenderedToNeutral = new TreeMap<>();
164         for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
165             if (line.startsWith("#")) {
166                 line = line.substring(1).trim();
167                 if (line.startsWith("group:")) {
168                     majorCategory = line.substring("group:".length()).trim();
majorToOrder.computeIfAbsent(majorCategory, k -> (long) majorToOrder.size())169                     majorToOrder.computeIfAbsent(majorCategory, k -> (long) majorToOrder.size());
170                 } else if (line.startsWith("subgroup:")) {
171                     minorCategory = line.substring("subgroup:".length()).trim();
minorToOrder.computeIfAbsent(minorCategory, k -> (long) minorToOrder.size())172                     minorToOrder.computeIfAbsent(minorCategory, k -> (long) minorToOrder.size());
173                 }
174                 continue;
175             }
176             line = line.trim();
177             if (line.isEmpty()) {
178                 continue;
179             }
180             Iterator<String> it = semi.split(line).iterator();
181 
182             String emojiHex = it.next();
183             String original = Utility.fromHex(emojiHex, 4, " ");
184             String typeRaw = it.next();
185             // fully-qualified     # #️⃣ E0.6 keycap: #
186             int hashPos = typeRaw.indexOf('#');
187             if (hashPos < 0) {
188                 throw new IllegalArgumentException("unexpected comment format: " + typeRaw);
189             }
190             String type = typeRaw.substring(0, hashPos).trim();
191             if (type.startsWith("fully-qualified")) {
192                 if (original.contains("♂")) {
193                     int debug = 0;
194                 }
195                 allRgi.add(original);
196                 final String variantsRemoved = removeVariants(original);
197                 allRgiNoES.add(variantsRemoved);
198                 if (!original.equals(variantsRemoved)) {
restoreVariants.put(variantsRemoved, original)199                     restoreVariants.put(variantsRemoved, original);
200                 }
201                 if (!SKIN_MODIFIERS.containsSome(original)) {
202                     String neutral = NEUTER.transform(original);
203                     if (!neutral.equals(original)) {
neutralAndGenderedToNeutral.put(original, neutral)204                         neutralAndGenderedToNeutral.put(original, neutral);
neutralAndGenderedToNeutral.put(neutral, neutral)205                         neutralAndGenderedToNeutral.put(neutral, neutral);
206                     }
207                 }
208             }
emojiToMajorCategory.put(original, majorCategory)209             emojiToMajorCategory.put(original, majorCategory);
emojiToMinorCategory.put(original, minorCategory)210             emojiToMinorCategory.put(original, minorCategory);
211             String comment = typeRaw.substring(hashPos + 1);
212             if (!commentMatcher.reset(comment).matches()) {
213                 throw new IllegalArgumentException("unexpected comment format");
214             }
215             String name = commentMatcher.group(1);
216             // The comment is now of the form:  # �� E0.6 beaming face with smiling eyes
217             // int spacePos = comment.indexOf(' ');
218             // The format changed in v15.1, so there is no version number.
219             // Thus the following is commented out:
220             // spacePos = comment.indexOf(' ', spacePos + 1); // get second space
221             // String name = comment.substring(spacePos + 1).trim();
222 
toName.put(original, name)223             toName.put(original, name);
224 
225             // add all the non-constructed values to a set for annotations
226 
227             String minimal = original.replace(EMOJI_VARIANT, "");
228 
229             // Add the order. If it is not minimal, add that also.
230             if (!emojiToOrder.containsKey(original)) {
putUnique(emojiToOrder, original, emojiToOrder.size() * 100L)231                 putUnique(emojiToOrder, original, emojiToOrder.size() * 100L);
232             }
233             if (!emojiToOrder.containsKey(minimal)) {
putUnique(emojiToOrder, minimal, emojiToOrder.size() * 100L)234                 putUnique(emojiToOrder, minimal, emojiToOrder.size() * 100L);
235             }
236 
237             boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;
238 
239             // skip constructed values
240             if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
241                     || REGIONAL_INDICATORS.containsSome(minimal)
242                     || TAGS.containsSome(minimal)
243                     || !singleton && SKIN_MODIFIERS.containsSome(minimal)
244                     || !singleton && FAMILY.containsAll(minimal)) {
245                 // do nothing
246             } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
247                 if (SPECIALS.contains(minimal)
248                         || GENDER.containsSome(minimal)
249                         || MAN_WOMAN.contains(minimal.codePointAt(0))
250                                 && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
251                     nonConstructed.add(minimal);
252                 }
253             } else if (!minimal.contains("��")) {
254                 nonConstructed.add(minimal);
255             }
256         }
emojiToMajorCategory.freeze()257         emojiToMajorCategory.freeze();
emojiToMinorCategory.freeze()258         emojiToMinorCategory.freeze();
259         nonConstructed.add(SKIN_MODIFIERS); // needed for names
nonConstructed.freeze()260         nonConstructed.freeze();
toName.freeze()261         toName.freeze();
allRgi.freeze()262         allRgi.freeze();
263         allRgiNoES.addAll(SKIN_MODIFIERS).addAll(HAIR_MODIFIERS).freeze();
264         // hack
265         for (String s :
266                 new UnicodeSet(
267                         "[#*0-9©®‼⁉™ℹ↔-↙↩↪⌨⏏⏭-⏯ ⏱⏲⏸-⏺Ⓜ▪▫▶◀◻◼☀-☄☎☑☘☝☠☢ ☣☦☪☮☯☸-☺♀♂♟♠♣♥♦♨♻♾⚒⚔-⚗ ⚙⚛⚜⚠⚧⚰⚱⛈⛏⛑⛓⛩⛰⛱⛴⛷-⛹✂"
268                                 + "✈✉ ✌✍✏✒✔✖✝✡✳✴❄❇❣❤➡⤴⤵⬅-⬇〰 〽㊗㊙����������������-������������-���� ����-����-����������������"
269                                 + "��������-���� ��-����������������-����-����-�������� ����������-����-��������]")) {
restoreVariants.put(s, s + Emoji.EMOJI_VARIANT)270             restoreVariants.put(s, s + Emoji.EMOJI_VARIANT);
271         }
restoreVariants.freeze()272         restoreVariants.freeze();
273         Multimap<String, String> neutralToOthers = TreeMultimap.create(COLLATOR, COLLATOR);
Multimaps.forMap(neutralAndGenderedToNeutral)274         Multimaps.invertFrom(Multimaps.forMap(neutralAndGenderedToNeutral), neutralToOthers);
275         Set<Set<String>> toGenderGroup = new LinkedHashSet<>();
276         for (Collection<String> set : neutralToOthers.asMap().values()) {
277             TreeSet<String> s = new TreeSet<>(COLLATOR);
278             s.addAll(set);
ImmutableSet.copyOf(s)279             toGenderGroup.add(ImmutableSet.copyOf(s));
280         }
281         genderSets = CldrUtility.protectCollection(toGenderGroup);
282     }
283 
removeVariants(String original)284     public static String removeVariants(String original) {
285         return original.replace(Emoji.EMOJI_VARIANT, "");
286     }
287 
getGenderGroups()288     public static Set<Set<String>> getGenderGroups() {
289         return genderSets;
290     }
291 
restoreVariants(String source)292     public static final String restoreVariants(String source) {
293         String restored = restoreVariants.get(source);
294         if (restored != null) {
295             int debug = 0;
296         }
297         return restored == null ? source : restored;
298     }
299 
putUnique(Map<K, V> map, K key, V value)300     private static <K, V> void putUnique(Map<K, V> map, K key, V value) {
301         V oldValue = map.put(key, value);
302         if (oldValue != null) {
303             throw new ICUException(
304                     "Attempt to change value of "
305                             + map
306                             + " for "
307                             + key
308                             + " from "
309                             + oldValue
310                             + " to "
311                             + value);
312         }
313     }
314 
getAllRgi()315     public static UnicodeSet getAllRgi() {
316         return allRgi;
317     }
318 
getAllRgiNoES()319     public static UnicodeSet getAllRgiNoES() {
320         return allRgiNoES;
321     }
322 
323     public static final UnicodeMap<String> EXTRA_SYMBOL_MINOR_CATEGORIES = new UnicodeMap<>();
324     public static final Map<String, Long> EXTRA_SYMBOL_ORDER;
325     private static final boolean DEBUG = false;
326 
327     static {
328         String[][] data = {
329             {"arrow", "→ ↓ ↑ ← ↔ ↕ ⇆ ⇅"},
330             {"alphanum", "© ® ℗ ™ µ"},
331             {"geometric", "▼ ▶ ▲ ◀ ● ○ ◯ ◊"},
332             {"math", "× ÷ √ ∞ ∆ ∇ ⁻ ¹ ² ³ ≡ ∈ ⊂ ∩ ∪ ° + ± − = ≈ ≠ > < ≤ ≥ ¬ | ~"},
333             {
334                 "punctuation",
335                 "§ † ‡ \\u0020  , 、 ، ; : ؛ ! ¡ ? ¿ ؟ ¶ ※ / \\ & # % ‰ ′ ″ ‴ @ * ♪ ♭ ♯ ` ´ ^ ¨ ‐ ― _ - – — • · . … 。 ‧ ・ ‘ ’ ‚ ' “ ” „ » « ( ) [ ] { } 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】"
336             },
337             {"currency", "€ £ ¥ ₹ ₽ $ ¢ ฿ ₪ ₺ ₫ ₱ ₩ ₡ ₦ ₮ ৳ ₴ ₸ ₲ ₵ ៛ ₭ ֏ ₥ ₾ ₼ ₿ ؋ ₧ ¤"},
338             {
339                 "other-symbol",
340                 "‾‽‸⁂↚↛↮↙↜↝↞↟↠↡↢↣↤↥↦↧↨↫↬↭↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇇⇈⇉⇊⇋⇌⇐⇍⇑⇒⇏⇓⇔⇎⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇵∀∂∃∅∉∋∎∏∑≮≯∓∕⁄∗∘∙∝∟∠∣∥∧∫∬∮∴∵∶∷∼∽∾≃≅≌≒≖≣≦≧≪≫≬≳≺≻⊁⊃⊆⊇⊕⊖⊗⊘⊙⊚⊛⊞⊟⊥⊮⊰⊱⋭⊶⊹⊿⋁⋂⋃⋅⋆⋈⋒⋘⋙⋮⋯⋰⋱■□▢▣▤▥▦▧▨▩▬▭▮▰△▴▵▷▸▹►▻▽▾▿◁◂◃◄◅◆◇◈◉◌◍◎◐◑◒◓◔◕◖◗◘◙◜◝◞◟◠◡◢◣◤◥◦◳◷◻◽◿⨧⨯⨼⩣⩽⪍⪚⪺₢₣₤₰₳₶₷₨﷼"
341             },
342         };
343         // get the maximum suborder for each subcategory
344         Map<String, Long> subcategoryToMaxSuborder = new HashMap<>();
345         for (String[] row : data) {
346             final String subcategory = row[0];
347             for (Entry<String, String> entry : emojiToMinorCategory.entrySet()) {
348                 if (entry.getValue().equals(subcategory)) {
349                     String emoji = entry.getKey();
350                     Long order = emojiToOrder.get(emoji);
351                     Long currentMax = subcategoryToMaxSuborder.get(subcategory);
352                     if (currentMax == null || currentMax < order) {
subcategoryToMaxSuborder.put(subcategory, order)353                         subcategoryToMaxSuborder.put(subcategory, order);
354                     }
355                 }
356             }
357         }
358         if (DEBUG) System.out.println(subcategoryToMaxSuborder);
359         Map<String, Long> _EXTRA_SYMBOL_ORDER = new LinkedHashMap<>();
360         for (String[] row : data) {
361             final String subcategory = row[0];
362             final String characters = row[1];
363 
364             List<String> items = new ArrayList<>();
365             for (int cp : With.codePointArray(characters)) {
366                 if (cp != ' ') {
With.fromCodePoint(cp)367                     items.add(With.fromCodePoint(cp));
368                 }
369             }
370             final UnicodeSet uset = new UnicodeSet().addAll(items);
371             if (uset.containsSome(EXTRA_SYMBOL_MINOR_CATEGORIES.keySet())) {
372                 throw new IllegalArgumentException(
373                         "Duplicate values in " + EXTRA_SYMBOL_MINOR_CATEGORIES);
374             }
EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory)375             EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory);
376             long count = subcategoryToMaxSuborder.get(subcategory);
377             for (String s : items) {
378                 ++count;
_EXTRA_SYMBOL_ORDER.put(s, count)379                 _EXTRA_SYMBOL_ORDER.put(s, count);
380             }
subcategoryToMaxSuborder.put(subcategory, count)381             subcategoryToMaxSuborder.put(subcategory, count);
382         }
383         if (DEBUG) System.out.println(_EXTRA_SYMBOL_ORDER);
EXTRA_SYMBOL_MINOR_CATEGORIES.freeze()384         EXTRA_SYMBOL_MINOR_CATEGORIES.freeze();
385         EXTRA_SYMBOL_ORDER = ImmutableMap.copyOf(_EXTRA_SYMBOL_ORDER);
386     }
387 
getMinorCategory(String emoji)388     public static String getMinorCategory(String emoji) {
389         String minorCat = emojiToMinorCategory.get(emoji);
390         if (minorCat == null) {
391             minorCat = EXTRA_SYMBOL_MINOR_CATEGORIES.get(emoji);
392             if (minorCat == null) {
393                 throw new InternalCldrException(
394                         "No minor category (aka subgroup) found for "
395                                 + emoji
396                                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
397             }
398         }
399         return minorCat;
400     }
401 
getName(String emoji)402     public static String getName(String emoji) {
403         return toName.get(emoji);
404     }
405 
getEmojiToOrder(String emoji)406     public static long getEmojiToOrder(String emoji) {
407         Long result = emojiToOrder.get(emoji);
408         if (result == null) {
409             result = EXTRA_SYMBOL_ORDER.get(emoji);
410             if (result == null) {
411                 throw new InternalCldrException(
412                         "No Order found for "
413                                 + emoji
414                                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
415             }
416         }
417         return result;
418     }
419 
getEmojiMinorOrder(String minor)420     public static long getEmojiMinorOrder(String minor) {
421         Long result = minorToOrder.get(minor);
422         if (result == null) {
423             throw new InternalCldrException(
424                     "No minor category (aka subgroup) found for "
425                             + minor
426                             + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
427         }
428         return result;
429     }
430 
getMajorCategory(String emoji)431     public static String getMajorCategory(String emoji) {
432         String majorCat = emojiToMajorCategory.get(emoji);
433         if (majorCat == null) {
434             if (EXTRA_SYMBOL_MINOR_CATEGORIES.containsKey(emoji)) {
435                 majorCat = "Symbols";
436             } else {
437                 throw new InternalCldrException(
438                         "No minor category (aka subgroup) found for "
439                                 + emoji
440                                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
441             }
442         }
443         return majorCat;
444     }
445 
getMinorCategoriesWithExtras()446     public static Set<String> getMinorCategoriesWithExtras() {
447         Set<String> result = new LinkedHashSet<>(emojiToMinorCategory.values());
448         result.addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getAvailableValues());
449         return ImmutableSet.copyOf(result);
450     }
451 
getEmojiInMinorCategoriesWithExtras(String minorCategory)452     public static UnicodeSet getEmojiInMinorCategoriesWithExtras(String minorCategory) {
453         return new UnicodeSet(emojiToMinorCategory.getSet(minorCategory))
454                 .addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getSet(minorCategory))
455                 .freeze();
456     }
457 
getNonConstructed()458     public static UnicodeSet getNonConstructed() {
459         return nonConstructed;
460     }
461 
462     private static Set<String> NAME_PATHS = null;
463     public static final String TYPE_TTS = "[@type=\"tts\"]";
464 
getNamePaths()465     public static synchronized Set<String> getNamePaths() {
466         return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS));
467     }
468 
buildPaths(String suffix)469     private static ImmutableSet<String> buildPaths(String suffix) {
470         ImmutableSet.Builder<String> builder = ImmutableSet.builder();
471         for (String s : Emoji.getNonConstructed()) {
472             String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix;
473             builder.add(base);
474         }
475         return builder.build();
476     }
477 
478     /**
479      * Return the PageId for the given emoji, making adjustments for pages that are united in
480      * emoji-test.txt but divided in Survey Tool, such as Symbols, Symbols2, and Symbols3
481      *
482      * @param emoji the emoji as a string
483      * @return the adjusted PageId
484      */
getPageId(String emoji)485     public static PageId getPageId(String emoji) {
486         final String major = getMajorCategory(emoji);
487         final String minor = getMinorCategory(emoji);
488         final PageId pageId = PageId.forString(major);
489         final Long minorOrder = minorToOrder.get(minor);
490         switch (pageId) {
491             case Objects:
492                 return (minorOrder < minorToOrder.get("money")) ? PageId.Objects : PageId.Objects2;
493             case People:
494                 return (minorOrder < minorToOrder.get("person-fantasy"))
495                         ? PageId.People
496                         : PageId.People2;
497             case Symbols:
498                 return (minorOrder < minorToOrder.get("transport-sign"))
499                         ? PageId.Symbols
500                         : PageId.EmojiSymbols;
501             case Travel_Places:
502                 return (minorOrder < minorToOrder.get("transport-ground"))
503                         ? PageId.Travel_Places
504                         : PageId.Travel_Places2;
505             default:
506                 return pageId;
507         }
508     }
509 }
510