• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.util.HashMap;
4 import java.util.Iterator;
5 import java.util.LinkedHashMap;
6 import java.util.Map;
7 import java.util.Set;
8 
9 import org.unicode.cldr.draft.FileUtilities;
10 
11 import com.google.common.base.CharMatcher;
12 import com.google.common.base.Splitter;
13 import com.google.common.collect.ImmutableSet;
14 import com.ibm.icu.dev.util.UnicodeMap;
15 import com.ibm.icu.impl.Utility;
16 import com.ibm.icu.lang.CharSequences;
17 import com.ibm.icu.text.UnicodeSet;
18 
19 public class Emoji {
20     public static final String EMOJI_VARIANT = "\uFE0F";
21     public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
22     public static final String ZWJ = "\u200D";
23     public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
24     public static final UnicodeSet MODIFIERS = new UnicodeSet("[��-��]").freeze();
25     public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
26     public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D ��-�� �� ❤]").freeze();
27     public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
28     public static final UnicodeSet SPECIALS = new UnicodeSet("["
29         + "{��‍⬛}{��‍❄}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��‍��}{��‍��} {��‍��} {��‍☠} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��} {��‍��}"
30         + "{��‍⚧}{��‍⚕}{��‍⚖}{��‍✈}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}{��‍��}"
31         + "]").freeze();
32     // May have to add from above, if there is a failure in testAnnotationPaths. Failure will be like:
33     // got java.util.TreeSet<[//ldml/annotations/annotation[@cp="��‍⚧"][@type="tts"], //ldml/annotations/annotation[@cp="��‍⚕"][@type="tts"], ...
34     // just extract the items in "...", and change into {...} for adding above.
35     // Example: //ldml/annotations/annotation[@cp="��‍⚕"] ==> {��‍⚕}
36     public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[�� ��]").freeze();
37     public static final UnicodeSet OBJECT = new UnicodeSet("[�� �� �� �� �� �� �� �� ✈ �� �� �� �� �� �� ⚖ ⚕]").freeze();
38 
    /** Maps each emoji string read from emoji-test.txt to the "group:" name that was in effect on its line. */
    static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
    /** Maps each emoji string read from emoji-test.txt to the "subgroup:" name that was in effect on its line. */
    static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
    /** Maps each emoji string to the name parsed from the trailing comment of its emoji-test.txt line. */
    static final UnicodeMap<String> toName = new UnicodeMap<>();
    /**
     * A mapping from a majorCategory to a unique ordering number, based on the first time it is encountered.
     */
    static final Map<String, Integer> majorToOrder = new HashMap<>();
    /**
     * A mapping from a minorCategory to a unique ordering number, based on the first time it is encountered.
     */
    static final Map<String, Integer> minorToOrder = new HashMap<>();
    /** Maps an emoji string to an ordering number assigned while emoji-test.txt is read in the static initializer. */
    static final Map<String, Integer> emojiToOrder = new LinkedHashMap<>();
    /** Emoji (with the variation selector removed) that the static initializer did not skip as constructed sequences. */
    static final UnicodeSet nonConstructed = new UnicodeSet();
    /** Every emoji whose emoji-test.txt status starts with "fully-qualified". */
    static final UnicodeSet allRgi = new UnicodeSet();
    /** The contents of {@link #allRgi} with every {@link #EMOJI_VARIANT} removed from each string. */
    static final UnicodeSet allRgiNoES = new UnicodeSet();
54 
55     static {
56         /*
57             # group: Smileys & People
58             # subgroup: face-positive
59             1F600 ; fully-qualified     # �� grinning face
60          */
61         Splitter semi = Splitter.on(CharMatcher.anyOf(";#")).trimResults();
62         String majorCategory = null;
63         String minorCategory = null;
64         int majorOrder = 0;
65         int minorOrder = 0;
66         //Multimap<Pair<Integer,Integer>,String> majorPlusMinorToEmoji = TreeMultimap.create();
67         for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
68             if (line.startsWith("#")) {
69                 line = line.substring(1).trim();
70                 if (line.startsWith("group:")) {
71                     majorCategory = line.substring("group:".length()).trim();
72                     Integer oldMajorOrder = majorToOrder.get(majorCategory);
73                     if (oldMajorOrder == null) {
majorToOrder.put(majorCategory, majorOrder = majorToOrder.size())74                         majorToOrder.put(majorCategory, majorOrder = majorToOrder.size());
75                     } else {
76                         majorOrder = oldMajorOrder;
77                     }
78                 } else if (line.startsWith("subgroup:")) {
79                     minorCategory = line.substring("subgroup:".length()).trim();
80                     Integer oldMinorOrder = minorToOrder.get(minorCategory);
81                     if (oldMinorOrder == null) {
minorToOrder.put(minorCategory, minorOrder = minorToOrder.size())82                         minorToOrder.put(minorCategory, minorOrder = minorToOrder.size());
83                     } else {
84                         minorOrder = oldMinorOrder;
85                     }
86                 }
87                 continue;
88             }
89             line = line.trim();
90             if (line.isEmpty()) {
91                 continue;
92             }
93             Iterator<String> it = semi.split(line).iterator();
94             String emojiHex = it.next();
95             String original = Utility.fromHex(emojiHex, 4, " ");
96             String type = it.next();
97             if (type.startsWith("fully-qualified")) {
98                 allRgi.add(original);
original.replace(Emoji.EMOJI_VARIANT, "")99                 allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, ""));
100             }
emojiToMajorCategory.put(original, majorCategory)101             emojiToMajorCategory.put(original, majorCategory);
emojiToMinorCategory.put(original, minorCategory)102             emojiToMinorCategory.put(original, minorCategory);
103             String comment = it.next();
104             int spacePos = comment.indexOf(' ');
105             String name = comment.substring(spacePos+1).trim();
toName.put(original, name)106             toName.put(original, name);
107 
108             // add all the non-constructed values to a set for annotations
109 
110             String minimal = original.replace(EMOJI_VARIANT, "");
111 
112             // Add the order. If it is not minimal, add that also.
113             if (!emojiToOrder.containsKey(original)) {
emojiToOrder.put(original, emojiToOrder.size())114                 emojiToOrder.put(original, emojiToOrder.size());
115             }
116             if (!emojiToOrder.containsKey(minimal)) {
emojiToOrder.put(original, emojiToOrder.size())117                 emojiToOrder.put(original, emojiToOrder.size());
118             }
119             //
120             // majorPlusMinorToEmoji.put(Pair.of(majorOrder, minorOrder), minimal);
121 
122             boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;
123 //            if (!emojiToOrder.containsKey(minimal)) {
124 //                emojiToOrder.put(minimal, emojiToOrder.size());
125 //            }
126 
127             // skip constructed values
128             if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
129                 || REGIONAL_INDICATORS.containsSome(minimal)
130                 || TAGS.containsSome(minimal)
131                 || !singleton && MODIFIERS.containsSome(minimal)
132                 || !singleton && FAMILY.containsAll(minimal)) {
133                 // do nothing
134             } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
135                 if (SPECIALS.contains(minimal)
136                     || GENDER.containsSome(minimal)
137                     || MAN_WOMAN.contains(minimal.codePointAt(0)) && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
138                     nonConstructed.add(minimal);
139                 }
140             } else if (!minimal.contains("��")) {
141                 nonConstructed.add(minimal);
142             }
143         }
144 //        for (Entry<Pair<Integer,Integer>, String> entry : majorPlusMinorToEmoji.entries()) {
145 //            String minimal = entry.getValue();
146 //            emojiToOrder.put(minimal, emojiToOrder.size());
147 //        }
emojiToMajorCategory.freeze()148         emojiToMajorCategory.freeze();
emojiToMinorCategory.freeze()149         emojiToMinorCategory.freeze();
150         nonConstructed.add(MODIFIERS); // needed for names
nonConstructed.freeze()151         nonConstructed.freeze();
toName.freeze()152         toName.freeze();
allRgi.freeze()153         allRgi.freeze();
allRgiNoES.freeze()154         allRgiNoES.freeze();
155     }
156 
getAllRgi()157     public static UnicodeSet getAllRgi() {
158         return allRgi;
159     }
160 
getAllRgiNoES()161     public static UnicodeSet getAllRgiNoES() {
162         return allRgiNoES;
163     }
164 
getMinorCategory(String emoji)165     public static String getMinorCategory(String emoji) {
166         String minorCat = emojiToMinorCategory.get(emoji);
167         if (minorCat == null) {
168             throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
169                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
170         }
171         return minorCat;
172     }
173 
getName(String emoji)174     public static String getName(String emoji) {
175         return toName.get(emoji);
176     }
177 
178 
179 //    public static int getMinorToOrder(String minor) {
180 //        Integer result = minorToOrder.get(minor);
181 //        return result == null ? Integer.MAX_VALUE : result;
182 //    }
183 
getEmojiToOrder(String emoji)184     public static int getEmojiToOrder(String emoji) {
185         Integer result = emojiToOrder.get(emoji);
186         return result == null ? Integer.MAX_VALUE : result;
187     }
188 
getEmojiMinorOrder(String minor)189     public static int getEmojiMinorOrder(String minor) {
190         Integer result = minorToOrder.get(minor);
191         return result == null ? Integer.MAX_VALUE : result;
192     }
193 
getMajorCategory(String emoji)194     public static String getMajorCategory(String emoji) {
195         String majorCat = emojiToMajorCategory.get(emoji);
196         if (majorCat == null) {
197             throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
198                 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
199         }
200         return majorCat;
201     }
202 
getMajorCategories()203     public static Set<String> getMajorCategories() {
204         return emojiToMajorCategory.values();
205     }
206 
getMinorCategories()207     public static Set<String> getMinorCategories() {
208         return emojiToMinorCategory.values();
209     }
210 
getNonConstructed()211     public static UnicodeSet getNonConstructed() {
212         return nonConstructed;
213     }
214 
    /** Cache for {@link #getNamePaths()}; null until that method is first called. */
    private static Set<String> NAME_PATHS = null;
    /** Cache for {@link #getKeywordPaths()}; null until that method is first called. */
    private static Set<String> KEYWORD_PATHS = null;
    /** XPath attribute suffix appended to an annotation path to select its "tts" entry. */
    public static final String TYPE_TTS = "[@type=\"tts\"]";
218 
getNamePaths()219     public static synchronized Set<String> getNamePaths() {
220         return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS));
221     }
222 
getKeywordPaths()223     public static synchronized Set<String> getKeywordPaths() {
224         return KEYWORD_PATHS != null ? KEYWORD_PATHS : (KEYWORD_PATHS = buildPaths(""));
225     }
226 
buildPaths(String suffix)227     private static ImmutableSet<String> buildPaths(String suffix) {
228         ImmutableSet.Builder<String> builder = ImmutableSet.builder();
229         for (String s : Emoji.getNonConstructed()) {
230             String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix;
231             builder.add(base);
232         }
233         return builder.build();
234     }
235 }
236