package org.unicode.cldr.util;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

import org.unicode.cldr.draft.FileUtilities;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.CharSequences;
import com.ibm.icu.text.UnicodeSet;

/**
 * Static lookup tables for emoji, loaded once from {@code data/emoji/emoji-test.txt} when the
 * class is initialized: group ("major category") and subgroup ("minor category") per emoji,
 * names, ordering numbers, and the set of "non-constructed" emoji used to build annotation paths.
 */
public class Emoji {
    public static final String EMOJI_VARIANT = "\uFE0F";
    public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
    public static final String ZWJ = "\u200D";
    public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
    // NOTE(review): several pattern literals below (MODIFIERS, FAMILY, SPECIALS, MAN_WOMAN, OBJECT)
    // look as if supplementary-plane characters were lost (e.g. the empty {} entries). They are
    // reproduced as found; confirm against the authoritative copy of this file.
    public static final UnicodeSet MODIFIERS = new UnicodeSet("[-]").freeze();
    public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
    public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D - ❤]").freeze();
    public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();
    public static final UnicodeSet SPECIALS = new UnicodeSet("["
        + "{⬛}{❄}{}{}{}{}{}{} {} {☠} {} {} {} {} {} {} {}"
        + "{⚧}{⚕}{⚖}{✈}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}"
        + "]").freeze();
    // May have to add from above, if there is a failure in testAnnotationPaths. Failure will be like:
    // got java.util.TreeSet<[//ldml/annotations/annotation[@cp="⚧"][@type="tts"], //ldml/annotations/annotation[@cp="⚕"][@type="tts"], ...
    // just extract the items in "...", and change into {...} for adding above.
    // Example: //ldml/annotations/annotation[@cp="⚕"] ==> {⚕}
    public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[ ]").freeze();
    public static final UnicodeSet OBJECT = new UnicodeSet("[ ✈ ⚖ ⚕]").freeze();

    /** Emoji string to the most recent "group:" header seen before it in the data file. */
    static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
    /** Emoji string to the most recent "subgroup:" header seen before it in the data file. */
    static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
    /** Emoji string to the name parsed from the trailing comment of its data line. */
    static final UnicodeMap<String> toName = new UnicodeMap<>();
    /**
     * A mapping from a majorCategory to a unique ordering number, based on the first time it is encountered.
     */
    static final Map<String, Integer> majorToOrder = new HashMap<>();
    /**
     * A mapping from a minorCategory to a unique ordering number, based on the first time it is encountered.
     */
    static final Map<String, Integer> minorToOrder = new HashMap<>();
    /**
     * A mapping from an emoji string to a unique ordering number, based on the first time it is
     * encountered. Both the form listed in the data file and the form with {@link #EMOJI_VARIANT}
     * removed are registered.
     */
    static final Map<String, Integer> emojiToOrder = new LinkedHashMap<>();
    /** Emoji that get their own annotation paths; see the static initializer for the rules. */
    static final UnicodeSet nonConstructed = new UnicodeSet();
    /** Every entry whose status field starts with "fully-qualified". */
    static final UnicodeSet allRgi = new UnicodeSet();
    /** Same entries as {@link #allRgi}, with {@link #EMOJI_VARIANT} removed from each string. */
    static final UnicodeSet allRgiNoES = new UnicodeSet();

    static {
        /*
         * Sample of the data being parsed:
         # group: Smileys & People
         # subgroup: face-positive
         1F600 ; fully-qualified # grinning face
         */
        Splitter semi = Splitter.on(CharMatcher.anyOf(";#")).trimResults();
        String majorCategory = null;
        String minorCategory = null;
        for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
            if (line.startsWith("#")) {
                // Comment line: only the group/subgroup headers carry information.
                line = line.substring(1).trim();
                if (line.startsWith("group:")) {
                    majorCategory = line.substring("group:".length()).trim();
                    if (!majorToOrder.containsKey(majorCategory)) {
                        majorToOrder.put(majorCategory, majorToOrder.size());
                    }
                } else if (line.startsWith("subgroup:")) {
                    minorCategory = line.substring("subgroup:".length()).trim();
                    if (!minorToOrder.containsKey(minorCategory)) {
                        minorToOrder.put(minorCategory, minorToOrder.size());
                    }
                }
                continue;
            }
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }

            // Data line fields, split on ';' and '#': hex code points, status, comment.
            Iterator<String> it = semi.split(line).iterator();
            String emojiHex = it.next();
            String original = Utility.fromHex(emojiHex, 4, " ");
            String type = it.next();
            if (type.startsWith("fully-qualified")) {
                allRgi.add(original);
                allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, ""));
            }
            emojiToMajorCategory.put(original, majorCategory);
            emojiToMinorCategory.put(original, minorCategory);

            // The name is whatever follows the first space of the comment field.
            String comment = it.next();
            int spacePos = comment.indexOf(' ');
            String name = comment.substring(spacePos + 1).trim();
            toName.put(original, name);

            // add all the non-constructed values to a set for annotations

            String minimal = original.replace(EMOJI_VARIANT, "");

            // Add the order. If it is not minimal, add that also.
            if (!emojiToOrder.containsKey(original)) {
                emojiToOrder.put(original, emojiToOrder.size());
            }
            if (!emojiToOrder.containsKey(minimal)) {
                // Register the minimal form itself. Putting 'original' here would overwrite the
                // order number assigned just above and leave 'minimal' without any entry.
                emojiToOrder.put(minimal, emojiToOrder.size());
            }

            boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;

            // skip constructed values
            if (minimal.contains(COMBINING_ENCLOSING_KEYCAP)
                || REGIONAL_INDICATORS.containsSome(minimal)
                || TAGS.containsSome(minimal)
                || !singleton && MODIFIERS.containsSome(minimal)
                || !singleton && FAMILY.containsAll(minimal)) {
                // do nothing
            } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
                if (SPECIALS.contains(minimal)
                    || GENDER.containsSome(minimal)
                    || MAN_WOMAN.contains(minimal.codePointAt(0))
                        && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
                    nonConstructed.add(minimal);
                }
            } else if (!minimal.contains("")) {
                // NOTE(review): the literal above is empty as found, which makes this branch
                // unreachable; it presumably named a specific emoji to exclude. Confirm.
                nonConstructed.add(minimal);
            }
        }
        emojiToMajorCategory.freeze();
        emojiToMinorCategory.freeze();
        nonConstructed.add(MODIFIERS); // needed for names
        nonConstructed.freeze();
        toName.freeze();
        allRgi.freeze();
        allRgiNoES.freeze();
    }

    /**
     * @return the frozen set of all entries marked fully-qualified in the data file
     */
    public static UnicodeSet getAllRgi() {
        return allRgi;
    }

    /**
     * @return the frozen set of all fully-qualified entries with {@link #EMOJI_VARIANT} removed
     */
    public static UnicodeSet getAllRgiNoES() {
        return allRgiNoES;
    }

    /**
     * Returns the minor category (subgroup) recorded for an emoji.
     *
     * @param emoji the emoji string, in the form listed in the data file
     * @return the subgroup name
     * @throws InternalCldrException if no subgroup is recorded for {@code emoji}
     */
    public static String getMinorCategory(String emoji) {
        String minorCat = emojiToMinorCategory.get(emoji);
        if (minorCat == null) {
            throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
                + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
        }
        return minorCat;
    }

    /**
     * @param emoji the emoji string, in the form listed in the data file
     * @return the name parsed from the data file, or {@code null} if none is recorded
     */
    public static String getName(String emoji) {
        return toName.get(emoji);
    }

    /**
     * @param emoji an emoji string, with or without {@link #EMOJI_VARIANT}
     * @return the ordering number for {@code emoji}, or {@link Integer#MAX_VALUE} if it is unknown
     */
    public static int getEmojiToOrder(String emoji) {
        Integer result = emojiToOrder.get(emoji);
        return result == null ? Integer.MAX_VALUE : result;
    }

    /**
     * @param minor a minor category (subgroup) name
     * @return the ordering number for {@code minor}, or {@link Integer#MAX_VALUE} if it is unknown
     */
    public static int getEmojiMinorOrder(String minor) {
        Integer result = minorToOrder.get(minor);
        return result == null ? Integer.MAX_VALUE : result;
    }

    /**
     * Returns the major category (group) recorded for an emoji.
     *
     * @param emoji the emoji string, in the form listed in the data file
     * @return the group name
     * @throws InternalCldrException if no group is recorded for {@code emoji}
     */
    public static String getMajorCategory(String emoji) {
        String majorCat = emojiToMajorCategory.get(emoji);
        if (majorCat == null) {
            throw new InternalCldrException("No major category (aka group) found for " + emoji
                + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
        }
        return majorCat;
    }

    /**
     * @return the distinct major category (group) names that have at least one emoji
     */
    public static Set<String> getMajorCategories() {
        return emojiToMajorCategory.values();
    }

    /**
     * @return the distinct minor category (subgroup) names that have at least one emoji
     */
    public static Set<String> getMinorCategories() {
        return emojiToMinorCategory.values();
    }

    /**
     * @return the frozen set of emoji that get their own annotation paths
     */
    public static UnicodeSet getNonConstructed() {
        return nonConstructed;
    }

    // Lazily built path sets; access is guarded by the synchronized getters below.
    private static Set<String> NAME_PATHS = null;
    private static Set<String> KEYWORD_PATHS = null;
    public static final String TYPE_TTS = "[@type=\"tts\"]";

    /**
     * @return the annotation paths carrying {@link #TYPE_TTS} for every non-constructed emoji;
     *         built on first call and cached
     */
    public static synchronized Set<String> getNamePaths() {
        if (NAME_PATHS == null) {
            NAME_PATHS = buildPaths(TYPE_TTS);
        }
        return NAME_PATHS;
    }

    /**
     * @return the annotation paths without a type attribute for every non-constructed emoji;
     *         built on first call and cached
     */
    public static synchronized Set<String> getKeywordPaths() {
        if (KEYWORD_PATHS == null) {
            KEYWORD_PATHS = buildPaths("");
        }
        return KEYWORD_PATHS;
    }

    /**
     * Builds one annotation path per non-constructed emoji.
     *
     * @param suffix text appended after the {@code [@cp="..."]} attribute; may be empty
     * @return an immutable set of the resulting paths
     */
    private static ImmutableSet<String> buildPaths(String suffix) {
        ImmutableSet.Builder<String> builder = ImmutableSet.builder();
        for (String s : Emoji.getNonConstructed()) {
            builder.add("//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix);
        }
        return builder.build();
    }
}