package org.unicode.cldr.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.unicode.cldr.draft.FileUtilities;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.CharSequences;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUException;

/**
 * Static tables describing emoji, built once at class-initialization time from
 * {@code data/emoji/emoji-test.txt}: group ("major category"), subgroup ("minor category"),
 * name and ordering for each emoji, plus a hand-maintained list of extra (non-emoji) symbols
 * that are slotted into the matching emoji subgroups.
 *
 * <p>NOTE(review): several string literals below (for example {@link #MODIFIERS}, the empty
 * <code>{}</code> entries in {@link #SPECIALS}, and the {@code contains("")} test in the first
 * static initializer) look as if they have lost supplementary-plane characters. They are kept
 * exactly as found; confirm them against the upstream file.
 */
public class Emoji {
    public static final String EMOJI_VARIANT = "\uFE0F";
    public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3";
    public static final String ZWJ = "\u200D";
    public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze();
    public static final UnicodeSet MODIFIERS = new UnicodeSet("[-]").freeze();
    public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze();
    public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D - ❤]").freeze();
    public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze();

    /**
     * ZWJ sequences that are treated as non-constructed even though they contain a ZWJ.
     *
     * <p>Entries may have to be added here if testAnnotationPaths fails with output like:
     * <pre>
     * got java.util.TreeSet&lt;[//ldml/annotations/annotation[@cp="⚧"][@type="tts"], //ldml/annotations/annotation[@cp="⚕"][@type="tts"], ...
     * </pre>
     * Extract the quoted {@code cp} values and add each one as <code>{...}</code>.
     * Example: {@code //ldml/annotations/annotation[@cp="⚕"]} ==&gt; <code>{⚕}</code>
     */
    public static final UnicodeSet SPECIALS = new UnicodeSet("["
        + "{⬛}{❄}{}{}{}{}{}{} {} {☠} {} {} {} {} {} {} {}"
        + "{⚧}{⚕}{⚖}{✈}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}"
        + "{❤}, {❤}, {}, {}" // #E13.1
        + "]").freeze();
    public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[ ]").freeze();
    public static final UnicodeSet OBJECT = new UnicodeSet("[ ✈ ⚖ ⚕]").freeze();

    static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>();
    static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>();
    static final UnicodeMap<String> toName = new UnicodeMap<>();
    static {
        emojiToMajorCategory.setErrorOnReset(true);
        emojiToMinorCategory.setErrorOnReset(true);
        toName.setErrorOnReset(true);
    }
    /**
     * A mapping from a majorCategory to a unique ordering number, based on the first time it is encountered.
     */
    static final Map<String, Long> majorToOrder = new HashMap<>();
    /**
     * A mapping from a minorCategory to a unique ordering number, based on the first time it is encountered.
     */
    static final Map<String, Long> minorToOrder = new HashMap<>();
    /**
     * A mapping from an emoji (both as listed and with variation selectors removed) to its order.
     * Values are multiples of 100, in order of first appearance in the data file.
     */
    static final Map<String, Long> emojiToOrder = new LinkedHashMap<>();
    static final UnicodeSet nonConstructed = new UnicodeSet();
    static final UnicodeSet allRgi = new UnicodeSet();
    static final UnicodeSet allRgiNoES = new UnicodeSet();

    static {
        /*
         * Sample of the data being read:
         *   # group: Smileys & People
         *   # subgroup: face-positive
         *   1F600 ; fully-qualified # grinning face
         */
        // limit(3): code points ; status # comment. Without the limit, a '#' or ';' inside the
        // comment (as in the name "keycap: #") would truncate the comment field and leave the
        // emoji with an empty name.
        Splitter semi = Splitter.on(CharMatcher.anyOf(";#")).trimResults().limit(3);
        String majorCategory = null;
        String minorCategory = null;
        for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) {
            if (line.startsWith("#")) {
                // Comment line: only the group / subgroup headers are of interest.
                line = line.substring(1).trim();
                if (line.startsWith("group:")) {
                    majorCategory = line.substring("group:".length()).trim();
                    if (!majorToOrder.containsKey(majorCategory)) {
                        majorToOrder.put(majorCategory, (long) majorToOrder.size());
                    }
                } else if (line.startsWith("subgroup:")) {
                    minorCategory = line.substring("subgroup:".length()).trim();
                    if (!minorToOrder.containsKey(minorCategory)) {
                        minorToOrder.put(minorCategory, (long) minorToOrder.size());
                    }
                }
                continue;
            }
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            Iterator<String> it = semi.split(line).iterator();

            String emojiHex = it.next();
            String original = Utility.fromHex(emojiHex, 4, " ");
            // The emoji with all emoji variation selectors removed.
            String minimal = original.replace(EMOJI_VARIANT, "");

            String type = it.next();
            if (type.startsWith("fully-qualified")) {
                allRgi.add(original);
                allRgiNoES.add(minimal);
            }
            emojiToMajorCategory.put(original, majorCategory);
            emojiToMinorCategory.put(original, minorCategory);

            String comment = it.next();
            // The comment is now of the form: <emoji> E0.6 beaming face with smiling eyes
            // so the name is whatever follows the second space.
            int spacePos = comment.indexOf(' ');
            spacePos = comment.indexOf(' ', spacePos + 1); // get second space
            String name = comment.substring(spacePos + 1).trim();
            toName.put(original, name);

            // Add the order. If it is not minimal, add that also.
            if (!emojiToOrder.containsKey(original)) {
                putUnique(emojiToOrder, original, emojiToOrder.size() * 100L);
            }
            if (!emojiToOrder.containsKey(minimal)) {
                putUnique(emojiToOrder, minimal, emojiToOrder.size() * 100L);
            }

            // Add all the non-constructed values to a set for annotations.
            boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE;
            boolean constructed = minimal.contains(COMBINING_ENCLOSING_KEYCAP)
                || REGIONAL_INDICATORS.containsSome(minimal)
                || TAGS.containsSome(minimal)
                || !singleton && (MODIFIERS.containsSome(minimal) || FAMILY.containsAll(minimal));
            if (constructed) {
                continue; // skip constructed values
            }
            if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences
                if (SPECIALS.contains(minimal)
                    || GENDER.containsSome(minimal)
                    || MAN_WOMAN.contains(minimal.codePointAt(0))
                        && OBJECT.contains(minimal.codePointBefore(minimal.length()))) {
                    nonConstructed.add(minimal);
                }
            } else if (!minimal.contains("")) {
                // NOTE(review): with an empty literal this test can never pass; the literal has
                // presumably lost a character. Kept as found; verify against upstream.
                nonConstructed.add(minimal);
            }
        }
        emojiToMajorCategory.freeze();
        emojiToMinorCategory.freeze();
        nonConstructed.add(MODIFIERS); // needed for names
        nonConstructed.freeze();
        toName.freeze();
        allRgi.freeze();
        allRgiNoES.freeze();
    }

    /**
     * Puts {@code key -> value} into {@code map}, failing if the key already had a non-null value.
     * Note that the new value has already been stored when the exception is thrown.
     *
     * @throws ICUException if {@code key} was already mapped to a non-null value
     */
    private static <K, V> void putUnique(Map<K, V> map, K key, V value) {
        V oldValue = map.put(key, value);
        if (oldValue != null) {
            throw new ICUException("Attempt to change value of " + map
                + " for " + key
                + " from " + oldValue
                + " to " + value
            );
        }
    }

    /**
     * @return the frozen set of all emoji marked fully-qualified in the data file
     */
    public static UnicodeSet getAllRgi() {
        return allRgi;
    }

    /**
     * @return the frozen set of all fully-qualified emoji, with emoji variation selectors removed
     */
    public static UnicodeSet getAllRgiNoES() {
        return allRgiNoES;
    }

    /** Extra (non-emoji) symbols, mapped to the emoji subgroup they are listed under. */
    public static final UnicodeMap<String> EXTRA_SYMBOL_MINOR_CATEGORIES = new UnicodeMap<>();
    /** Order values for the extra symbols; each continues on from the largest emoji order in its subgroup. */
    public static final Map<String,Long> EXTRA_SYMBOL_ORDER;
    private static final boolean DEBUG = false;
    static {
        // Each row is {subgroup, characters}; spaces in the character list are ignored.
        String[][] data = {
            {"arrow", "→ ↓ ↑ ← ↔ ↕ ⇆ ⇅"},
            {"alphanum", "© ® ℗ ™ µ"},
            {"geometric", "▼ ▶ ▲ ◀ ● ○ ◯ ◊"},
            {"math", "× ÷ √ ∞ ∆ ∇ ⁻ ¹ ² ³ ≡ ∈ ⊂ ∩ ∪ ° + ± − = ≈ ≠ > < ≤ ≥ ¬ | ~"},
            {"punctuation", "§ † ‡ \\u0020 , 、 ، ; : ؛ ! ¡ ? ¿ ؟ ¶ ※ / \\ & # % ‰ ′ ″ ‴ @ * ♪ ♭ ♯ ` ´ ^ ¨ ‐ ― _ - – — • · . … 。 ‧ ・ ‘ ’ ‚ ' “ ” „ » « ( ) [ ] { } 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】"},
            {"currency", "€ £ ¥ ₹ ₽ $ ¢ ฿ ₪ ₺ ₫ ₱ ₩ ₡ ₦ ₮ ৳ ₴ ₸ ₲ ₵ ៛ ₭ ֏ ₥ ₾ ₼ ₿ ؋"},
            {"other-symbol", "‾‽‸⁂↚↛↮↙↜↝↞↟↠↡↢↣↤↥↦↧↨↫↬↭↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇇⇈⇉⇊⇋⇌⇐⇍⇑⇒⇏⇓⇔⇎⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇵∀∂∃∅∉∋∎∏∑≮≯∓∕⁄∗∘∙∝∟∠∣∥∧∫∬∮∴∵∶∷∼∽∾≃≅≌≒≖≣≦≧≪≫≬≳≺≻⊁⊃⊆⊇⊕⊖⊗⊘⊙⊚⊛⊞⊟⊥⊮⊰⊱⋭⊶⊹⊿⋁⋂⋃⋅⋆⋈⋒⋘⋙⋮⋯⋰⋱■□▢▣▤▥▦▧▨▩▬▭▮▰△▴▵▷▸▹►▻▽▾▿◁◂◃◄◅◆◇◈◉◌◍◎◐◑◒◓◔◕◖◗◘◙◜◝◞◟◠◡◢◣◤◥◦◳◷◻◽◿⨧⨯⨼⩣⩽⪍⪚⪺₢₣₤₰₳₶₷₨﷼"},
        };
        // get the maximum suborder for each subcategory
        Map<String, Long> subcategoryToMaxSuborder = new HashMap<>();
        for (String[] row : data) {
            final String subcategory = row[0];
            for (Entry<String, String> entry : emojiToMinorCategory.entrySet()) {
                if (entry.getValue().equals(subcategory)) {
                    String emoji = entry.getKey();
                    Long order = emojiToOrder.get(emoji);
                    Long currentMax = subcategoryToMaxSuborder.get(subcategory);
                    if (currentMax == null || currentMax < order) {
                        subcategoryToMaxSuborder.put(subcategory, order);
                    }
                }
            }
        }
        if (DEBUG) {
            System.out.println(subcategoryToMaxSuborder);
        }
        Map<String,Long> _EXTRA_SYMBOL_ORDER = new LinkedHashMap<>();
        for (String[] row : data) {
            final String subcategory = row[0];
            final String characters = row[1];

            List<String> items = new ArrayList<>();
            for (int cp : With.codePointArray(characters)) {
                if (cp != ' ') {
                    items.add(With.fromCodePoint(cp));
                }
            }
            final UnicodeSet uset = new UnicodeSet().addAll(items);
            if (uset.containsSome(EXTRA_SYMBOL_MINOR_CATEGORIES.keySet())) {
                throw new IllegalArgumentException("Duplicate values in " + EXTRA_SYMBOL_MINOR_CATEGORIES);
            }
            EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory);

            // Fail with a readable message instead of a bare unboxing NullPointerException when
            // the data file has no emoji in this subgroup.
            Long maxSuborder = subcategoryToMaxSuborder.get(subcategory);
            if (maxSuborder == null) {
                throw new InternalCldrException("No emoji found in minor category (aka subgroup) "
                    + subcategory + "; cannot order the extra symbols listed for it.");
            }
            // The extra symbols follow directly after the last emoji of the same subgroup.
            long count = maxSuborder;
            for (String s : items) {
                ++count;
                _EXTRA_SYMBOL_ORDER.put(s, count);
            }
            subcategoryToMaxSuborder.put(subcategory, count);
        }
        if (DEBUG) {
            System.out.println(_EXTRA_SYMBOL_ORDER);
        }
        EXTRA_SYMBOL_MINOR_CATEGORIES.freeze();
        EXTRA_SYMBOL_ORDER = ImmutableMap.copyOf(_EXTRA_SYMBOL_ORDER);
    }

    /**
     * Returns the minor category (subgroup) of an emoji or extra symbol.
     *
     * @param emoji the emoji or symbol to look up
     * @return its subgroup, from the data file or else from the extra-symbol table
     * @throws InternalCldrException if neither table has an entry
     */
    public static String getMinorCategory(String emoji) {
        String minorCat = emojiToMinorCategory.get(emoji);
        if (minorCat == null) {
            minorCat = EXTRA_SYMBOL_MINOR_CATEGORIES.get(emoji);
            if (minorCat == null) {
                throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji
                    + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
            }
        }
        return minorCat;
    }

    /**
     * @param emoji the emoji to look up
     * @return the name read from the data file, or null if there is none
     */
    public static String getName(String emoji) {
        return toName.get(emoji);
    }

    /**
     * Returns the order value of an emoji or extra symbol.
     *
     * @param emoji the emoji or symbol to look up
     * @return its order, from the data file or else from {@link #EXTRA_SYMBOL_ORDER}
     * @throws InternalCldrException if neither table has an entry
     */
    public static long getEmojiToOrder(String emoji) {
        Long result = emojiToOrder.get(emoji);
        if (result == null) {
            result = EXTRA_SYMBOL_ORDER.get(emoji);
            if (result == null) {
                throw new InternalCldrException("No Order found for " + emoji
                    + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
            }
        }
        return result;
    }

    /**
     * Returns the ordering number of a minor category (subgroup).
     *
     * @param minor the subgroup name
     * @return its position in order of first appearance in the data file
     * @throws InternalCldrException if the subgroup is unknown
     */
    public static long getEmojiMinorOrder(String minor) {
        Long result = minorToOrder.get(minor);
        if (result == null) {
            throw new InternalCldrException("No minor category (aka subgroup) found for " + minor
                + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ...");
        }
        return result;
    }

    /**
     * Returns the major category (group) of an emoji or extra symbol.
     *
     * @param emoji the emoji or symbol to look up
     * @return its group from the data file, or "Symbols" for an extra symbol
     * @throws InternalCldrException if neither table has an entry
     */
    public static String getMajorCategory(String emoji) {
        String majorCat = emojiToMajorCategory.get(emoji);
        if (majorCat == null) {
            if (EXTRA_SYMBOL_MINOR_CATEGORIES.containsKey(emoji)) {
                majorCat = "Symbols";
            } else {
                throw new InternalCldrException("No major category (aka group) found for " + emoji
                    + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ...");
            }
        }
        return majorCat;
    }

    /**
     * @return the major categories (groups) that have at least one emoji
     */
    public static Set<String> getMajorCategories() {
        return emojiToMajorCategory.values();
    }

    /**
     * @return the minor categories (subgroups) that have at least one emoji
     */
    public static Set<String> getMinorCategories() {
        return emojiToMinorCategory.values();
    }

    /**
     * @return an immutable set of the minor categories of the emoji followed by any additional
     *         ones used only by the extra symbols
     */
    public static Set<String> getMinorCategoriesWithExtras() {
        Set<String> result = new LinkedHashSet<>(emojiToMinorCategory.values());
        result.addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getAvailableValues());
        return ImmutableSet.copyOf(result);
    }

    /**
     * @param minorCategory the subgroup name
     * @return a frozen set of the emoji and extra symbols in that subgroup
     */
    public static UnicodeSet getEmojiInMinorCategoriesWithExtras(String minorCategory) {
        return new UnicodeSet(emojiToMinorCategory.getSet(minorCategory))
            .addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getSet(minorCategory))
            .freeze();
    }

    /**
     * @return the frozen set of emoji collected as non-constructed by the static initializer,
     *         plus {@link #MODIFIERS}
     */
    public static UnicodeSet getNonConstructed() {
        return nonConstructed;
    }

    private static Set<String> NAME_PATHS = null;
    private static Set<String> KEYWORD_PATHS = null;
    public static final String TYPE_TTS = "[@type=\"tts\"]";

    /**
     * @return the annotation paths with the {@link #TYPE_TTS} suffix, one per non-constructed
     *         emoji; built on first use and cached
     */
    public static synchronized Set<String> getNamePaths() {
        return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS));
    }

    /**
     * @return the annotation paths without a type suffix, one per non-constructed emoji;
     *         built on first use and cached
     */
    public static synchronized Set<String> getKeywordPaths() {
        return KEYWORD_PATHS != null ? KEYWORD_PATHS : (KEYWORD_PATHS = buildPaths(""));
    }

    /**
     * Builds {@code //ldml/annotations/annotation[@cp="..."]} plus {@code suffix} for every
     * non-constructed emoji.
     */
    private static ImmutableSet<String> buildPaths(String suffix) {
        ImmutableSet.Builder<String> builder = ImmutableSet.builder();
        for (String s : Emoji.getNonConstructed()) {
            String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix;
            builder.add(base);
        }
        return builder.build();
    }
}