1 package org.unicode.cldr.util; 2 3 import java.util.HashMap; 4 import java.util.Iterator; 5 import java.util.Map; 6 import java.util.Set; 7 8 import org.unicode.cldr.draft.FileUtilities; 9 10 import com.google.common.base.Splitter; 11 import com.google.common.collect.ImmutableSet; 12 import com.ibm.icu.dev.util.UnicodeMap; 13 import com.ibm.icu.impl.Utility; 14 import com.ibm.icu.lang.CharSequences; 15 import com.ibm.icu.text.UnicodeSet; 16 17 public class Emoji { 18 public static final String EMOJI_VARIANT = "\uFE0F"; 19 public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3"; 20 public static final String ZWJ = "\u200D"; 21 public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze(); 22 public static final UnicodeSet MODIFIERS = new UnicodeSet("[-]").freeze(); 23 public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze(); 24 public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D - ❤]").freeze(); 25 public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze(); 26 public static final UnicodeSet SPECIALS = new UnicodeSet("[{}{}{☠}]").freeze(); 27 public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[ ]").freeze(); 28 public static final UnicodeSet OBJECT = new UnicodeSet("[ ✈ ⚖ ⚕]").freeze(); 29 30 static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>(); 31 static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>(); 32 static final Map<String, Integer> minorToOrder = new HashMap<>(); 33 static final UnicodeSet nonConstructed = new UnicodeSet(); 34 static final UnicodeSet allRgi = new UnicodeSet(); 35 static final UnicodeSet allRgiNoES = new UnicodeSet(); 36 37 static { 38 /* 39 # group: Smileys & People 40 # subgroup: face-positive 41 1F600 ; fully-qualified # grinning face 42 */ 43 Splitter semi = Splitter.on(';').trimResults(); 44 String majorCategory = null; 45 String minorCategory = null; 46 for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) { 47 if (line.startsWith("#")) { 48 line = line.substring(1).trim(); 49 if (line.startsWith("group:")) { 50 majorCategory = line.substring("group:".length()).trim(); 51 } else if (line.startsWith("subgroup:")) { 52 minorCategory = line.substring("subgroup:".length()).trim(); 53 if (!minorToOrder.containsKey(minorCategory)) { minorToOrder.put(minorCategory, minorToOrder.size())54 minorToOrder.put(minorCategory, minorToOrder.size()); 55 } 56 } 57 continue; 58 } 59 line = line.trim(); 60 if (line.isEmpty()) { 61 continue; 62 } 63 Iterator<String> it = semi.split(line).iterator(); 64 String emojiHex = it.next(); 65 String original = Utility.fromHex(emojiHex, 4, " "); 66 String type = it.next(); 67 if (type.startsWith("fully-qualified")) { 68 allRgi.add(original); original.replace(Emoji.EMOJI_VARIANT, "")69 allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, "")); 70 } emojiToMajorCategory.put(original, majorCategory)71 emojiToMajorCategory.put(original, majorCategory); emojiToMinorCategory.put(original, minorCategory)72 emojiToMinorCategory.put(original, minorCategory); 73 74 // add all the non-constructed values to a set for annotations 75 76 String minimal = original.replace(EMOJI_VARIANT, ""); 77 boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE; 78 79 // skip constructed values 80 if (minimal.contains(COMBINING_ENCLOSING_KEYCAP) 81 || REGIONAL_INDICATORS.containsSome(minimal) 82 || TAGS.containsSome(minimal) 83 || !singleton && MODIFIERS.containsSome(minimal) 84 || !singleton && FAMILY.containsAll(minimal)) { 85 // do nothing 86 } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences 87 if (SPECIALS.contains(minimal) 88 || GENDER.containsSome(minimal) 89 || MAN_WOMAN.contains(minimal.codePointAt(0)) && OBJECT.contains(minimal.codePointBefore(minimal.length()))) { 90 nonConstructed.add(minimal); 91 } 92 } else if (!minimal.contains("")) { 93 nonConstructed.add(minimal); 94 } 95 96 } emojiToMajorCategory.freeze()97 emojiToMajorCategory.freeze(); emojiToMinorCategory.freeze()98 emojiToMinorCategory.freeze(); 99 nonConstructed.add(MODIFIERS); // needed for names nonConstructed.freeze()100 nonConstructed.freeze(); allRgi.freeze()101 allRgi.freeze(); allRgiNoES.freeze()102 allRgiNoES.freeze(); 103 } 104 getAllRgi()105 public static UnicodeSet getAllRgi() { 106 return allRgi; 107 } 108 getAllRgiNoES()109 public static UnicodeSet getAllRgiNoES() { 110 return allRgiNoES; 111 } 112 getMinorCategory(String emoji)113 public static String getMinorCategory(String emoji) { 114 String minorCat = emojiToMinorCategory.get(emoji); 115 if (minorCat == null) { 116 throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji 117 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ..."); 118 } 119 return minorCat; 120 } 121 getMinorToOrder(String minor)122 public static int getMinorToOrder(String minor) { 123 Integer result = minorToOrder.get(minor); 124 return result == null ? Integer.MAX_VALUE : result; 125 } 126 getMajorCategory(String emoji)127 public static String getMajorCategory(String emoji) { 128 String majorCat = emojiToMajorCategory.get(emoji); 129 if (majorCat == null) { 130 throw new InternalCldrException("No minor category (aka subgroup) found for " + emoji 131 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ..."); 132 } 133 return majorCat; 134 } 135 getMajorCategories()136 public static Set<String> getMajorCategories() { 137 return emojiToMajorCategory.values(); 138 } 139 getMinorCategories()140 public static Set<String> getMinorCategories() { 141 return emojiToMinorCategory.values(); 142 } 143 getNonConstructed()144 public static UnicodeSet getNonConstructed() { 145 return nonConstructed; 146 } 147 148 private static Set<String> NAME_PATHS = null; 149 private static Set<String> KEYWORD_PATHS = null; 150 public static final String TYPE_TTS = "[@type=\"tts\"]"; 151 getNamePaths()152 public static synchronized Set<String> getNamePaths() { 153 return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS)); 154 } 155 getKeywordPaths()156 public static synchronized Set<String> getKeywordPaths() { 157 return KEYWORD_PATHS != null ? KEYWORD_PATHS : (KEYWORD_PATHS = buildPaths("")); 158 } 159 buildPaths(String suffix)160 private static ImmutableSet<String> buildPaths(String suffix) { 161 ImmutableSet.Builder<String> builder = ImmutableSet.builder(); 162 for (String s : Emoji.getNonConstructed()) { 163 String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix; 164 builder.add(base); 165 } 166 return builder.build(); 167 } 168 } 169