1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2021 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html 4 5 // emojiprops.h 6 // created: 2021sep06 Markus W. Scherer 7 8 package android.icu.impl; 9 10 import java.io.IOException; 11 import java.nio.ByteBuffer; 12 13 import android.icu.lang.UProperty; 14 import android.icu.text.UnicodeSet; 15 import android.icu.util.BytesTrie; 16 import android.icu.util.CharsTrie; 17 import android.icu.util.CodePointMap; 18 import android.icu.util.CodePointTrie; 19 import android.icu.util.ICUUncheckedIOException; 20 21 /** 22 * @hide Only a subset of ICU is exposed in Android 23 */ 24 public final class EmojiProps { 25 private static final class IsAcceptable implements ICUBinary.Authenticate { 26 @Override isDataVersionAcceptable(byte version[])27 public boolean isDataVersionAcceptable(byte version[]) { 28 return version[0] == 1; 29 } 30 } 31 private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); 32 private static final int DATA_FORMAT = 0x456d6f6a; // "Emoj" 33 34 // Byte offsets from the start of the data, after the generic header, 35 // in ascending order. 36 // UCPTrie=CodePointTrie, follows the indexes 37 private static final int IX_CPTRIE_OFFSET = 0; 38 39 // UCharsTrie=CharsTrie 40 private static final int IX_BASIC_EMOJI_TRIE_OFFSET = 4; 41 //ivate static final int IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET = 5; 42 //ivate static final int IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET = 6; 43 //ivate static final int IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET = 7; 44 //ivate static final int IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET = 8; 45 private static final int IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET = 9; 46 47 // Properties in the code point trie. 48 // https://www.unicode.org/reports/tr51/#Emoji_Properties 49 private static final int BIT_EMOJI = 0; 50 private static final int BIT_EMOJI_PRESENTATION = 1; 51 private static final int BIT_EMOJI_MODIFIER = 2; 52 private static final int BIT_EMOJI_MODIFIER_BASE = 3; 53 private static final int BIT_EMOJI_COMPONENT = 4; 54 private static final int BIT_EXTENDED_PICTOGRAPHIC = 5; 55 // https://www.unicode.org/reports/tr51/#Emoji_Sets 56 private static final int BIT_BASIC_EMOJI = 6; 57 58 public static final EmojiProps INSTANCE = new EmojiProps(); 59 60 private CodePointTrie.Fast8 cpTrie = null; 61 private String stringTries[] = new String[6]; 62 63 /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */ getStringTrieIndex(int i)64 private static int getStringTrieIndex(int i) { 65 return i - IX_BASIC_EMOJI_TRIE_OFFSET; 66 } 67 EmojiProps()68 private EmojiProps() { 69 ByteBuffer bytes = ICUBinary.getRequiredData("uemoji.icu"); 70 try { 71 ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE); 72 int startPos = bytes.position(); 73 74 int cpTrieOffset = bytes.getInt(); // inIndexes[IX_CPTRIE_OFFSET] 75 int indexesLength = cpTrieOffset / 4; 76 if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) { 77 throw new ICUUncheckedIOException( 78 "Emoji properties data: not enough indexes"); 79 } 80 81 int[] inIndexes = new int[indexesLength]; 82 inIndexes[0] = cpTrieOffset; 83 for (int i = 1; i < indexesLength; ++i) { 84 inIndexes[i] = bytes.getInt(); 85 } 86 87 int i = IX_CPTRIE_OFFSET; 88 int offset = inIndexes[i++]; 89 int nextOffset = inIndexes[i]; 90 cpTrie = CodePointTrie.Fast8.fromBinary(bytes); 91 int pos = bytes.position() - startPos; 92 assert nextOffset >= pos; 93 ICUBinary.skipBytes(bytes, nextOffset - pos); // skip padding after trie bytes 94 95 offset = nextOffset; 96 nextOffset = inIndexes[IX_BASIC_EMOJI_TRIE_OFFSET]; 97 ICUBinary.skipBytes(bytes, nextOffset - offset); // skip unknown bytes 98 99 for (i = IX_BASIC_EMOJI_TRIE_OFFSET; i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) { 100 offset = inIndexes[i]; 101 nextOffset = inIndexes[i + 1]; 102 // Set/leave null if there is no CharsTrie. 103 if (nextOffset > offset) { 104 stringTries[getStringTrieIndex(i)] = 105 ICUBinary.getString(bytes, (nextOffset - offset) / 2, 0); 106 } 107 } 108 } catch(IOException e) { 109 throw new ICUUncheckedIOException(e); 110 } 111 } 112 addPropertyStarts(UnicodeSet set)113 public UnicodeSet addPropertyStarts(UnicodeSet set) { 114 // Add the start code point of each same-value range of the trie. 115 CodePointMap.Range range = new CodePointMap.Range(); 116 int start = 0; 117 while (cpTrie.getRange(start, null, range)) { 118 set.add(start); 119 start = range.getEnd() + 1; 120 } 121 return set; 122 } 123 124 // Note: REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere. 125 private static final byte[] bitFlags = { 126 BIT_EMOJI, // UCHAR_EMOJI=57 127 BIT_EMOJI_PRESENTATION, // UCHAR_EMOJI_PRESENTATION=58 128 BIT_EMOJI_MODIFIER, // UCHAR_EMOJI_MODIFIER=59 129 BIT_EMOJI_MODIFIER_BASE, // UCHAR_EMOJI_MODIFIER_BASE=60 130 BIT_EMOJI_COMPONENT, // UCHAR_EMOJI_COMPONENT=61 131 -1, // UCHAR_REGIONAL_INDICATOR=62 132 -1, // UCHAR_PREPENDED_CONCATENATION_MARK=63 133 BIT_EXTENDED_PICTOGRAPHIC, // UCHAR_EXTENDED_PICTOGRAPHIC=64 134 BIT_BASIC_EMOJI, // UCHAR_BASIC_EMOJI=65 135 -1, // UCHAR_EMOJI_KEYCAP_SEQUENCE=66 136 -1, // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67 137 -1, // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68 138 -1, // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69 139 -1, // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70 140 BIT_BASIC_EMOJI, // UCHAR_RGI_EMOJI=71 141 }; 142 hasBinaryProperty(int c, int which)143 public boolean hasBinaryProperty(int c, int which) { 144 if (which < UProperty.EMOJI || UProperty.RGI_EMOJI < which) { 145 return false; 146 } 147 int bit = bitFlags[which - UProperty.EMOJI]; 148 if (bit < 0) { 149 return false; // not a property that we support in this function 150 } 151 int bits = cpTrie.get(c); 152 return ((bits >> bit) & 1) != 0; 153 } 154 hasBinaryProperty(CharSequence s, int which)155 public boolean hasBinaryProperty(CharSequence s, int which) { 156 int length = s.length(); 157 if (length == 0) { return false; } // empty string 158 // The caller should have delegated single code points to hasBinaryProperty(c, which). 159 if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) { 160 return false; 161 } 162 int firstProp = which, lastProp = which; 163 if (which == UProperty.RGI_EMOJI) { 164 // RGI_Emoji is the union of the other emoji properties of strings. 165 firstProp = UProperty.BASIC_EMOJI; 166 lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE; 167 } 168 for (int prop = firstProp; prop <= lastProp; ++prop) { 169 String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI]; 170 if (trieUChars != null) { 171 CharsTrie trie = new CharsTrie(trieUChars, 0); 172 BytesTrie.Result result = trie.next(s, 0, length); 173 if (result.hasValue()) { 174 return true; 175 } 176 } 177 } 178 return false; 179 } 180 addStrings(int which, UnicodeSet set)181 public void addStrings(int which, UnicodeSet set) { 182 if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) { 183 return; 184 } 185 int firstProp = which, lastProp = which; 186 if (which == UProperty.RGI_EMOJI) { 187 // RGI_Emoji is the union of the other emoji properties of strings. 188 firstProp = UProperty.BASIC_EMOJI; 189 lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE; 190 } 191 for (int prop = firstProp; prop <= lastProp; ++prop) { 192 String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI]; 193 if (trieUChars != null) { 194 CharsTrie trie = new CharsTrie(trieUChars, 0); 195 for (CharsTrie.Entry entry : trie) { 196 set.add(entry.chars); 197 } 198 } 199 } 200 } 201 } 202