• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2021 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 
5 // emojiprops.h
6 // created: 2021sep06 Markus W. Scherer
7 
8 package android.icu.impl;
9 
10 import java.io.IOException;
11 import java.nio.ByteBuffer;
12 
13 import android.icu.lang.UProperty;
14 import android.icu.text.UnicodeSet;
15 import android.icu.util.BytesTrie;
16 import android.icu.util.CharsTrie;
17 import android.icu.util.CodePointMap;
18 import android.icu.util.CodePointTrie;
19 import android.icu.util.ICUUncheckedIOException;
20 
21 /**
22  * @hide Only a subset of ICU is exposed in Android
23  */
24 public final class EmojiProps {
25     private static final class IsAcceptable implements ICUBinary.Authenticate {
26         @Override
isDataVersionAcceptable(byte version[])27         public boolean isDataVersionAcceptable(byte version[]) {
28             return version[0] == 1;
29         }
30     }
31     private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
32     private static final int DATA_FORMAT = 0x456d6f6a;  // "Emoj"
33 
34     // Byte offsets from the start of the data, after the generic header,
35     // in ascending order.
36     // UCPTrie=CodePointTrie, follows the indexes
37     private static final int IX_CPTRIE_OFFSET = 0;
38 
39     // UCharsTrie=CharsTrie
40     private static final int IX_BASIC_EMOJI_TRIE_OFFSET = 4;
41     //ivate static final int IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET = 5;
42     //ivate static final int IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET = 6;
43     //ivate static final int IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET = 7;
44     //ivate static final int IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET = 8;
45     private static final int IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET = 9;
46 
47     // Properties in the code point trie.
48     // https://www.unicode.org/reports/tr51/#Emoji_Properties
49     private static final int BIT_EMOJI = 0;
50     private static final int BIT_EMOJI_PRESENTATION = 1;
51     private static final int BIT_EMOJI_MODIFIER = 2;
52     private static final int BIT_EMOJI_MODIFIER_BASE = 3;
53     private static final int BIT_EMOJI_COMPONENT = 4;
54     private static final int BIT_EXTENDED_PICTOGRAPHIC = 5;
55     // https://www.unicode.org/reports/tr51/#Emoji_Sets
56     private static final int BIT_BASIC_EMOJI = 6;
57 
58     public static final EmojiProps INSTANCE = new EmojiProps();
59 
60     private CodePointTrie.Fast8 cpTrie = null;
61     private String stringTries[] = new String[6];
62 
63     /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */
getStringTrieIndex(int i)64     private static int getStringTrieIndex(int i) {
65         return i - IX_BASIC_EMOJI_TRIE_OFFSET;
66     }
67 
EmojiProps()68     private EmojiProps() {
69         ByteBuffer bytes = ICUBinary.getRequiredData("uemoji.icu");
70         try {
71             ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
72             int startPos = bytes.position();
73 
74             int cpTrieOffset = bytes.getInt();  // inIndexes[IX_CPTRIE_OFFSET]
75             int indexesLength = cpTrieOffset / 4;
76             if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
77                 throw new ICUUncheckedIOException(
78                         "Emoji properties data: not enough indexes");
79             }
80 
81             int[] inIndexes = new int[indexesLength];
82             inIndexes[0] = cpTrieOffset;
83             for (int i = 1; i < indexesLength; ++i) {
84                 inIndexes[i] = bytes.getInt();
85             }
86 
87             int i = IX_CPTRIE_OFFSET;
88             int offset = inIndexes[i++];
89             int nextOffset = inIndexes[i];
90             cpTrie = CodePointTrie.Fast8.fromBinary(bytes);
91             int pos = bytes.position() - startPos;
92             assert nextOffset >= pos;
93             ICUBinary.skipBytes(bytes, nextOffset - pos);  // skip padding after trie bytes
94 
95             offset = nextOffset;
96             nextOffset = inIndexes[IX_BASIC_EMOJI_TRIE_OFFSET];
97             ICUBinary.skipBytes(bytes, nextOffset - offset);  // skip unknown bytes
98 
99             for (i = IX_BASIC_EMOJI_TRIE_OFFSET; i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) {
100                 offset = inIndexes[i];
101                 nextOffset = inIndexes[i + 1];
102                 // Set/leave null if there is no CharsTrie.
103                 if (nextOffset > offset) {
104                     stringTries[getStringTrieIndex(i)] =
105                             ICUBinary.getString(bytes, (nextOffset - offset) / 2, 0);
106                 }
107             }
108         } catch(IOException e) {
109             throw new ICUUncheckedIOException(e);
110         }
111     }
112 
addPropertyStarts(UnicodeSet set)113     public UnicodeSet addPropertyStarts(UnicodeSet set) {
114         // Add the start code point of each same-value range of the trie.
115         CodePointMap.Range range = new CodePointMap.Range();
116         int start = 0;
117         while (cpTrie.getRange(start, null, range)) {
118             set.add(start);
119             start = range.getEnd() + 1;
120         }
121         return set;
122     }
123 
124     // Note: REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
125     private static final byte[] bitFlags = {
126         BIT_EMOJI,                  // UCHAR_EMOJI=57
127         BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
128         BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
129         BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
130         BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
131         -1,                         // UCHAR_REGIONAL_INDICATOR=62
132         -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
133         BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
134         BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
135         -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
136         -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
137         -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
138         -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
139         -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
140         BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71
141     };
142 
hasBinaryProperty(int c, int which)143     public boolean hasBinaryProperty(int c, int which) {
144         if (which < UProperty.EMOJI || UProperty.RGI_EMOJI < which) {
145             return false;
146         }
147         int bit = bitFlags[which - UProperty.EMOJI];
148         if (bit < 0) {
149             return false;  // not a property that we support in this function
150         }
151         int bits = cpTrie.get(c);
152         return ((bits >> bit) & 1) != 0;
153     }
154 
hasBinaryProperty(CharSequence s, int which)155     public boolean hasBinaryProperty(CharSequence s, int which) {
156         int length = s.length();
157         if (length == 0) { return false; }  // empty string
158         // The caller should have delegated single code points to hasBinaryProperty(c, which).
159         if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) {
160             return false;
161         }
162         int firstProp = which, lastProp = which;
163         if (which == UProperty.RGI_EMOJI) {
164             // RGI_Emoji is the union of the other emoji properties of strings.
165             firstProp = UProperty.BASIC_EMOJI;
166             lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE;
167         }
168         for (int prop = firstProp; prop <= lastProp; ++prop) {
169             String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI];
170             if (trieUChars != null) {
171                 CharsTrie trie = new CharsTrie(trieUChars, 0);
172                 BytesTrie.Result result = trie.next(s, 0, length);
173                 if (result.hasValue()) {
174                     return true;
175                 }
176             }
177         }
178         return false;
179     }
180 
addStrings(int which, UnicodeSet set)181     public void addStrings(int which, UnicodeSet set) {
182         if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) {
183             return;
184         }
185         int firstProp = which, lastProp = which;
186         if (which == UProperty.RGI_EMOJI) {
187             // RGI_Emoji is the union of the other emoji properties of strings.
188             firstProp = UProperty.BASIC_EMOJI;
189             lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE;
190         }
191         for (int prop = firstProp; prop <= lastProp; ++prop) {
192             String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI];
193             if (trieUChars != null) {
194                 CharsTrie trie = new CharsTrie(trieUChars, 0);
195                 for (CharsTrie.Entry entry : trie) {
196                     set.add(entry.chars);
197                 }
198             }
199         }
200     }
201 }
202