• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2021 and later: Unicode, Inc. and others.
2 // License & terms of use: https://www.unicode.org/copyright.html
3 
4 // emojipropsbuilder.cpp
5 // created: 2021sep03 Markus W. Scherer
6 
7 #include <stdio.h>
8 #include <string.h>
9 #include <set>
10 #include <string>
11 #include "unicode/utypes.h"
12 #include "unicode/uchar.h"
13 #include "unicode/ucharstriebuilder.h"
14 #include "unicode/ucptrie.h"
15 #include "unicode/udata.h"
16 #include "unicode/umutablecptrie.h"
17 #include "unicode/uniset.h"
18 #include "unicode/unistr.h"
19 #include "charstr.h"
20 #include "cmemory.h"
21 #include "emojiprops.h"
22 #include "genprops.h"
23 #include "uassert.h"
24 #include "unewdata.h"
25 #include "uparse.h"
26 
27 /* Emoji properties file format ------------------------------------------------
28 
29 The file format prepared and written here contains several data
30 structures that store indexes or data.
31 
32 Before the data contents described below, there are the headers required by
33 the udata API for loading ICU data. Especially, a UDataInfo structure
34 precedes the actual data. It contains platform properties values and the
35 file format version.
36 
37 The following is a description of format version 1.0 .
38 
39 The file contains the following structures:
40 
41     const int32_t indexes[] with values i0, i1, ...:
42     (see EmojiProps::IX_... constants for names of indexes)
43 
44     The length of the indexes[] array is indexes[IX_CPTRIE_OFFSET]/4;
45 
46     The first 14 indexes are byte offsets in ascending order.
47     Each byte offset marks the start of a part in the data file,
48     and the limit (exclusive end) of the previous one.
49     When two consecutive byte offsets are the same, then the corresponding part is empty.
50     Byte offsets are offsets from after the header, that is, from the beginning of the indexes[].
51     Each part starts at an offset with proper alignment for its data.
52     If necessary, the previous part may include padding bytes to achieve this alignment.
53 
54     i0        offset of cpTrie (and the limit offset of the indexes[] array)
55     i1..i3    reserved, same as the limit offset of the previous part
56     i4        offset of Basic_Emoji string trie
57     i5        offset of Emoji_Keycap_Sequence string trie
58     i6        offset of RGI_Emoji_Modifier_Sequence string trie
59     i7        offset of RGI_Emoji_Flag_Sequence string trie
60     i8        offset of RGI_Emoji_Tag_Sequence string trie
61     i9        offset of RGI_Emoji_ZWJ_Sequence string trie
62     i10..i12  reserved, same as the limit offset of the previous part
63     i13       totalSize -- same as the limit offset of the previous part
64     i14..i15  reserved, 0
65 
66     After the indexes array follows a UCPTrie=CodePointTrie (type=fast, valueWidth=8)
67     "cpTrie" with one bit each for multiple binary properties;
68     see EmojiProps::BIT_... constants.
69 
70     After that follow consecutive, serialized,
71     single-property UCharsTrie=CharsTrie string tries for multiple properties of strings;
72     see EmojiProps::IX_.._TRIE_OFFSET constants.
73 
74     The Basic_Emoji property contains both single code points and multi-character strings.
75     Its data is in both the code point trie and in one of the string tries.
76 
77 ----------------------------------------------------------------------------- */
78 
79 U_NAMESPACE_USE
80 
81 // UDataInfo cf. udata.h
82 static UDataInfo dataInfo={
83     sizeof(UDataInfo),
84     0,
85 
86     U_IS_BIG_ENDIAN,
87     U_CHARSET_FAMILY,
88     U_SIZEOF_UCHAR,
89     0,
90 
91     { u'E', u'm', u'o', u'j' },                 // dataFormat="Emoj"
92     { 1, 0, 0, 0 },                             // formatVersion
93     { 14, 0, 0, 0 }                             // dataVersion
94 };
95 
96 class EmojiPropsBuilder : public PropsBuilder {
97 public:
98     EmojiPropsBuilder(UErrorCode &errorCode);
99     ~EmojiPropsBuilder() override;
100 
101     void setUnicodeVersion(const UVersionInfo version) override;
102     void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode) override;
103     void parseUnidataFiles(const char *unidataPath, UErrorCode &errorCode) override;
104     void build(UErrorCode &errorCode) override;
105     void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) override;
106 
107     // visible for C adapter
108     void parsePropsOfStringsLine(char *fields[][2], UErrorCode &errorCode);
109 
110 private:
111     void setBit(UChar32 start, UChar32 end, int32_t shift, bool on, UErrorCode &errorCode);
112     void setBits(UChar32 start, UChar32 end, uint32_t value, uint32_t mask, UErrorCode &errorCode);
113     void parsePropsOfStringsFile(const char *path, UErrorCode &errorCode);
114 
getTrieIndex(int32_t index)115     static int32_t getTrieIndex(int32_t index) {
116         U_ASSERT(TRIE_IX_START <= index);
117         U_ASSERT(index < TRIE_IX_LIMIT);
118         return index - TRIE_IX_START;
119     }
getTrieBuilder(int32_t index)120     UCharsTrieBuilder &getTrieBuilder(int32_t index) {
121         index = getTrieIndex(index);
122         U_ASSERT(trieBuilders[index] != nullptr);
123         return *trieBuilders[index];
124     }
getTrieString(int32_t index)125     UnicodeString &getTrieString(int32_t index) {
126         index = getTrieIndex(index);
127         return trieStrings[index];
128     }
getNumStrings(int32_t index)129     int32_t &getNumStrings(int32_t index) {
130         index = getTrieIndex(index);
131         return numStrings[index];
132     }
133 
134     static constexpr int32_t TRIE_IX_START = EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET;
135     static constexpr int32_t TRIE_IX_LIMIT = EmojiProps::IX_RESERVED10;
136 
137     UMutableCPTrie *mutableCPTrie = nullptr;
138     UCPTrie *cpTrie = nullptr;
139     std::set<std::string> unrecognized;
140     UCharsTrieBuilder *trieBuilders[TRIE_IX_LIMIT - TRIE_IX_START] = {
141         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr
142     };
143     UnicodeString trieStrings[TRIE_IX_LIMIT - TRIE_IX_START];
144     int32_t numStrings[TRIE_IX_LIMIT - TRIE_IX_START];
145     int32_t indexes[EmojiProps::IX_COUNT] = {
146         0, 0, 0, 0,
147         0, 0, 0, 0,
148         0, 0, 0, 0,
149         0, 0, 0, 0
150     };
151     uint8_t trieBlock[100000];
152     int32_t trieSize = 0;
153 };
154 
EmojiPropsBuilder(UErrorCode & errorCode)155 EmojiPropsBuilder::EmojiPropsBuilder(UErrorCode &errorCode) {
156     mutableCPTrie = umutablecptrie_open(0, 0, &errorCode);
157     if (U_FAILURE(errorCode)) {
158         fprintf(stderr, "genprops/emoji error: umutablecptrie_open() failed: %s\n",
159                 u_errorName(errorCode));
160     }
161     bool isNull = false;
162     for (auto &ptr : trieBuilders) {
163         ptr = new UCharsTrieBuilder(errorCode);
164         if (ptr == nullptr) {
165             isNull = true;
166         }
167     }
168     if (isNull && U_SUCCESS(errorCode)) {
169         errorCode = U_MEMORY_ALLOCATION_ERROR;
170     }
171     if (U_FAILURE(errorCode)) {
172         fprintf(stderr, "genprops/emoji error: new UCharsTrieBuilder() failed: %s\n",
173                 u_errorName(errorCode));
174     }
175     for (auto &num : numStrings) {
176         num = 0;
177     }
178 }
179 
~EmojiPropsBuilder()180 EmojiPropsBuilder::~EmojiPropsBuilder() {
181     umutablecptrie_close(mutableCPTrie);
182     ucptrie_close(cpTrie);
183     for (auto ptr : trieBuilders) {
184         delete ptr;
185     }
186 }
187 
188 void
setUnicodeVersion(const UVersionInfo version)189 EmojiPropsBuilder::setUnicodeVersion(const UVersionInfo version) {
190     uprv_memcpy(dataInfo.dataVersion, version, 4);
191 }
192 
193 namespace {
194 
195 struct PropToBinary {
196     UProperty prop;
197     int32_t shift;
198 };
199 
200 constexpr PropToBinary propToBinaries[] = {
201     { UCHAR_EMOJI,                      EmojiProps::BIT_EMOJI },
202     { UCHAR_EMOJI_PRESENTATION,         EmojiProps::BIT_EMOJI_PRESENTATION },
203     { UCHAR_EMOJI_MODIFIER,             EmojiProps::BIT_EMOJI_MODIFIER },
204     { UCHAR_EMOJI_MODIFIER_BASE,        EmojiProps::BIT_EMOJI_MODIFIER_BASE },
205     { UCHAR_EMOJI_COMPONENT,            EmojiProps::BIT_EMOJI_COMPONENT },
206     { UCHAR_EXTENDED_PICTOGRAPHIC,      EmojiProps::BIT_EXTENDED_PICTOGRAPHIC },
207 };
208 
209 struct PropNameToIndex {
210     const char *propName;
211     int32_t emojiPropsIndex;
212 };
213 
214 constexpr PropNameToIndex propNameToIndex[] = {
215     { "Basic_Emoji",                    EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET },
216     { "Emoji_Keycap_Sequence",          EmojiProps::IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET },
217     { "RGI_Emoji_Modifier_Sequence",    EmojiProps::IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET },
218     { "RGI_Emoji_Flag_Sequence",        EmojiProps::IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET },
219     { "RGI_Emoji_Tag_Sequence",         EmojiProps::IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET },
220     { "RGI_Emoji_ZWJ_Sequence",         EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET },
221 };
222 
223 }  // namespace
224 
225 void
setProps(const UniProps & props,const UnicodeSet & newValues,UErrorCode & errorCode)226 EmojiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
227                             UErrorCode &errorCode) {
228     if (U_FAILURE(errorCode)) { return; }
229 
230     if (newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {
231         for (const auto &p2b : propToBinaries) {
232             U_ASSERT(p2b.shift < 8);
233             if (newValues.contains(p2b.prop)) {
234                 setBit(props.start, props.end, p2b.shift, props.binProps[p2b.prop], errorCode);
235             }
236         }
237     }
238 }
239 
240 void
setBit(UChar32 start,UChar32 end,int32_t shift,bool on,UErrorCode & errorCode)241 EmojiPropsBuilder::setBit(UChar32 start, UChar32 end, int32_t shift, bool on,
242                           UErrorCode &errorCode) {
243     uint32_t mask = U_MASK(shift);
244     uint32_t value = on ? mask : 0;
245     setBits(start, end, value, mask, errorCode);
246 }
247 
248 void
setBits(UChar32 start,UChar32 end,uint32_t value,uint32_t mask,UErrorCode & errorCode)249 EmojiPropsBuilder::setBits(UChar32 start, UChar32 end, uint32_t value, uint32_t mask,
250                            UErrorCode &errorCode) {
251     if (U_FAILURE(errorCode)) { return; }
252 
253     if (start == end) {
254         uint32_t oldValue = umutablecptrie_get(mutableCPTrie, start);
255         uint32_t newValue = (oldValue & ~mask) | value;
256         if (newValue != oldValue) {
257             umutablecptrie_set(mutableCPTrie, start, newValue, &errorCode);
258         }
259         return;
260     }
261     while (start <= end && U_SUCCESS(errorCode)) {
262         uint32_t oldValue;
263         UChar32 rangeEnd = umutablecptrie_getRange(
264             mutableCPTrie, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &oldValue);
265         if (rangeEnd > end) {
266             rangeEnd = end;
267         }
268         uint32_t newValue = (oldValue & ~mask) | value;
269         if (newValue != oldValue) {
270             umutablecptrie_setRange(mutableCPTrie, start, rangeEnd, newValue, &errorCode);
271         }
272         start = rangeEnd + 1;
273     }
274 }
275 
276 namespace {
277 
278 void U_CALLCONV
parsePropsOfStringsLineFn(void * context,char * fields[][2],int32_t,UErrorCode * pErrorCode)279 parsePropsOfStringsLineFn(
280         void *context,
281         char *fields[][2], int32_t /* fieldCount */,
282         UErrorCode *pErrorCode) {
283     reinterpret_cast<EmojiPropsBuilder *>(context)->parsePropsOfStringsLine(fields, *pErrorCode);
284 }
285 
286 }  // namespace
287 
288 void
parseUnidataFiles(const char * unidataPath,UErrorCode & errorCode)289 EmojiPropsBuilder::parseUnidataFiles(const char *unidataPath, UErrorCode &errorCode) {
290     CharString path(unidataPath, errorCode);
291     path.ensureEndsWithFileSeparator(errorCode);
292     if (U_FAILURE(errorCode)) { return; }
293     int32_t pathLength = path.length();
294     path.append("emoji-sequences.txt", errorCode);
295     parsePropsOfStringsFile(path.data(), errorCode);
296     if (U_FAILURE(errorCode)) { return; }
297     path.truncate(pathLength);
298     path.append("emoji-zwj-sequences.txt", errorCode);
299     parsePropsOfStringsFile(path.data(), errorCode);
300 
301     if (U_SUCCESS(errorCode) && !unrecognized.empty()) {
302         puts("\n*** genprops/emoji warning: sample of unrecognized property names:");
303         int32_t i = 0;
304         for (const auto &s : unrecognized) {
305             printf("    \"%s\"\n", s.c_str());
306             if (++i == 10) { break; }
307         }
308     }
309 }
310 
311 void
parsePropsOfStringsFile(const char * path,UErrorCode & errorCode)312 EmojiPropsBuilder::parsePropsOfStringsFile(const char *path, UErrorCode &errorCode) {
313     if (U_FAILURE(errorCode)) { return; }
314     char *fields[3][2];
315     u_parseDelimitedFile(path, ';', fields, 3, parsePropsOfStringsLineFn, this, &errorCode);
316 }
317 
parsePropsOfStringsLine(char * fields[][2],UErrorCode & errorCode)318 void EmojiPropsBuilder::parsePropsOfStringsLine(char *fields[][2], UErrorCode &errorCode) {
319     if (U_FAILURE(errorCode)) { return; }
320     // Format:
321     //   code_point(s) ; type_field ; description # comments
322     *fields[1][1] = 0;  // NUL-terminate the name field
323     char *propName = const_cast<char *>(u_skipWhitespace(fields[1][0]));
324     u_rtrim(propName);
325     int32_t index = -1;
326     for (const PropNameToIndex &pn2i : propNameToIndex) {
327         if (strcmp(pn2i.propName, propName) == 0) {
328             index = pn2i.emojiPropsIndex;
329             break;
330         }
331     }
332     if (index < 0) {
333         // not a supported property
334         unrecognized.insert(propName);
335         return;
336     }
337 
338     const char *rangeOrString = fields[0][0];
339     if (strstr(rangeOrString, "..") != nullptr) {
340         // Code point range:
341         // 231A..231B    ; Basic_Emoji                  ; watch
342         if (index != EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET) {
343             fprintf(stderr,
344                     "genprops/emoji error: single code points %s for %s\n", rangeOrString, propName);
345             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
346             return;
347         }
348         uint32_t start, end;
349         u_parseCodePointRange(rangeOrString, &start, &end, &errorCode);
350         setBit(start, end, EmojiProps::BIT_BASIC_EMOJI, true, errorCode);
351     } else {
352         // Code point or string:
353         // 23F0          ; Basic_Emoji                  ; alarm clock
354         // 23F1 FE0F     ; Basic_Emoji                  ; stopwatch
355         uint32_t first;
356         UChar s[100];
357         int32_t length = u_parseString(rangeOrString, s, UPRV_LENGTHOF(s), &first, &errorCode);
358         if (U_FAILURE(errorCode)) { return; }
359         if (length == 0) {
360             fprintf(stderr,
361                     "genprops/emoji error: empty string on line\n    %s ; %s ; %s\n",
362                     rangeOrString, propName, fields[2][0]);
363             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
364             return;
365         }
366         if (length == U16_LENGTH(first)) {
367             // single code point
368             if (index != EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET) {
369                 fprintf(stderr,
370                         "genprops/emoji error: single code point %s for %s\n", rangeOrString, propName);
371                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
372                 return;
373             }
374             setBit(first, first, EmojiProps::BIT_BASIC_EMOJI, true, errorCode);
375         } else {
376             // more than one code point
377             UnicodeString us(false, s, length);
378             getTrieBuilder(index).add(us, 0, errorCode);
379             ++getNumStrings(index);
380         }
381     }
382 }
383 
384 void
build(UErrorCode & errorCode)385 EmojiPropsBuilder::build(UErrorCode &errorCode) {
386     if (U_FAILURE(errorCode)) { return; }
387     cpTrie = umutablecptrie_buildImmutable(
388         mutableCPTrie, UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, &errorCode);
389     if (U_FAILURE(errorCode)) {
390         fprintf(stderr,
391                 "genprops/emoji error: umutablecptrie_buildImmutable() failed: %s\n",
392                 u_errorName(errorCode));
393         return;
394     }
395     trieSize = ucptrie_toBinary(cpTrie, trieBlock, sizeof(trieBlock), &errorCode);
396     if (U_FAILURE(errorCode)) {
397         fprintf(stderr,
398                 "genprops/emoji error: ucptrie_toBinary() failed: %s (length %ld)\n",
399                 u_errorName(errorCode), (long)trieSize);
400         return;
401     }
402     U_ASSERT((trieSize & 3) == 0);  // multiple of 4 bytes
403 
404     for (int32_t index = TRIE_IX_START; index < TRIE_IX_LIMIT; ++index) {
405         if (getNumStrings(index) == 0) {
406             fprintf(stderr, "genprops/emoji error: no strings for property index %d\n", (int)index);
407             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
408             return;
409         }
410         UCharsTrieBuilder &builder = getTrieBuilder(index);
411         UnicodeString &result = getTrieString(index);
412         builder.buildUnicodeString(USTRINGTRIE_BUILD_SMALL, result, errorCode);
413         if (U_FAILURE(errorCode)) {
414             fprintf(stderr,
415                     "genprops/emoji error: UCharsTrieBuilder[%d].buildUnicodeString() failed: %s\n",
416                     (int)index, u_errorName(errorCode));
417             return;
418         }
419     }
420 
421     // Set indexes.
422     int32_t length = sizeof(indexes);
423     U_ASSERT(length == EmojiProps::IX_COUNT * 4);
424     int32_t offset = length;
425     indexes[EmojiProps::IX_CPTRIE_OFFSET] = offset;
426     if (!beQuiet) {
427         puts("* uemoji.icu stats *");
428         printf("UCPTrie size in bytes:                                 %5u\n", (int)trieSize);
429     }
430     offset += trieSize;
431 
432     indexes[EmojiProps::IX_RESERVED1] = offset;
433     indexes[EmojiProps::IX_RESERVED2] = offset;
434     indexes[EmojiProps::IX_RESERVED3] = offset;
435 
436     int32_t index = EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET;
437     indexes[index] = offset;
438     length = getTrieString(index).length() * 2;
439     if (!beQuiet) {
440         printf("UCharsTrie size in bytes: Basic_Emoji                  %5u  num strings: %5u\n",
441                (int)length, (int)getNumStrings(index));
442     }
443     offset += length;
444 
445     index = EmojiProps::IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET;
446     indexes[index] = offset;
447     length = getTrieString(index).length() * 2;
448     if (!beQuiet) {
449         printf("UCharsTrie size in bytes: Emoji_Keycap_Sequence        %5u  num strings: %5u\n",
450                (int)length, (int)getNumStrings(index));
451     }
452     offset += length;
453 
454     index = EmojiProps::IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET;
455     indexes[index] = offset;
456     length = getTrieString(index).length() * 2;
457     if (!beQuiet) {
458         printf("UCharsTrie size in bytes: RGI_Emoji_Modifier_Sequence  %5u  num strings: %5u\n",
459                (int)length, (int)getNumStrings(index));
460     }
461     offset += length;
462 
463     index = EmojiProps::IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET;
464     indexes[index] = offset;
465     length = getTrieString(index).length() * 2;
466     if (!beQuiet) {
467         printf("UCharsTrie size in bytes: RGI_Emoji_Flag_Sequence      %5u  num strings: %5u\n",
468                (int)length, (int)getNumStrings(index));
469     }
470     offset += length;
471 
472     index = EmojiProps::IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET;
473     indexes[index] = offset;
474     length = getTrieString(index).length() * 2;
475     if (!beQuiet) {
476         printf("UCharsTrie size in bytes: RGI_Emoji_Tag_Sequence       %5u  num strings: %5u\n",
477                (int)length, (int)getNumStrings(index));
478     }
479     offset += length;
480 
481     index = EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET;
482     indexes[index] = offset;
483     length = getTrieString(index).length() * 2;
484     if (!beQuiet) {
485         printf("UCharsTrie size in bytes: RGI_Emoji_ZWJ_Sequence       %5u  num strings: %5u\n",
486                (int)length, (int)getNumStrings(index));
487     }
488     offset += length;
489 
490     indexes[EmojiProps::IX_RESERVED10] = offset;
491     indexes[EmojiProps::IX_RESERVED11] = offset;
492     indexes[EmojiProps::IX_RESERVED12] = offset;
493     indexes[EmojiProps::IX_TOTAL_SIZE] = offset;
494 
495     if (!beQuiet) {
496         printf("data size:                                            %6ld\n", (long)offset);
497     }
498 }
499 
500 namespace {
501 
writeTrieBlock(UNewDataMemory * pData,const UnicodeString & s)502 void writeTrieBlock(UNewDataMemory *pData, const UnicodeString &s) {
503     udata_writeBlock(pData, s.getBuffer(), s.length() * 2);
504 }
505 
506 }  // namespace
507 
508 void
writeBinaryData(const char * path,UBool withCopyright,UErrorCode & errorCode)509 EmojiPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
510     if (U_FAILURE(errorCode)) { return; }
511 
512     UNewDataMemory *pData = udata_create(path, "icu", "uemoji", &dataInfo,
513                                          withCopyright ? U_COPYRIGHT_STRING : nullptr, &errorCode);
514     if (U_FAILURE(errorCode)) {
515         fprintf(stderr, "genprops/emoji error: udata_create(%s, uemoji.icu) failed: %s\n",
516                 path, u_errorName(errorCode));
517         return;
518     }
519 
520     udata_writeBlock(pData, indexes, sizeof(indexes));
521     udata_writeBlock(pData, trieBlock, trieSize);
522     writeTrieBlock(pData, getTrieString(EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET));
523     writeTrieBlock(pData, getTrieString(EmojiProps::IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET));
524     writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET));
525     writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET));
526     writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET));
527     writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET));
528 
529     long dataLength = udata_finish(pData, &errorCode);
530     if (U_FAILURE(errorCode)) {
531         fprintf(stderr,
532                 "genprops/emoji error: error %s writing the output file\n",
533                 u_errorName(errorCode));
534         return;
535     }
536 
537     int32_t totalSize = indexes[EmojiProps::IX_TOTAL_SIZE];
538     if (dataLength != (long)totalSize) {
539         fprintf(stderr,
540                 "udata_finish(uemoji.icu) reports %ld bytes written but should be %ld\n",
541                 dataLength, (long)totalSize);
542         errorCode = U_INTERNAL_PROGRAM_ERROR;
543     }
544 }
545 
546 PropsBuilder *
createEmojiPropsBuilder(UErrorCode & errorCode)547 createEmojiPropsBuilder(UErrorCode &errorCode) {
548     if (U_FAILURE(errorCode)) { return nullptr; }
549     PropsBuilder *pb = new EmojiPropsBuilder(errorCode);
550     if (pb == nullptr) {
551         errorCode = U_MEMORY_ALLOCATION_ERROR;
552     }
553     return pb;
554 }
555 
556 /*
557  * Hey, Emacs, please set the following:
558  *
559  * Local Variables:
560  * indent-tabs-mode: nil
561  * End:
562  *
563  */
564