1 // © 2021 and later: Unicode, Inc. and others.
2 // License & terms of use: https://www.unicode.org/copyright.html
3
4 // emojipropsbuilder.cpp
5 // created: 2021sep03 Markus W. Scherer
6
7 #include <stdio.h>
8 #include <string.h>
9 #include <set>
10 #include <string>
11 #include "unicode/utypes.h"
12 #include "unicode/uchar.h"
13 #include "unicode/ucharstriebuilder.h"
14 #include "unicode/ucptrie.h"
15 #include "unicode/udata.h"
16 #include "unicode/umutablecptrie.h"
17 #include "unicode/uniset.h"
18 #include "unicode/unistr.h"
19 #include "charstr.h"
20 #include "cmemory.h"
21 #include "emojiprops.h"
22 #include "genprops.h"
23 #include "uassert.h"
24 #include "unewdata.h"
25 #include "uparse.h"
26
27 /* Emoji properties file format ------------------------------------------------
28
29 The file format prepared and written here contains several data
30 structures that store indexes or data.
31
Before the data contents described below, there are the headers required by
the udata API for loading ICU data. In particular, a UDataInfo structure
precedes the actual data. It contains platform property values and the
file format version.
36
37 The following is a description of format version 1.0 .
38
39 The file contains the following structures:
40
41 const int32_t indexes[] with values i0, i1, ...:
42 (see EmojiProps::IX_... constants for names of indexes)
43
The length of the indexes[] array is indexes[IX_CPTRIE_OFFSET]/4.
45
46 The first 14 indexes are byte offsets in ascending order.
47 Each byte offset marks the start of a part in the data file,
48 and the limit (exclusive end) of the previous one.
49 When two consecutive byte offsets are the same, then the corresponding part is empty.
50 Byte offsets are offsets from after the header, that is, from the beginning of the indexes[].
51 Each part starts at an offset with proper alignment for its data.
52 If necessary, the previous part may include padding bytes to achieve this alignment.
53
54 i0 offset of cpTrie (and the limit offset of the indexes[] array)
55 i1..i3 reserved, same as the limit offset of the previous part
56 i4 offset of Basic_Emoji string trie
57 i5 offset of Emoji_Keycap_Sequence string trie
58 i6 offset of RGI_Emoji_Modifier_Sequence string trie
59 i7 offset of RGI_Emoji_Flag_Sequence string trie
60 i8 offset of RGI_Emoji_Tag_Sequence string trie
61 i9 offset of RGI_Emoji_ZWJ_Sequence string trie
62 i10..i12 reserved, same as the limit offset of the previous part
63 i13 totalSize -- same as the limit offset of the previous part
64 i14..i15 reserved, 0
65
66 After the indexes array follows a UCPTrie=CodePointTrie (type=fast, valueWidth=8)
67 "cpTrie" with one bit each for multiple binary properties;
68 see EmojiProps::BIT_... constants.
69
70 After that follow consecutive, serialized,
71 single-property UCharsTrie=CharsTrie string tries for multiple properties of strings;
72 see EmojiProps::IX_.._TRIE_OFFSET constants.
73
74 The Basic_Emoji property contains both single code points and multi-character strings.
75 Its data is in both the code point trie and in one of the string tries.
76
77 ----------------------------------------------------------------------------- */
78
79 U_NAMESPACE_USE
80
81 // UDataInfo cf. udata.h
82 static UDataInfo dataInfo={
83 sizeof(UDataInfo),
84 0,
85
86 U_IS_BIG_ENDIAN,
87 U_CHARSET_FAMILY,
88 U_SIZEOF_UCHAR,
89 0,
90
91 { u'E', u'm', u'o', u'j' }, // dataFormat="Emoj"
92 { 1, 0, 0, 0 }, // formatVersion
93 { 14, 0, 0, 0 } // dataVersion
94 };
95
96 class EmojiPropsBuilder : public PropsBuilder {
97 public:
98 EmojiPropsBuilder(UErrorCode &errorCode);
99 ~EmojiPropsBuilder() override;
100
101 void setUnicodeVersion(const UVersionInfo version) override;
102 void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode) override;
103 void parseUnidataFiles(const char *unidataPath, UErrorCode &errorCode) override;
104 void build(UErrorCode &errorCode) override;
105 void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) override;
106
107 // visible for C adapter
108 void parsePropsOfStringsLine(char *fields[][2], UErrorCode &errorCode);
109
110 private:
111 void setBit(UChar32 start, UChar32 end, int32_t shift, bool on, UErrorCode &errorCode);
112 void setBits(UChar32 start, UChar32 end, uint32_t value, uint32_t mask, UErrorCode &errorCode);
113 void parsePropsOfStringsFile(const char *path, UErrorCode &errorCode);
114
getTrieIndex(int32_t index)115 static int32_t getTrieIndex(int32_t index) {
116 U_ASSERT(TRIE_IX_START <= index);
117 U_ASSERT(index < TRIE_IX_LIMIT);
118 return index - TRIE_IX_START;
119 }
getTrieBuilder(int32_t index)120 UCharsTrieBuilder &getTrieBuilder(int32_t index) {
121 index = getTrieIndex(index);
122 U_ASSERT(trieBuilders[index] != nullptr);
123 return *trieBuilders[index];
124 }
getTrieString(int32_t index)125 UnicodeString &getTrieString(int32_t index) {
126 index = getTrieIndex(index);
127 return trieStrings[index];
128 }
getNumStrings(int32_t index)129 int32_t &getNumStrings(int32_t index) {
130 index = getTrieIndex(index);
131 return numStrings[index];
132 }
133
134 static constexpr int32_t TRIE_IX_START = EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET;
135 static constexpr int32_t TRIE_IX_LIMIT = EmojiProps::IX_RESERVED10;
136
137 UMutableCPTrie *mutableCPTrie = nullptr;
138 UCPTrie *cpTrie = nullptr;
139 std::set<std::string> unrecognized;
140 UCharsTrieBuilder *trieBuilders[TRIE_IX_LIMIT - TRIE_IX_START] = {
141 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr
142 };
143 UnicodeString trieStrings[TRIE_IX_LIMIT - TRIE_IX_START];
144 int32_t numStrings[TRIE_IX_LIMIT - TRIE_IX_START];
145 int32_t indexes[EmojiProps::IX_COUNT] = {
146 0, 0, 0, 0,
147 0, 0, 0, 0,
148 0, 0, 0, 0,
149 0, 0, 0, 0
150 };
151 uint8_t trieBlock[100000];
152 int32_t trieSize = 0;
153 };
154
EmojiPropsBuilder(UErrorCode & errorCode)155 EmojiPropsBuilder::EmojiPropsBuilder(UErrorCode &errorCode) {
156 mutableCPTrie = umutablecptrie_open(0, 0, &errorCode);
157 if (U_FAILURE(errorCode)) {
158 fprintf(stderr, "genprops/emoji error: umutablecptrie_open() failed: %s\n",
159 u_errorName(errorCode));
160 }
161 bool isNull = false;
162 for (auto &ptr : trieBuilders) {
163 ptr = new UCharsTrieBuilder(errorCode);
164 if (ptr == nullptr) {
165 isNull = true;
166 }
167 }
168 if (isNull && U_SUCCESS(errorCode)) {
169 errorCode = U_MEMORY_ALLOCATION_ERROR;
170 }
171 if (U_FAILURE(errorCode)) {
172 fprintf(stderr, "genprops/emoji error: new UCharsTrieBuilder() failed: %s\n",
173 u_errorName(errorCode));
174 }
175 for (auto &num : numStrings) {
176 num = 0;
177 }
178 }
179
~EmojiPropsBuilder()180 EmojiPropsBuilder::~EmojiPropsBuilder() {
181 umutablecptrie_close(mutableCPTrie);
182 ucptrie_close(cpTrie);
183 for (auto ptr : trieBuilders) {
184 delete ptr;
185 }
186 }
187
188 void
setUnicodeVersion(const UVersionInfo version)189 EmojiPropsBuilder::setUnicodeVersion(const UVersionInfo version) {
190 uprv_memcpy(dataInfo.dataVersion, version, 4);
191 }
192
193 namespace {
194
195 struct PropToBinary {
196 UProperty prop;
197 int32_t shift;
198 };
199
200 constexpr PropToBinary propToBinaries[] = {
201 { UCHAR_EMOJI, EmojiProps::BIT_EMOJI },
202 { UCHAR_EMOJI_PRESENTATION, EmojiProps::BIT_EMOJI_PRESENTATION },
203 { UCHAR_EMOJI_MODIFIER, EmojiProps::BIT_EMOJI_MODIFIER },
204 { UCHAR_EMOJI_MODIFIER_BASE, EmojiProps::BIT_EMOJI_MODIFIER_BASE },
205 { UCHAR_EMOJI_COMPONENT, EmojiProps::BIT_EMOJI_COMPONENT },
206 { UCHAR_EXTENDED_PICTOGRAPHIC, EmojiProps::BIT_EXTENDED_PICTOGRAPHIC },
207 };
208
209 struct PropNameToIndex {
210 const char *propName;
211 int32_t emojiPropsIndex;
212 };
213
214 constexpr PropNameToIndex propNameToIndex[] = {
215 { "Basic_Emoji", EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET },
216 { "Emoji_Keycap_Sequence", EmojiProps::IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET },
217 { "RGI_Emoji_Modifier_Sequence", EmojiProps::IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET },
218 { "RGI_Emoji_Flag_Sequence", EmojiProps::IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET },
219 { "RGI_Emoji_Tag_Sequence", EmojiProps::IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET },
220 { "RGI_Emoji_ZWJ_Sequence", EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET },
221 };
222
223 } // namespace
224
225 void
setProps(const UniProps & props,const UnicodeSet & newValues,UErrorCode & errorCode)226 EmojiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
227 UErrorCode &errorCode) {
228 if (U_FAILURE(errorCode)) { return; }
229
230 if (newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {
231 for (const auto &p2b : propToBinaries) {
232 U_ASSERT(p2b.shift < 8);
233 if (newValues.contains(p2b.prop)) {
234 setBit(props.start, props.end, p2b.shift, props.binProps[p2b.prop], errorCode);
235 }
236 }
237 }
238 }
239
240 void
setBit(UChar32 start,UChar32 end,int32_t shift,bool on,UErrorCode & errorCode)241 EmojiPropsBuilder::setBit(UChar32 start, UChar32 end, int32_t shift, bool on,
242 UErrorCode &errorCode) {
243 uint32_t mask = U_MASK(shift);
244 uint32_t value = on ? mask : 0;
245 setBits(start, end, value, mask, errorCode);
246 }
247
248 void
setBits(UChar32 start,UChar32 end,uint32_t value,uint32_t mask,UErrorCode & errorCode)249 EmojiPropsBuilder::setBits(UChar32 start, UChar32 end, uint32_t value, uint32_t mask,
250 UErrorCode &errorCode) {
251 if (U_FAILURE(errorCode)) { return; }
252
253 if (start == end) {
254 uint32_t oldValue = umutablecptrie_get(mutableCPTrie, start);
255 uint32_t newValue = (oldValue & ~mask) | value;
256 if (newValue != oldValue) {
257 umutablecptrie_set(mutableCPTrie, start, newValue, &errorCode);
258 }
259 return;
260 }
261 while (start <= end && U_SUCCESS(errorCode)) {
262 uint32_t oldValue;
263 UChar32 rangeEnd = umutablecptrie_getRange(
264 mutableCPTrie, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &oldValue);
265 if (rangeEnd > end) {
266 rangeEnd = end;
267 }
268 uint32_t newValue = (oldValue & ~mask) | value;
269 if (newValue != oldValue) {
270 umutablecptrie_setRange(mutableCPTrie, start, rangeEnd, newValue, &errorCode);
271 }
272 start = rangeEnd + 1;
273 }
274 }
275
276 namespace {
277
278 void U_CALLCONV
parsePropsOfStringsLineFn(void * context,char * fields[][2],int32_t,UErrorCode * pErrorCode)279 parsePropsOfStringsLineFn(
280 void *context,
281 char *fields[][2], int32_t /* fieldCount */,
282 UErrorCode *pErrorCode) {
283 reinterpret_cast<EmojiPropsBuilder *>(context)->parsePropsOfStringsLine(fields, *pErrorCode);
284 }
285
286 } // namespace
287
288 void
parseUnidataFiles(const char * unidataPath,UErrorCode & errorCode)289 EmojiPropsBuilder::parseUnidataFiles(const char *unidataPath, UErrorCode &errorCode) {
290 CharString path(unidataPath, errorCode);
291 path.ensureEndsWithFileSeparator(errorCode);
292 if (U_FAILURE(errorCode)) { return; }
293 int32_t pathLength = path.length();
294 path.append("emoji-sequences.txt", errorCode);
295 parsePropsOfStringsFile(path.data(), errorCode);
296 if (U_FAILURE(errorCode)) { return; }
297 path.truncate(pathLength);
298 path.append("emoji-zwj-sequences.txt", errorCode);
299 parsePropsOfStringsFile(path.data(), errorCode);
300
301 if (U_SUCCESS(errorCode) && !unrecognized.empty()) {
302 puts("\n*** genprops/emoji warning: sample of unrecognized property names:");
303 int32_t i = 0;
304 for (const auto &s : unrecognized) {
305 printf(" \"%s\"\n", s.c_str());
306 if (++i == 10) { break; }
307 }
308 }
309 }
310
311 void
parsePropsOfStringsFile(const char * path,UErrorCode & errorCode)312 EmojiPropsBuilder::parsePropsOfStringsFile(const char *path, UErrorCode &errorCode) {
313 if (U_FAILURE(errorCode)) { return; }
314 char *fields[3][2];
315 u_parseDelimitedFile(path, ';', fields, 3, parsePropsOfStringsLineFn, this, &errorCode);
316 }
317
parsePropsOfStringsLine(char * fields[][2],UErrorCode & errorCode)318 void EmojiPropsBuilder::parsePropsOfStringsLine(char *fields[][2], UErrorCode &errorCode) {
319 if (U_FAILURE(errorCode)) { return; }
320 // Format:
321 // code_point(s) ; type_field ; description # comments
322 *fields[1][1] = 0; // NUL-terminate the name field
323 char *propName = const_cast<char *>(u_skipWhitespace(fields[1][0]));
324 u_rtrim(propName);
325 int32_t index = -1;
326 for (const PropNameToIndex &pn2i : propNameToIndex) {
327 if (strcmp(pn2i.propName, propName) == 0) {
328 index = pn2i.emojiPropsIndex;
329 break;
330 }
331 }
332 if (index < 0) {
333 // not a supported property
334 unrecognized.insert(propName);
335 return;
336 }
337
338 const char *rangeOrString = fields[0][0];
339 if (strstr(rangeOrString, "..") != nullptr) {
340 // Code point range:
341 // 231A..231B ; Basic_Emoji ; watch
342 if (index != EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET) {
343 fprintf(stderr,
344 "genprops/emoji error: single code points %s for %s\n", rangeOrString, propName);
345 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
346 return;
347 }
348 uint32_t start, end;
349 u_parseCodePointRange(rangeOrString, &start, &end, &errorCode);
350 setBit(start, end, EmojiProps::BIT_BASIC_EMOJI, true, errorCode);
351 } else {
352 // Code point or string:
353 // 23F0 ; Basic_Emoji ; alarm clock
354 // 23F1 FE0F ; Basic_Emoji ; stopwatch
355 uint32_t first;
356 UChar s[100];
357 int32_t length = u_parseString(rangeOrString, s, UPRV_LENGTHOF(s), &first, &errorCode);
358 if (U_FAILURE(errorCode)) { return; }
359 if (length == 0) {
360 fprintf(stderr,
361 "genprops/emoji error: empty string on line\n %s ; %s ; %s\n",
362 rangeOrString, propName, fields[2][0]);
363 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
364 return;
365 }
366 if (length == U16_LENGTH(first)) {
367 // single code point
368 if (index != EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET) {
369 fprintf(stderr,
370 "genprops/emoji error: single code point %s for %s\n", rangeOrString, propName);
371 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
372 return;
373 }
374 setBit(first, first, EmojiProps::BIT_BASIC_EMOJI, true, errorCode);
375 } else {
376 // more than one code point
377 UnicodeString us(false, s, length);
378 getTrieBuilder(index).add(us, 0, errorCode);
379 ++getNumStrings(index);
380 }
381 }
382 }
383
384 void
build(UErrorCode & errorCode)385 EmojiPropsBuilder::build(UErrorCode &errorCode) {
386 if (U_FAILURE(errorCode)) { return; }
387 cpTrie = umutablecptrie_buildImmutable(
388 mutableCPTrie, UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8, &errorCode);
389 if (U_FAILURE(errorCode)) {
390 fprintf(stderr,
391 "genprops/emoji error: umutablecptrie_buildImmutable() failed: %s\n",
392 u_errorName(errorCode));
393 return;
394 }
395 trieSize = ucptrie_toBinary(cpTrie, trieBlock, sizeof(trieBlock), &errorCode);
396 if (U_FAILURE(errorCode)) {
397 fprintf(stderr,
398 "genprops/emoji error: ucptrie_toBinary() failed: %s (length %ld)\n",
399 u_errorName(errorCode), (long)trieSize);
400 return;
401 }
402 U_ASSERT((trieSize & 3) == 0); // multiple of 4 bytes
403
404 for (int32_t index = TRIE_IX_START; index < TRIE_IX_LIMIT; ++index) {
405 if (getNumStrings(index) == 0) {
406 fprintf(stderr, "genprops/emoji error: no strings for property index %d\n", (int)index);
407 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
408 return;
409 }
410 UCharsTrieBuilder &builder = getTrieBuilder(index);
411 UnicodeString &result = getTrieString(index);
412 builder.buildUnicodeString(USTRINGTRIE_BUILD_SMALL, result, errorCode);
413 if (U_FAILURE(errorCode)) {
414 fprintf(stderr,
415 "genprops/emoji error: UCharsTrieBuilder[%d].buildUnicodeString() failed: %s\n",
416 (int)index, u_errorName(errorCode));
417 return;
418 }
419 }
420
421 // Set indexes.
422 int32_t length = sizeof(indexes);
423 U_ASSERT(length == EmojiProps::IX_COUNT * 4);
424 int32_t offset = length;
425 indexes[EmojiProps::IX_CPTRIE_OFFSET] = offset;
426 if (!beQuiet) {
427 puts("* uemoji.icu stats *");
428 printf("UCPTrie size in bytes: %5u\n", (int)trieSize);
429 }
430 offset += trieSize;
431
432 indexes[EmojiProps::IX_RESERVED1] = offset;
433 indexes[EmojiProps::IX_RESERVED2] = offset;
434 indexes[EmojiProps::IX_RESERVED3] = offset;
435
436 int32_t index = EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET;
437 indexes[index] = offset;
438 length = getTrieString(index).length() * 2;
439 if (!beQuiet) {
440 printf("UCharsTrie size in bytes: Basic_Emoji %5u num strings: %5u\n",
441 (int)length, (int)getNumStrings(index));
442 }
443 offset += length;
444
445 index = EmojiProps::IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET;
446 indexes[index] = offset;
447 length = getTrieString(index).length() * 2;
448 if (!beQuiet) {
449 printf("UCharsTrie size in bytes: Emoji_Keycap_Sequence %5u num strings: %5u\n",
450 (int)length, (int)getNumStrings(index));
451 }
452 offset += length;
453
454 index = EmojiProps::IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET;
455 indexes[index] = offset;
456 length = getTrieString(index).length() * 2;
457 if (!beQuiet) {
458 printf("UCharsTrie size in bytes: RGI_Emoji_Modifier_Sequence %5u num strings: %5u\n",
459 (int)length, (int)getNumStrings(index));
460 }
461 offset += length;
462
463 index = EmojiProps::IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET;
464 indexes[index] = offset;
465 length = getTrieString(index).length() * 2;
466 if (!beQuiet) {
467 printf("UCharsTrie size in bytes: RGI_Emoji_Flag_Sequence %5u num strings: %5u\n",
468 (int)length, (int)getNumStrings(index));
469 }
470 offset += length;
471
472 index = EmojiProps::IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET;
473 indexes[index] = offset;
474 length = getTrieString(index).length() * 2;
475 if (!beQuiet) {
476 printf("UCharsTrie size in bytes: RGI_Emoji_Tag_Sequence %5u num strings: %5u\n",
477 (int)length, (int)getNumStrings(index));
478 }
479 offset += length;
480
481 index = EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET;
482 indexes[index] = offset;
483 length = getTrieString(index).length() * 2;
484 if (!beQuiet) {
485 printf("UCharsTrie size in bytes: RGI_Emoji_ZWJ_Sequence %5u num strings: %5u\n",
486 (int)length, (int)getNumStrings(index));
487 }
488 offset += length;
489
490 indexes[EmojiProps::IX_RESERVED10] = offset;
491 indexes[EmojiProps::IX_RESERVED11] = offset;
492 indexes[EmojiProps::IX_RESERVED12] = offset;
493 indexes[EmojiProps::IX_TOTAL_SIZE] = offset;
494
495 if (!beQuiet) {
496 printf("data size: %6ld\n", (long)offset);
497 }
498 }
499
500 namespace {
501
writeTrieBlock(UNewDataMemory * pData,const UnicodeString & s)502 void writeTrieBlock(UNewDataMemory *pData, const UnicodeString &s) {
503 udata_writeBlock(pData, s.getBuffer(), s.length() * 2);
504 }
505
506 } // namespace
507
508 void
writeBinaryData(const char * path,UBool withCopyright,UErrorCode & errorCode)509 EmojiPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
510 if (U_FAILURE(errorCode)) { return; }
511
512 UNewDataMemory *pData = udata_create(path, "icu", "uemoji", &dataInfo,
513 withCopyright ? U_COPYRIGHT_STRING : nullptr, &errorCode);
514 if (U_FAILURE(errorCode)) {
515 fprintf(stderr, "genprops/emoji error: udata_create(%s, uemoji.icu) failed: %s\n",
516 path, u_errorName(errorCode));
517 return;
518 }
519
520 udata_writeBlock(pData, indexes, sizeof(indexes));
521 udata_writeBlock(pData, trieBlock, trieSize);
522 writeTrieBlock(pData, getTrieString(EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET));
523 writeTrieBlock(pData, getTrieString(EmojiProps::IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET));
524 writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET));
525 writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET));
526 writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET));
527 writeTrieBlock(pData, getTrieString(EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET));
528
529 long dataLength = udata_finish(pData, &errorCode);
530 if (U_FAILURE(errorCode)) {
531 fprintf(stderr,
532 "genprops/emoji error: error %s writing the output file\n",
533 u_errorName(errorCode));
534 return;
535 }
536
537 int32_t totalSize = indexes[EmojiProps::IX_TOTAL_SIZE];
538 if (dataLength != (long)totalSize) {
539 fprintf(stderr,
540 "udata_finish(uemoji.icu) reports %ld bytes written but should be %ld\n",
541 dataLength, (long)totalSize);
542 errorCode = U_INTERNAL_PROGRAM_ERROR;
543 }
544 }
545
546 PropsBuilder *
createEmojiPropsBuilder(UErrorCode & errorCode)547 createEmojiPropsBuilder(UErrorCode &errorCode) {
548 if (U_FAILURE(errorCode)) { return nullptr; }
549 PropsBuilder *pb = new EmojiPropsBuilder(errorCode);
550 if (pb == nullptr) {
551 errorCode = U_MEMORY_ALLOCATION_ERROR;
552 }
553 return pb;
554 }
555
556 /*
557 * Hey, Emacs, please set the following:
558 *
559 * Local Variables:
560 * indent-tabs-mode: nil
561 * End:
562 *
563 */
564