• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2004-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  casepropsbuilder.cpp (was gencase/store.c)
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2004aug28
16 *   created by: Markus W. Scherer
17 *
18 *   Store Unicode case mapping properties efficiently for
19 *   random access.
20 */
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include "unicode/utypes.h"
25 #include "unicode/localpointer.h"
26 #include "unicode/uchar.h"
27 #include "unicode/udata.h"
28 #include "unicode/uniset.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/ustring.h"
31 #include "cmemory.h"
32 #include "cstring.h"
33 #include "genprops.h"
34 #include "ppucd.h"
35 #include "uassert.h"
36 #include "uarrsort.h"
37 #include "ucase.h"
38 #include "unewdata.h"
39 #include "utrie2.h"
40 #include "writesrc.h"
41 
42 /* Unicode case mapping properties file format ---------------------------------
43 
44 The file format prepared and written here contains several data
45 structures that store indexes or data.
46 
47 Before the data contents described below, there are the headers required by
48 the udata API for loading ICU data. Especially, a UDataInfo structure
49 precedes the actual data. It contains platform properties values and the
50 file format version.
51 
52 The following is a description of format version 4.0 .
53 
54 Format version 1.1 adds data for case closure.
55 
56 Format version 1.2 adds an exception bit for case-ignorable. Needed because
57 the Cased and Case_Ignorable properties are not disjoint.
58 
59 Format version 2.0 changes from UTrie to UTrie2.
60 
61 Format version 3.0 (ICU 49) shuffles the trie bits to simplify some builder and runtime code.
62 It moves the Case_Ignorable flag from sometimes-trie-bit 6, sometimes-exception-bit 11
63 to always-trie-bit 2 and adjusts the higher trie bits accordingly.
64 Exception index reduced from 12 bits to 11, simple case mapping delta reduced from 10 bits to 9.
65 
66 Format version 4.0 (ICU 62) swaps trie data bits 3 and 4, exception vs. case-sensitive,
67 and when exception=1 then data bits 15..4 (not 15..5) are used for the exception index,
and the case-sensitive bit is moved into the excWord. This will allow for more exception words.
69 Also, an additional optional exception slot is used for a 16-bit delta,
70 with one more excWord bit if the delta is actually negative,
71 for a reasonably compact, and compressible, encoding of simple case mappings
72 between distant blocks for Cherokee, Georgian, and similar.
73 Another excWord bit is used to indicate that the character has no simple case folding,
74 even if it has a simple lowercase mapping.
75 
76 The file contains the following structures:
77 
78     const int32_t indexes[i0] with values i0, i1, ...:
79     (see UCASE_IX_... constants for names of indexes)
80 
81     i0 indexLength; -- length of indexes[] (UCASE_IX_TOP)
82     i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
83     i2 trieSize; -- size in bytes of the case mapping properties trie
84     i3 exceptionsLength; -- length in uint16_t of the exceptions array
85     i4 unfoldLength; -- length in uint16_t of the reverse-folding array (new in format version 1.1)
86 
87     i5..i14 reservedIndexes; -- reserved values; 0 for now
88 
89     i15 maxFullLength; -- maximum length of a full case mapping/folding string
90 
91 
92     Serialized trie, see utrie2.h;
93 
94     const uint16_t exceptions[exceptionsLength];
95 
96     const UChar unfold[unfoldLength];
97 
98 
99 Trie data word:
100 Bits
101 if(exception) {
102     15..4   unsigned exception index
103 } else {
104     if(not uncased) {
105         15..7   signed delta to simple case mapping code point
106                 (add delta to input code point)
107     } else {
108         15..7   reserved, 0
109     }
110      6..5   0 normal character with cc=0
111             1 soft-dotted character
112             2 cc=230
113             3 other cc
114             The runtime code relies on these two bits to be adjacent with this encoding.
115 }
116     4   case-sensitive
117     3   exception
118     2   case-ignorable
119  1..0   0 uncased
120         1 lowercase
121         2 uppercase
122         3 titlecase
123         The runtime code relies on the case-ignorable and case type bits 2..0
124         to be the lowest bits with this encoding.
125 
126 
127 Exceptions:
128 A sub-array of the exceptions array is indexed by the exception index in a
129 trie word.
130 The sub-array consists of the following fields:
131     uint16_t excWord;
132     uint16_t optional values [];
133     UTF-16 strings for full (string) mappings for lowercase, case folding, uppercase, titlecase
134 
135 excWord: (see UCASE_EXC_...)
136 Bits
137     15  conditional case folding
138     14  conditional special casing
139 13..12  same as non-exception trie data bits 6..5
140         moved here because the exception index needs more bits than the delta
141         0 normal character with cc=0
142         1 soft-dotted character
143         2 cc=230
144         3 other cc
145     11  same as non-exception case-sensitive bit
146     10  the delta in the optional value slot is negative
147      9  no simple case folding, even if there is a simple lowercase mapping
148      8  if set, then for each optional-value slot there are 2 uint16_t values
149         (high and low parts of 32-bit values)
150         instead of single ones
151  7.. 0  bits for which optional value is present
152 
153 Optional-value slots:
154 0   lowercase mapping (code point)
155 1   case folding (code point)
156 2   uppercase mapping (code point)
157 3   titlecase mapping (code point)
158 4   delta to simple case mapping code point
159     (add delta to input code point, or subtract if excWord bit 10 is set)
160 5   reserved
161 6   closure mappings (new in format version 1.1)
162 7   there is at least one full (string) case mapping
163     the length of each is encoded in a nibble of this optional value,
164     and the strings follow this optional value in the same order:
165     lower/fold/upper/title
166 
167 The optional closure mappings value is used as follows:
168 Bits 0..3 contain the length of a string of code points for case closure.
169 The string immediately follows the full case mappings, or the closure value
170 slot if there are no full case mappings.
171 Bits 4..15 are reserved and could be used in the future to indicate the
172 number of strings for case closure.
173 Complete case closure for a code point is given by the union of all simple
174 and full case mappings and foldings, plus the case closure code points
175 (and potentially, in the future, case closure strings).
176 
177 For space saving, some values are not stored. Lookups are as follows:
178 - If special casing is conditional, then no full lower/upper/title mapping
179   strings are stored.
180 - If case folding is conditional, then no simple or full case foldings are
181   stored.
182 - Fall back in this order:
183     full (string) mapping -- if full mappings are used
184     simple (code point) mapping of the same type
185     simple fold->simple lower
186     simple title->simple upper
187     finally, the original code point (no mapping)
188 
189 This fallback order is strict:
190 In particular, the fallback from full case folding is to simple case folding,
191 not to full lowercase mapping.
192 
193 Reverse case folding data ("unfold") array: (new in format version 1.1)
194 
195 This array stores some miscellaneous values followed by a table. The data maps
196 back from multi-character strings to their original code points, for use
197 in case closure.
198 
199 The table contains two columns of strings.
200 The string in the first column is the case folding of each of the code points
201 in the second column. The strings are terminated with NUL or by the end of the
202 column, whichever comes first.
203 
204 The miscellaneous data takes up one pseudo-row and includes:
205 - number of rows
206 - number of UChars per row
207 - number of UChars in the left (folding string) column
208 
209 The table is sorted by its first column. Values in the first column are unique.
210 
211 ----------------------------------------------------------------------------- */
212 
213 U_NAMESPACE_USE
214 
215 /* UDataInfo cf. udata.h */
/*
 * Header information for the generated case properties data.
 * The initializers are positional and follow the field order of
 * UDataInfo in udata.h.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),  /* size */
    0,                  /* reservedWord */

    U_IS_BIG_ENDIAN,    /* isBigEndian */
    U_CHARSET_FAMILY,   /* charsetFamily */
    U_SIZEOF_UCHAR,     /* sizeofUChar */
    0,                  /* reservedByte */

    /* dataFormat="cAsE" */
    { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
    /* format version 4.0, as described in the file format comment above */
    { 4, 0, 0, 0 },  /* formatVersion */
    /* placeholder; overwritten by CasePropsBuilder::setUnicodeVersion() */
    { 11, 0, 0, 0 }  /* dataVersion */
};
230 
/*
 * While properties are being collected, a code point that needs an exception
 * keeps the index of its entry in the excProps[] array in the top 12 bits
 * of its 32-bit trie value (see setProps() and makeExcProps()).
 */
#define UGENCASE_EXC_SHIFT     20
#define UGENCASE_EXC_MASK      0xfff00000

enum {
    /* number of distinct indexes that fit into UGENCASE_EXC_MASK; also the length of excProps[] */
    MAX_EXC_COUNT=(UGENCASE_EXC_MASK>>UGENCASE_EXC_SHIFT)+1
};
237 
238 struct ExcProps {
ExcPropsExcProps239     ExcProps() :
240             delta(0), hasConditionalCaseMappings(FALSE), hasTurkicCaseFolding(FALSE),
241             hasNoSimpleCaseFolding(FALSE) {}
ExcPropsExcProps242     ExcProps(const UniProps &otherProps) :
243             props(otherProps),
244             delta(0), hasConditionalCaseMappings(FALSE), hasTurkicCaseFolding(FALSE),
245             hasNoSimpleCaseFolding(FALSE) {}
246 
247     UniProps props;
248     UnicodeSet closure;
249     int32_t delta;
250     UBool hasConditionalCaseMappings;
251     UBool hasTurkicCaseFolding;
252     UBool hasNoSimpleCaseFolding;
253 };
254 
255 /*
256  * Values for the ucase.icu unfold[] data array.
257  * The values are stored in ucase.icu so that the runtime code will work with
258  * changing values, but they are hardcoded here for simplicity.
259  * They are optimized, that is, provide for minimal table column widths,
260  * for the actual Unicode data, so that the table size is minimized.
261  * Future versions of Unicode may require increases of some of these values.
262  */
enum {
    /* number of UChars in the left column, which holds the case folding string */
    UGENCASE_UNFOLD_STRING_WIDTH=3,
    /* number of UChars in the right column, which holds the code point(s) that fold to the string */
    UGENCASE_UNFOLD_CP_WIDTH=2,
    /* total number of UChars per row */
    UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH
};
268 
269 class CasePropsBuilder : public PropsBuilder {
270 public:
271     CasePropsBuilder(UErrorCode &errorCode);
272     virtual ~CasePropsBuilder();
273 
274     virtual void setUnicodeVersion(const UVersionInfo version);
275     virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode);
276     virtual void build(UErrorCode &errorCode);
277     virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
278     virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
279 
280 private:
281     uint32_t makeExcProps(UChar32 c, uint32_t value, UErrorCode &errorCode);
282     void addUnfolding(UChar32 c, const UnicodeString &s, UErrorCode &errorCode);
283     void makeUnfoldData(UErrorCode &errorCode);
284     void addClosureMapping(UChar32 src, UChar32 dest, UErrorCode &errorCode);
285     UBool addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value,
286                      UErrorCode &errorCode);
287     void makeCaseClosure(UErrorCode &errorCode);
288     int32_t makeException(UChar32 c, uint32_t value, ExcProps &ep, UErrorCode &errorCode);
289     void makeExceptions(UErrorCode &errorCode);
290 
291     UnicodeSet relevantProps;
292     /*
293      * Unicode set collecting the case-sensitive characters;
294      * see uchar.h UCHAR_CASE_SENSITIVE.
295      * Add code points from case mappings/foldings in
296      * the root locale and with default options.
297      */
298     UnicodeSet caseSensitive;
299     /* reverse case folding ("unfold") data */
300     UnicodeString unfold;
301     UnicodeString exceptions;
302     ExcProps **excProps;
303     int32_t excPropsCount;
304     /* becomes indexes[UCASE_IX_MAX_FULL_LENGTH] */
305     int32_t maxFullLength;
306     UTrie2 *pTrie;
307 };
308 
CasePropsBuilder(UErrorCode & errorCode)309 CasePropsBuilder::CasePropsBuilder(UErrorCode &errorCode)
310         : excProps(NULL), excPropsCount(0), maxFullLength(U16_MAX_LENGTH), pTrie(NULL) {
311     // This builder encodes the following properties.
312     relevantProps.
313         add(UCHAR_CANONICAL_COMBINING_CLASS).  // 0 vs. 230 vs. other
314         add(UCHAR_SOFT_DOTTED).
315         add(UCHAR_LOWERCASE).
316         add(UCHAR_UPPERCASE).
317         add(UCHAR_CASE_IGNORABLE).
318         add(UCHAR_SIMPLE_CASE_FOLDING).
319         add(UCHAR_SIMPLE_LOWERCASE_MAPPING).
320         add(UCHAR_SIMPLE_TITLECASE_MAPPING).
321         add(UCHAR_SIMPLE_UPPERCASE_MAPPING).
322         add(UCHAR_CASE_FOLDING).
323         add(UCHAR_LOWERCASE_MAPPING).
324         add(UCHAR_TITLECASE_MAPPING).
325         add(UCHAR_UPPERCASE_MAPPING).
326         add(PPUCD_CONDITIONAL_CASE_MAPPINGS).
327         add(PPUCD_TURKIC_CASE_FOLDING);
328     // Write "unfold" meta data into the first row. Must be UGENCASE_UNFOLD_WIDTH UChars.
329     unfold.
330         append(0).
331         append((UChar)UGENCASE_UNFOLD_WIDTH).
332         append((UChar)UGENCASE_UNFOLD_STRING_WIDTH).
333         append(0).
334         append(0);
335     U_ASSERT(unfold.length()==UGENCASE_UNFOLD_WIDTH);
336     pTrie=utrie2_open(0, 0, &errorCode);
337     if(U_FAILURE(errorCode)) {
338         fprintf(stderr, "genprops error: casepropsbuilder utrie2_open() failed - %s\n",
339                 u_errorName(errorCode));
340         return;
341     }
342     excProps=new ExcProps *[MAX_EXC_COUNT];
343     if(excProps==NULL) {
344         fprintf(stderr,
345                 "genprops error: casepropsbuilder out of memory allocating "
346                 "the array of exceptions properties\n");
347         errorCode=U_MEMORY_ALLOCATION_ERROR;
348     }
349 }
350 
~CasePropsBuilder()351 CasePropsBuilder::~CasePropsBuilder() {
352     utrie2_close(pTrie);
353     for(int32_t i=0; i<excPropsCount; ++i) {
354         delete excProps[i];
355     }
356     delete[] excProps;
357 }
358 
359 void
setUnicodeVersion(const UVersionInfo version)360 CasePropsBuilder::setUnicodeVersion(const UVersionInfo version) {
361     uprv_memcpy(dataInfo.dataVersion, version, 4);
362 }
363 
364 /* -------------------------------------------------------------------------- */
365 
366 void
addUnfolding(UChar32 c,const UnicodeString & s,UErrorCode & errorCode)367 CasePropsBuilder::addUnfolding(UChar32 c, const UnicodeString &s, UErrorCode &errorCode) {
368     if(U_FAILURE(errorCode)) { return; }
369 
370     int32_t length=s.length();
371     if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
372         fprintf(stderr, "genprops error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
373                 (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
374         errorCode=U_INTERNAL_PROGRAM_ERROR;
375     }
376     unfold.append(s);
377     while(length<UGENCASE_UNFOLD_STRING_WIDTH) {
378         unfold.append(0);
379         ++length;
380     }
381 
382     unfold.append(c);
383     if(U16_LENGTH(c)<UGENCASE_UNFOLD_CP_WIDTH) {
384         unfold.append(0);
385     }
386 
387     U_ASSERT((unfold.length()%UGENCASE_UNFOLD_WIDTH)==0);
388 }
389 
390 /* store a character's properties ------------------------------------------- */
391 
392 void
setProps(const UniProps & props,const UnicodeSet & newValues,UErrorCode & errorCode)393 CasePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
394                            UErrorCode &errorCode) {
395     if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }
396 
397     UChar32 start=props.start;
398     UChar32 end=props.end;
399 
400     /* default: map to self */
401     int32_t delta=0;
402     UBool noDelta=FALSE;
403 
404     uint32_t type;
405     if(props.binProps[UCHAR_LOWERCASE]) {
406         type=UCASE_LOWER;
407     } else if(props.binProps[UCHAR_UPPERCASE]) {
408         type=UCASE_UPPER;
409     } else if(props.getIntProp(UCHAR_GENERAL_CATEGORY)==U_TITLECASE_LETTER) {
410         type=UCASE_TITLE;
411     } else {
412         type=UCASE_NONE;
413     }
414     uint32_t value=type;
415 
416     // Examine simple case mappings.
417     UBool hasMapping=FALSE;
418     if(props.suc>=0) {
419         /* uppercase mapping as delta if the character is lowercase */
420         hasMapping=TRUE;
421         if(type==UCASE_LOWER) {
422             delta=props.suc-start;
423         } else {
424             noDelta=TRUE;
425             value|=UCASE_EXCEPTION;
426         }
427     }
428     if(props.slc>=0) {
429         /* lowercase mapping as delta if the character is uppercase or titlecase */
430         hasMapping=TRUE;
431         if(type>=UCASE_UPPER) {
432             delta=props.slc-start;
433         } else {
434             noDelta=TRUE;
435             value|=UCASE_EXCEPTION;
436         }
437     }
438     if(props.stc>=0) {
439         hasMapping=TRUE;
440     }
441     if(props.suc!=props.stc) {
442         noDelta=TRUE;
443         value|=UCASE_EXCEPTION;
444     }
445 
446     // Simple case folding falls back to simple lowercasing.
447     // If they differ, then store them separately.
448     UChar32 scf=props.scf;
449     if(scf>=0 && scf!=props.slc) {
450         hasMapping=noDelta=TRUE;
451         value|=UCASE_EXCEPTION;
452     }
453 
454     // If there is no case folding but there is a lowercase mapping,
455     // then set a bit for that.
456     // For example: Cherokee uppercase syllables since Unicode 8.
457     // (Full case folding falls back to simple case folding,
458     // not to full lowercasing, so we need not also handle it specially
459     // for such cases.)
460     UBool hasNoSimpleCaseFolding=FALSE;
461     if(scf<0 && props.slc>=0) {
462         hasNoSimpleCaseFolding=TRUE;
463         value|=UCASE_EXCEPTION;
464     }
465 
466     if(noDelta) {
467         delta=0;
468     } else if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
469         // The case mapping delta is too big for the main data word.
470         // Store it in an exceptions slot.
471         value|=UCASE_EXCEPTION;
472     }
473 
474     // Examine full case mappings.
475     if(!props.lc.isEmpty() || !props.uc.isEmpty() || !props.tc.isEmpty() ||
476         newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS)
477     ) {
478         hasMapping=TRUE;
479         value|=UCASE_EXCEPTION;
480     }
481     if( (!props.cf.isEmpty() && props.cf!=UnicodeString(props.scf)) ||
482         newValues.contains(PPUCD_TURKIC_CASE_FOLDING)
483     ) {
484         hasMapping=TRUE;
485         value|=UCASE_EXCEPTION;
486     }
487 
488     if(props.binProps[UCHAR_SOFT_DOTTED]) {
489         value|=UCASE_SOFT_DOTTED;
490     }
491     int32_t cc=props.getIntProp(UCHAR_CANONICAL_COMBINING_CLASS);
492     if(cc!=0) {
493         if(props.binProps[UCHAR_SOFT_DOTTED]) {
494             fprintf(stderr, "genprops error: a soft-dotted character has ccc!=0\n");
495             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
496             return;
497         }
498         if(cc==230) {
499             value|=UCASE_ABOVE;
500         } else {
501             value|=UCASE_OTHER_ACCENT;
502         }
503     }
504 
505     if(props.binProps[UCHAR_CASE_IGNORABLE]) {
506         value|=UCASE_IGNORABLE;
507     }
508 
509     if((hasMapping || (value&UCASE_EXCEPTION)) && start!=end) {
510         fprintf(stderr,
511                 "genprops error: range %04lX..%04lX has case mappings "
512                 "or reasons for data structure exceptions\n",
513                 (long)start, (long)end);
514         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
515         return;
516     }
517 
518     /* handle exceptions */
519     if(value&UCASE_EXCEPTION) {
520         /* simply store exceptions for later processing and encoding */
521         if(excPropsCount==MAX_EXC_COUNT) {
522             fprintf(stderr, "genprops error: casepropsbuilder: too many exceptions\n");
523             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
524             return;
525         }
526         ExcProps *newExcProps=new ExcProps(props);
527         if(newExcProps==NULL) {
528             fprintf(stderr,
529                     "genprops error: casepropsbuilder out of memory allocating "
530                     "exceptions properties\n");
531             errorCode=U_MEMORY_ALLOCATION_ERROR;
532             return;
533         }
534         newExcProps->props.scf=scf;
535         newExcProps->delta=delta;
536         newExcProps->hasConditionalCaseMappings=
537             newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS) ||
538             // See ICU-13416: և ligature ech-yiwn has language-specific
539             // uppercase and titlecase mappings.
540             start==0x0587;
541         newExcProps->hasTurkicCaseFolding=newValues.contains(PPUCD_TURKIC_CASE_FOLDING);
542         newExcProps->hasNoSimpleCaseFolding=hasNoSimpleCaseFolding;
543         value|=(uint32_t)excPropsCount<<UGENCASE_EXC_SHIFT;
544         excProps[excPropsCount++]=newExcProps;
545     } else {
546         /* store the simple case mapping delta */
547         value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
548     }
549 
550     utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
551     if(U_FAILURE(errorCode)) {
552         fprintf(stderr, "genprops error: unable to set case mapping values: %s\n",
553                 u_errorName(errorCode));
554         return;
555     }
556 
557     if(hasMapping) {
558         /* update the case-sensitive set */
559         caseSensitive.add(start);
560         if(scf>=0) { caseSensitive.add(scf); }
561         if(props.slc>=0) { caseSensitive.add(props.slc); }
562         if(props.suc>=0) { caseSensitive.add(props.suc); }
563         if(props.stc>=0) { caseSensitive.add(props.stc); }
564         caseSensitive.addAll(props.cf);
565         caseSensitive.addAll(props.lc);
566         caseSensitive.addAll(props.uc);
567         caseSensitive.addAll(props.tc);
568 
569         /* update maxFullLength */
570         if(props.cf.length()>maxFullLength) { maxFullLength=props.cf.length(); }
571         if(props.lc.length()>maxFullLength) { maxFullLength=props.lc.length(); }
572         if(props.uc.length()>maxFullLength) { maxFullLength=props.uc.length(); }
573         if(props.tc.length()>maxFullLength) { maxFullLength=props.tc.length(); }
574     }
575 
576     /* add the multi-character case folding to the "unfold" data */
577     if(props.cf.hasMoreChar32Than(0, 0x7fffffff, 1)) {
578         addUnfolding(start, props.cf, errorCode);
579     }
580 }
581 
582 uint32_t
makeExcProps(UChar32 c,uint32_t value,UErrorCode & errorCode)583 CasePropsBuilder::makeExcProps(UChar32 c, uint32_t value, UErrorCode &errorCode) {
584     if(U_FAILURE(errorCode)) { return 0; }
585     if(excPropsCount==MAX_EXC_COUNT) {
586         fprintf(stderr, "genprops error: casepropsbuilder: too many exceptions\n");
587         errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
588         return 0;
589     }
590     LocalPointer<ExcProps> newExcProps(new ExcProps);
591     if(newExcProps==NULL) {
592         fprintf(stderr,
593                 "genprops error: casepropsbuilder out of memory allocating "
594                 "exceptions properties\n");
595         errorCode=U_MEMORY_ALLOCATION_ERROR;
596         return 0;
597     }
598 
599     if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
600         // Decode the simple case mapping.
601         UChar32 next=c+UCASE_GET_DELTA(value);
602         if(next!=c) {
603             UniProps &p=newExcProps->props;
604             if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
605                 p.suc=p.stc=next;
606             } else {
607                 p.slc=next;
608             }
609         }
610     }
611 
612     value&=~(UGENCASE_EXC_MASK|UCASE_DELTA_MASK); // remove previous simple mapping
613     value|=(uint32_t)excPropsCount<<UGENCASE_EXC_SHIFT;
614     value|=UCASE_EXCEPTION;
615     excProps[excPropsCount++]=newExcProps.orphan();
616     return value;
617 }
618 
619 /* finalize reverse case folding ("unfold") data ---------------------------- */
620 
621 static int32_t U_CALLCONV
compareUnfold(const void * context,const void * left,const void * right)622 compareUnfold(const void *context, const void *left, const void *right) {
623     return u_memcmp((const UChar *)left, (const UChar *)right, UGENCASE_UNFOLD_WIDTH);
624 }
625 
626 void
makeUnfoldData(UErrorCode & errorCode)627 CasePropsBuilder::makeUnfoldData(UErrorCode &errorCode) {
628     if(U_FAILURE(errorCode)) { return; }
629 
630     UChar *p, *q;
631     int32_t i, j, k;
632 
633     /* sort the data */
634     int32_t unfoldLength=unfold.length();
635     int32_t unfoldRows=unfoldLength/UGENCASE_UNFOLD_WIDTH-1;
636     UChar *unfoldBuffer=unfold.getBuffer(-1);
637     uprv_sortArray(unfoldBuffer+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
638                    compareUnfold, NULL, FALSE, &errorCode);
639 
640     /* make unique-string rows by merging adjacent ones' code point columns */
641 
642     /* make p point to row i-1 */
643     p=unfoldBuffer+UGENCASE_UNFOLD_WIDTH;
644 
645     for(i=1; i<unfoldRows;) {
646         if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
647             /* concatenate code point columns */
648             q=p+UGENCASE_UNFOLD_STRING_WIDTH;
649             for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
650             for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
651                 q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
652             }
653             if(j>UGENCASE_UNFOLD_CP_WIDTH) {
654                 fprintf(stderr, "genprops error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
655                         (long)j, UGENCASE_UNFOLD_CP_WIDTH);
656                 errorCode=U_BUFFER_OVERFLOW_ERROR;
657                 return;
658             }
659 
660             /* move following rows up one */
661             --unfoldRows;
662             u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
663         } else {
664             p+=UGENCASE_UNFOLD_WIDTH;
665             ++i;
666         }
667     }
668 
669     unfoldBuffer[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;
670 
671     if(beVerbose) {
672         puts("unfold data:");
673 
674         p=unfoldBuffer;
675         for(i=0; i<unfoldRows; ++i) {
676             p+=UGENCASE_UNFOLD_WIDTH;
677             printf("[%2d] %04x %04x %04x <- %04x %04x\n",
678                    (int)i, p[0], p[1], p[2], p[3], p[4]);
679         }
680     }
681 
682     unfold.releaseBuffer((unfoldRows+1)*UGENCASE_UNFOLD_WIDTH);
683 }
684 
685 /* case closure ------------------------------------------------------------- */
686 
687 void
addClosureMapping(UChar32 src,UChar32 dest,UErrorCode & errorCode)688 CasePropsBuilder::addClosureMapping(UChar32 src, UChar32 dest, UErrorCode &errorCode) {
689     if(U_FAILURE(errorCode)) { return; }
690 
691     if(beVerbose) {
692         printf("add closure mapping U+%04lx->U+%04lx\n",
693                 (unsigned long)src, (unsigned long)dest);
694     }
695 
696     uint32_t value=utrie2_get32(pTrie, src);
697     if((value&UCASE_EXCEPTION)==0) {
698         /*
699          * decode value into p2 (enough for makeException() to work properly),
700          * add the closure mapping,
701          * and set the new exception for src
702          */
703         value=makeExcProps(src, value, errorCode);
704         utrie2_set32(pTrie, src, value, &errorCode);
705         if(U_FAILURE(errorCode)) {
706             fprintf(stderr, "genprops error: unable to set case mapping values, code: %s\n",
707                             u_errorName(errorCode));
708             return;
709         }
710     }
711     excProps[value>>UGENCASE_EXC_SHIFT]->closure.add(dest);
712 }
713 
714 /*
715  * Find missing case mapping relationships and add mappings for case closure.
716  * This function starts from an "original" code point and recursively
717  * finds its case mappings and the case mappings of where it maps to.
718  *
719  * The recursion depth is capped at 3 nested calls of this function.
720  * In each call, the current code point is c, and the function enumerates
721  * all of c's simple (single-code point) case mappings.
722  * prev is the code point that case-mapped to c.
723  * prev2 is the code point that case-mapped to prev.
724  *
725  * The initial function call has prev2<0, prev<0, and c==orig
726  * (marking no code points).
727  * It enumerates c's case mappings and recurses without further action.
728  *
729  * The second-level function call has prev2<0, prev==orig, and c is
730  * the destination code point of one of prev's case mappings.
731  * The function checks if any of c's case mappings go back to orig
732  * and adds a closure mapping if not.
733  * In other words, it turns a case mapping relationship of
734  *   orig->c
735  * into
736  *   orig<->c
737  *
738  * The third-level function call has prev2==orig, prev>=0, and c is
739  * the destination code point of one of prev's case mappings.
740  * (And prev is the destination of one of prev2's case mappings.)
741  * The function checks if any of c's case mappings go back to orig
742  * and adds a closure mapping if not.
743  * In other words, it turns case mapping relationships of
744  *   orig->prev->c or orig->prev<->c
745  * into
746  *   orig->prev->c->orig or orig->prev<->c->orig
747  * etc.
748  * (Graphically, this closes a triangle.)
749  *
750  * With repeated application on all code points until no more closure mappings
751  * are added, all case equivalence groups get complete mappings.
752  * That is, in each group of code points with case relationships
753  * each code point will in the end have some mapping to each other
754  * code point in the group.
755  *
756  * @return TRUE if a closure mapping was added
757  */
758 UBool
addClosure(UChar32 orig,UChar32 prev2,UChar32 prev,UChar32 c,uint32_t value,UErrorCode & errorCode)759 CasePropsBuilder::addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value,
760                              UErrorCode &errorCode) {
761     if(U_FAILURE(errorCode)) { return FALSE; }
762 
763     UChar32 next;
764     UBool someMappingsAdded=FALSE;
765 
766     if(c!=orig) {
767         /* get the properties for c */
768         value=utrie2_get32(pTrie, c);
769     }
770     /* else if c==orig then c's value was passed in */
771 
772     if(value&UCASE_EXCEPTION) {
773         UnicodeSet set;
774 
775         ExcProps &ep=*excProps[value>>UGENCASE_EXC_SHIFT];
776         UniProps &p=ep.props;
777 
778         /*
779          * marker for whether any of c's mappings goes to orig
780          * c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
781          */
782         UBool mapsToOrig=(UBool)(c==orig);
783 
784         /* collect c's case mapping destinations in set[] */
785         if((next=p.suc)>=0 && next!=c) {
786             set.add(next);
787         }
788         if((next=p.slc)>=0 && next!=c) {
789             set.add(next);
790         }
791         if(p.suc!=(next=p.stc) && next!=c) {
792             set.add(next);
793         }
794         if((next=p.scf)>=0 && next!=c) {
795             set.add(next);
796         }
797 
798         /* add c's current closure mappings to set */
799         set.addAll(ep.closure);
800 
801         /* process all code points to which c case-maps */
802         UnicodeSetIterator iter(set);
803         while(iter.next()) {
804             next=iter.getCodepoint(); /* next!=c */
805 
806             if(next==orig) {
807                 mapsToOrig=TRUE; /* remember that we map to orig */
808             } else if(prev2<0 && next!=prev) {
809                 /*
810                  * recurse unless
811                  * we have reached maximum depth (prev2>=0) or
812                  * this is a mapping to one of the previous code points (orig, prev, c)
813                  */
814                 someMappingsAdded|=addClosure(orig, prev, c, next, 0, errorCode);
815             }
816         }
817 
818         if(!mapsToOrig) {
819             addClosureMapping(c, orig, errorCode);
820             return TRUE;
821         }
822     } else {
823         if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
824             /* one simple case mapping, don't care which one */
825             next=c+UCASE_GET_DELTA(value);
826             if(next!=c) {
827                 /*
828                  * recurse unless
829                  * we have reached maximum depth (prev2>=0) or
830                  * this is a mapping to one of the previous code points (orig, prev, c)
831                  */
832                 if(prev2<0 && next!=orig && next!=prev) {
833                     someMappingsAdded|=addClosure(orig, prev, c, next, 0, errorCode);
834                 }
835 
836                 if(c!=orig && next!=orig) {
837                     /* c does not map to orig, add a closure mapping c->orig */
838                     addClosureMapping(c, orig, errorCode);
839                     return TRUE;
840                 }
841             }
842         }
843     }
844 
845     return someMappingsAdded;
846 }
847 
848 void
makeCaseClosure(UErrorCode & errorCode)849 CasePropsBuilder::makeCaseClosure(UErrorCode &errorCode) {
850     if(U_FAILURE(errorCode)) { return; }
851 
852     /*
853      * finalize the "unfold" data because we need to use it to add closure mappings
854      * for situations like FB05->"st"<-FB06
855      * where we would otherwise miss the FB05<->FB06 relationship
856      */
857     makeUnfoldData(errorCode);
858 
859     /* use the "unfold" data to add mappings */
860 
861     /* p always points to the code points; this loop ignores the strings completely */
862     const UChar *p=unfold.getBuffer()+UGENCASE_UNFOLD_WIDTH+UGENCASE_UNFOLD_STRING_WIDTH;
863     int32_t unfoldRows=unfold.length()/UGENCASE_UNFOLD_WIDTH-1;
864 
865     for(int32_t i=0; i<unfoldRows; p+=UGENCASE_UNFOLD_WIDTH, ++i) {
866         int32_t j=0;
867         UChar32 c;
868         U16_NEXT_UNSAFE(p, j, c);
869         while(j<UGENCASE_UNFOLD_CP_WIDTH && p[j]!=0) {
870             UChar32 c2;
871             U16_NEXT_UNSAFE(p, j, c2);
872             addClosure(c, U_SENTINEL, c, c2, 0, errorCode);
873         }
874     }
875 
876     if(beVerbose) {
877         puts("---- ---- ---- ---- (done with closures from unfolding)");
878     }
879 
880     /* add further closure mappings from analyzing simple mappings */
881     UBool someMappingsAdded;
882     do {
883         someMappingsAdded=FALSE;
884 
885         for(UChar32 c=0; c<=0x10ffff; ++c) {
886             uint32_t value=utrie2_get32(pTrie, c);
887             if(value!=0) {
888                 someMappingsAdded|=addClosure(c, U_SENTINEL, U_SENTINEL, c, value, errorCode);
889             }
890         }
891 
892         if(beVerbose && someMappingsAdded) {
893             puts("---- ---- ---- ----");
894         }
895     } while(someMappingsAdded);
896 }
897 
898 /* exceptions --------------------------------------------------------------- */
899 
900 static UBool
fullMappingEqualsSimple(const UnicodeString & s,UChar32 simple,UChar32 c)901 fullMappingEqualsSimple(const UnicodeString &s, UChar32 simple, UChar32 c) {
902     if(simple<=0) {
903         simple=c; /* UCD has no simple mapping if it's the same as the code point itself */
904     }
905     return s.length()==U16_LENGTH(simple) && s.char32At(0)==simple;
906 }
907 
908 int32_t
makeException(UChar32 c,uint32_t value,ExcProps & ep,UErrorCode & errorCode)909 CasePropsBuilder::makeException(UChar32 c, uint32_t value, ExcProps &ep, UErrorCode &errorCode) {
910     if(U_FAILURE(errorCode)) { return 0; }
911 
912     /* exceptions.length() might be returned for storing in the trie word */
913     if(exceptions.length()>=UCASE_MAX_EXCEPTIONS) {
914         fprintf(stderr, "genprops error: casepropsbuilder: too many exceptions words\n");
915         errorCode=U_BUFFER_OVERFLOW_ERROR;
916         return 0;
917     }
918 
919     /* copy and shift the soft-dotted and case-sensitive bits */
920     UChar excWord=(UChar)((value&(UCASE_DOT_MASK|UCASE_SENSITIVE))<<UCASE_EXC_DOT_SHIFT);
921 
922     UniProps &p=ep.props;
923 
924     /* set the bits for conditional mappings */
925     if(ep.hasConditionalCaseMappings) {
926         excWord|=UCASE_EXC_CONDITIONAL_SPECIAL;
927         p.lc.remove();
928         p.uc.remove();
929         p.tc.remove();
930     }
931     if(ep.hasTurkicCaseFolding) {
932         excWord|=UCASE_EXC_CONDITIONAL_FOLD;
933         p.cf.remove();
934     }
935     if(ep.hasNoSimpleCaseFolding) {
936         excWord|=UCASE_EXC_NO_SIMPLE_CASE_FOLDING;
937     }
938 
939     /* remove redundant data */
940     /* do not store full mappings if they are the same as the simple ones */
941     if(fullMappingEqualsSimple(p.lc, p.slc, c)) {
942         p.lc.remove();
943     }
944     if(fullMappingEqualsSimple(p.uc, p.suc, c)) {
945         p.uc.remove();
946     }
947     if(fullMappingEqualsSimple(p.tc, p.stc, c)) {
948         p.tc.remove();
949     }
950     if(fullMappingEqualsSimple(p.cf, p.scf, c)) {
951         p.cf.remove();
952     }
953 
954     /* write the optional slots */
955     uint32_t slots[8];
956     uint32_t slotBits=0;
957     int32_t count=0;
958 
959     if(ep.delta!=0) {
960         int32_t delta=ep.delta;
961         if(delta<0) {
962             excWord|=UCASE_EXC_DELTA_IS_NEGATIVE;
963             delta=-delta;
964         }
965         slots[count]=(uint32_t)delta;
966         slotBits|=slots[count];
967         ++count;
968         excWord|=U_MASK(UCASE_EXC_DELTA);
969     } else {
970         if(p.slc>=0) {
971             slots[count]=(uint32_t)p.slc;
972             slotBits|=slots[count];
973             ++count;
974             excWord|=U_MASK(UCASE_EXC_LOWER);
975         }
976         if( p.scf>=0 &&
977                 (p.slc>=0 ?
978                     p.scf!=p.slc :
979                     p.scf!=c)) {
980             slots[count]=(uint32_t)p.scf;
981             slotBits|=slots[count];
982             ++count;
983             excWord|=U_MASK(UCASE_EXC_FOLD);
984         }
985         if(p.suc>=0) {
986             slots[count]=(uint32_t)p.suc;
987             slotBits|=slots[count];
988             ++count;
989             excWord|=U_MASK(UCASE_EXC_UPPER);
990         }
991         if(p.suc!=p.stc) {
992             if(p.stc>=0) {
993                 slots[count]=(uint32_t)p.stc;
994             } else {
995                 slots[count]=(uint32_t)c;
996             }
997             slotBits|=slots[count];
998             ++count;
999             excWord|=U_MASK(UCASE_EXC_TITLE);
1000         }
1001     }
1002 
1003     /* length of case closure */
1004     UnicodeString closureString;
1005     if(!ep.closure.isEmpty()) {
1006         UnicodeSetIterator iter(ep.closure);
1007         while(iter.next()) { closureString.append(iter.getCodepoint()); }
1008         int32_t length=closureString.length();
1009         if(length>UCASE_CLOSURE_MAX_LENGTH) {
1010             fprintf(stderr,
1011                     "genprops error: case closure for U+%04lX has length %d "
1012                     "which exceeds UCASE_CLOSURE_MAX_LENGTH=%d\n",
1013                     (long)c, (int)length, (int)UCASE_CLOSURE_MAX_LENGTH);
1014             errorCode=U_BUFFER_OVERFLOW_ERROR;
1015             return 0;
1016         }
1017         slots[count]=(uint32_t)length; /* must be 1..UCASE_CLOSURE_MAX_LENGTH */
1018         slotBits|=slots[count];
1019         ++count;
1020         excWord|=U_MASK(UCASE_EXC_CLOSURE);
1021     }
1022 
1023     /* lengths of full case mapping strings, stored in the last slot */
1024     int32_t fullLengths=
1025         p.lc.length()|
1026         (p.cf.length()<<4)|
1027         (p.uc.length()<<8)|
1028         (p.tc.length()<<12);
1029     if(fullLengths!=0) {
1030         slots[count]=(uint32_t)fullLengths;
1031         slotBits|=slots[count];
1032         ++count;
1033         excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
1034     }
1035 
1036     if(count==0) {
1037         /* No optional slots: Try to share excWord entries. */
1038         int32_t excIndex=exceptions.indexOf((UChar)excWord);
1039         if(excIndex>=0) {
1040             return excIndex;
1041         }
1042         /* not found */
1043         excIndex=exceptions.length();
1044         exceptions.append((UChar)excWord);
1045         return excIndex;
1046     } else {
1047         /* write slots */
1048         UnicodeString excString;
1049         excString.append((UChar)0);  /* placeholder for excWord which will be stored at excIndex */
1050 
1051         if(slotBits<=0xffff) {
1052             for(int32_t i=0; i<count; ++i) {
1053                 excString.append((UChar)slots[i]);
1054             }
1055         } else {
1056             excWord|=UCASE_EXC_DOUBLE_SLOTS;
1057             for(int32_t i=0; i<count; ++i) {
1058                 excString.append((UChar)(slots[i]>>16));
1059                 excString.append((UChar)slots[i]);
1060             }
1061         }
1062 
1063         /* write the full case mapping strings */
1064         excString.append(p.lc);
1065         excString.append(p.cf);
1066         excString.append(p.uc);
1067         excString.append(p.tc);
1068 
1069         /* write the closure data */
1070         excString.append(closureString);
1071 
1072         /* write the main exceptions word */
1073         excString.setCharAt(0, (UChar)excWord);
1074 
1075         // Try to share data.
1076         if(count==1 && ep.delta!=0) {
1077             int32_t excIndex=exceptions.indexOf(excString);
1078             if(excIndex>=0) {
1079                 return excIndex;
1080             }
1081         }
1082         int32_t excIndex=exceptions.length();
1083         exceptions.append(excString);
1084         return excIndex;
1085     }
1086 }
1087 
1088 void
makeExceptions(UErrorCode & errorCode)1089 CasePropsBuilder::makeExceptions(UErrorCode &errorCode) {
1090     if(U_FAILURE(errorCode)) { return; }
1091 
1092     /*
1093      * Encode case-ignorable as delta==1 on uncased characters,
1094      * and with an exception bit on cased characters and characters with another exception.
1095      *
1096      * Change from temporary UGENCASE_EXC_SHIFT'ed index into excProps[]
1097      * to UCASE_EXC_SHIFT'ed index into encoded exceptions[].
1098      */
1099     for(UChar32 c=0; c<=0x10ffff; ++c) {
1100         uint32_t value=utrie2_get32(pTrie, c);
1101         if(value&UCASE_EXCEPTION) {
1102             int32_t excIndex=makeException(c, value, *excProps[value>>UGENCASE_EXC_SHIFT], errorCode);
1103             value=(value&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|((uint32_t)excIndex<<UCASE_EXC_SHIFT);
1104             utrie2_set32(pTrie, c, value, &errorCode);
1105         }
1106     }
1107 }
1108 
1109 /* generate output data ----------------------------------------------------- */
1110 
1111 static int32_t indexes[UCASE_IX_TOP]={
1112     UCASE_IX_TOP, 0, 0, 0,
1113     0, 0, 0, 0,
1114     0, 0, 0, 0,
1115     0, 0, 0, 0
1116 };
1117 
1118 static uint8_t trieBlock[100000];
1119 static int32_t trieSize;
1120 
1121 void
build(UErrorCode & errorCode)1122 CasePropsBuilder::build(UErrorCode &errorCode) {
1123     if(!beQuiet) {
1124         puts("* ucase.icu stats *");
1125     }
1126 
1127     makeCaseClosure(errorCode);
1128     if(U_FAILURE(errorCode)) { return; }
1129 
1130     /*
1131      * Add one complex mapping to caseSensitive that was filtered out above:
1132      * Greek final Sigma has a conditional mapping but not locale-sensitive,
1133      * and it is taken when lowercasing just U+03A3 alone.
1134      * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
1135      */
1136     caseSensitive.add(0x3c2);
1137 
1138     UnicodeSetIterator iter(caseSensitive);
1139     while(iter.next()) {
1140         UChar32 c=iter.getCodepoint();
1141         uint32_t value=utrie2_get32(pTrie, c);
1142         if((value&UCASE_SENSITIVE)==0) {
1143             utrie2_set32(pTrie, c, value|UCASE_SENSITIVE, &errorCode);
1144         }
1145     }
1146     if(U_FAILURE(errorCode)) {
1147         fprintf(stderr, "genprops/case error: unable to set UCASE_SENSITIVE: %s\n",
1148                 u_errorName(errorCode));
1149         return;
1150     }
1151 
1152     makeExceptions(errorCode);
1153     if(U_FAILURE(errorCode)) { return; }
1154 
1155     utrie2_freeze(pTrie, UTRIE2_16_VALUE_BITS, &errorCode);
1156     if(U_FAILURE(errorCode)) {
1157         fprintf(stderr, "genprops/case error: utrie2_freeze() failed: %s\n",
1158                 u_errorName(errorCode));
1159         return;
1160     }
1161     trieSize=utrie2_serialize(pTrie, trieBlock, sizeof(trieBlock), &errorCode);
1162     if(U_FAILURE(errorCode)) {
1163         fprintf(stderr, "genprops/case error: utrie2_serialize() failed: %s (length %ld)\n",
1164                 u_errorName(errorCode), (long)trieSize);
1165         return;
1166     }
1167 
1168     indexes[UCASE_IX_EXC_LENGTH]=exceptions.length();
1169     indexes[UCASE_IX_TRIE_SIZE]=trieSize;
1170     indexes[UCASE_IX_UNFOLD_LENGTH]=unfold.length();
1171     indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptions.length()+2*unfold.length();
1172 
1173     indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength;
1174 
1175     if(!beQuiet) {
1176         printf("trie size in bytes:                    %5d\n", (int)trieSize);
1177         printf("number of code points with exceptions: %5d\n", excPropsCount);
1178         printf("size in bytes of exceptions:           %5d\n", 2*exceptions.length());
1179         printf("size in bytes of reverse foldings:     %5d\n", 2*unfold.length());
1180         printf("data size:                             %5d\n", (int)indexes[UCASE_IX_LENGTH]);
1181     }
1182 }
1183 
1184 void
writeCSourceFile(const char * path,UErrorCode & errorCode)1185 CasePropsBuilder::writeCSourceFile(const char *path, UErrorCode &errorCode) {
1186     if(U_FAILURE(errorCode)) { return; }
1187 
1188     FILE *f=usrc_create(path, "ucase_props_data.h", 2016,
1189                         "icu/tools/unicode/c/genprops/casepropsbuilder.cpp");
1190     if(f==NULL) {
1191         errorCode=U_FILE_ACCESS_ERROR;
1192         return;
1193     }
1194     fputs("#ifdef INCLUDED_FROM_UCASE_CPP\n\n", f);
1195     usrc_writeArray(f,
1196         "static const UVersionInfo ucase_props_dataVersion={",
1197         dataInfo.dataVersion, 8, 4,
1198         "};\n\n");
1199     usrc_writeArray(f,
1200         "static const int32_t ucase_props_indexes[UCASE_IX_TOP]={",
1201         indexes, 32, UCASE_IX_TOP,
1202         "};\n\n");
1203     usrc_writeUTrie2Arrays(f,
1204         "static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL,
1205         pTrie,
1206         "\n};\n\n");
1207     usrc_writeArray(f,
1208         "static const uint16_t ucase_props_exceptions[%ld]={\n",
1209         exceptions.getBuffer(), 16, exceptions.length(),
1210         "\n};\n\n");
1211     usrc_writeArray(f,
1212         "static const uint16_t ucase_props_unfold[%ld]={\n",
1213         unfold.getBuffer(), 16, unfold.length(),
1214         "\n};\n\n");
1215     fputs(
1216         "static const UCaseProps ucase_props_singleton={\n"
1217         "  NULL,\n"
1218         "  ucase_props_indexes,\n"
1219         "  ucase_props_exceptions,\n"
1220         "  ucase_props_unfold,\n",
1221         f);
1222     usrc_writeUTrie2Struct(f,
1223         "  {\n",
1224         pTrie, "ucase_props_trieIndex", NULL,
1225         "  },\n");
1226     usrc_writeArray(f, "  { ", dataInfo.formatVersion, 8, 4, " }\n");
1227     fputs("};\n\n"
1228           "#endif  // INCLUDED_FROM_UCASE_CPP\n", f);
1229     fclose(f);
1230 }
1231 
1232 void
writeBinaryData(const char * path,UBool withCopyright,UErrorCode & errorCode)1233 CasePropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
1234     if(U_FAILURE(errorCode)) { return; }
1235 
1236     UNewDataMemory *pData=udata_create(path, "icu", "ucase", &dataInfo,
1237                                        withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
1238     if(U_FAILURE(errorCode)) {
1239         fprintf(stderr, "genprops: udata_create(%s, ucase.icu) failed - %s\n",
1240                 path, u_errorName(errorCode));
1241         return;
1242     }
1243 
1244     udata_writeBlock(pData, indexes, sizeof(indexes));
1245     udata_writeBlock(pData, trieBlock, trieSize);
1246     udata_writeBlock(pData, exceptions.getBuffer(), 2*exceptions.length());
1247     udata_writeBlock(pData, unfold.getBuffer(), 2*unfold.length());
1248 
1249     /* finish up */
1250     long dataLength=udata_finish(pData, &errorCode);
1251     if(U_FAILURE(errorCode)) {
1252         fprintf(stderr, "genprops error: casepropsbuilder error %d writing the output file\n", errorCode);
1253         exit(errorCode);
1254     }
1255 
1256     if(dataLength!=indexes[UCASE_IX_LENGTH]) {
1257         fprintf(stderr,
1258                 "udata_finish(ucase.icu) reports %ld bytes written but should be %ld\n",
1259                 dataLength, (long)indexes[UCASE_IX_LENGTH]);
1260         errorCode=U_INTERNAL_PROGRAM_ERROR;
1261     }
1262 }
1263 
1264 PropsBuilder *
createCasePropsBuilder(UErrorCode & errorCode)1265 createCasePropsBuilder(UErrorCode &errorCode) {
1266     if(U_FAILURE(errorCode)) { return NULL; }
1267     PropsBuilder *pb=new CasePropsBuilder(errorCode);
1268     if(pb==NULL) {
1269         errorCode=U_MEMORY_ALLOCATION_ERROR;
1270     }
1271     return pb;
1272 }
1273 
1274 /*
1275  * Hey, Emacs, please set the following:
1276  *
1277  * Local Variables:
1278  * indent-tabs-mode: nil
1279  * End:
1280  *
1281  */
1282