• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1999-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  store.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003-02-06
16 *   created by: Ram Viswanadha
17 *
18 */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include "unicode/utypes.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "filestrm.h"
26 #include "toolutil.h"
27 #include "unicode/udata.h"
28 #include "unicode/utf16.h"
29 #include "utrie.h"
30 #include "unewdata.h"
31 #include "gensprep.h"
32 #include "uhash.h"
33 
34 
35 #define DO_DEBUG_OUT 0
36 
37 
38 /*
39  * StringPrep profile file format ------------------------------------
40  *
41  * The file format prepared and written here contains a 16-bit trie and a mapping table.
42  *
43  * Before the data contents described below, there are the headers required by
44  * the udata API for loading ICU data. Especially, a UDataInfo structure
45  * precedes the actual data. It contains platform properties values and the
46  * file format version.
47  *
48  * The following is a description of format version 2.
49  *
50  * Data contents:
51  *
52  * The contents is a parsed, binary form of RFC3454 and possibly
53  * NormalizationCorrections.txt depending on the options specified on the profile.
54  *
55  * Any Unicode code point from 0 to 0x10ffff can be looked up to get
56  * the trie-word, if any, for that code point. This means that the input
57  * to the lookup are 21-bit unsigned integers, with not all of the
58  * 21-bit range used.
59  *
60  * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
61  * After that there are the following structures:
62  *
63  * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
64  *
65  * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
66  *
67  * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
68  *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
69  *
70  * The indexes array contains the following values:
71  *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
72  *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
73  *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt
74  *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table
75  *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
76  *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
77  *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
78  *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
79  *
80  *
81  * StringPrep Trie :
82  *
83  * The StringPrep trie is a 16-bit trie that contains data for the profile.
84  * Each code point is associated with a value (trie-word) in the trie.
85  *
86  * - structure of data words from the trie
87  *
88  *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
89  *      represents the type associated with the code point
90  *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
91  *          type = trieWord - 0xFFF0;
92  *      }
93  *      The type can be :
94  *             USPREP_UNASSIGNED
95  *             USPREP_PROHIBITED
96  *             USPREP_DELETE
97  *
98  *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
99  *      contains distribution described below
100  *
101  *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
102  *      1       -  ON : The value in the next 14 bits is an index into the mapping table
103  *                 OFF: The value in the next 14 bits is a delta value from the code point
104  *      2..15   -  Contains data as described by bit 1. If all bits are set
105  *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
106  *
107  *
108  * Mapping Table:
109  * The data in the mapping table is sorted according to the length of the mapping sequence.
110  * If the type of the code point is USPREP_MAP and the value in the trie word is an index, the index
111  * is compared with the start index of each sequence length to figure out the length according to
112  * the following algorithm:
113  *
114  *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
115  *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
116  *                   length = 1;
117  *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
118  *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
119  *                   length = 2;
120  *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
121  *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
122  *                   length = 3;
123  *               }else{
124  *                   // The first position in the mapping table contains the length
125  *                   // of the sequence
126  *                   length = mappingTable[index++];
127  *
128  *               }
129  *
130  */
131 
132 /* file data ---------------------------------------------------------------- */
133 /* indexes[] value names */
134 
135 #if UCONFIG_NO_IDNA
136 
137 /* dummy UDataInfo cf. udata.h */
138 static UDataInfo dataInfo = {
139     sizeof(UDataInfo),
140     0,
141 
142     U_IS_BIG_ENDIAN,
143     U_CHARSET_FAMILY,
144     U_SIZEOF_UCHAR,
145     0,
146 
147     { 0, 0, 0, 0 },                 /* dummy dataFormat */
148     { 0, 0, 0, 0 },                 /* dummy formatVersion */
149     { 0, 0, 0, 0 }                  /* dummy dataVersion */
150 };
151 
152 #else
153 
154 static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
155 
156 static uint16_t* mappingData= NULL;
157 static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
158 static int16_t currentIndex = 0; /* the current index into the data trie */
159 static int32_t maxLength = 0;  /* maximum length of mapping string */
160 
161 
162 /* UDataInfo cf. udata.h */
163 static UDataInfo dataInfo={
164     sizeof(UDataInfo),
165     0,
166 
167     U_IS_BIG_ENDIAN,
168     U_CHARSET_FAMILY,
169     U_SIZEOF_UCHAR,
170     0,
171 
172     { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
173     { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
174     { 3, 2, 0, 0 }                              /* dataVersion (Unicode version) */
175 };
176 void
setUnicodeVersion(const char * v)177 setUnicodeVersion(const char *v) {
178     UVersionInfo version;
179     u_versionFromString(version, v);
180     uprv_memcpy(dataInfo.dataVersion, version, 4);
181 }
182 
183 void
setUnicodeVersionNC(UVersionInfo version)184 setUnicodeVersionNC(UVersionInfo version){
185     uint32_t univer = version[0] << 24;
186     univer += version[1] << 16;
187     univer += version[2] << 8;
188     univer += version[3];
189     indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
190 }
191 static UNewTrie *sprepTrie;
192 
/* Data length passed to utrie_open() by init(). */
#define MAX_DATA_LENGTH 11500

/*
 * Inclusive limits for a code point delta that storeMapping() stores directly
 * in the trie word instead of in the mapping table.
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              (-8192)
198 
199 
200 extern void
init()201 init() {
202 
203     sprepTrie = (UNewTrie *)uprv_calloc(1, sizeof(UNewTrie));
204 
205     /* initialize the two tries */
206     if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
207         fprintf(stderr, "error: failed to initialize tries\n");
208         exit(U_MEMORY_ALLOCATION_ERROR);
209     }
210 }
211 
212 static UHashtable* hashTable = NULL;
213 
214 
215 typedef struct ValueStruct {
216     UChar* mapping;
217     int16_t length;
218     UStringPrepType type;
219 } ValueStruct;
220 
221 /* Callback for deleting the value from the hashtable */
valueDeleter(void * obj)222 static void U_CALLCONV valueDeleter(void* obj){
223     ValueStruct* value = (ValueStruct*) obj;
224     uprv_free(value->mapping);
225     uprv_free(value);
226 }
227 
228 /* Callback for hashing the entry */
hashEntry(const UHashTok parm)229 static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
230     return  parm.integer;
231 }
232 
233 /* Callback for comparing two entries */
compareEntries(const UHashTok p1,const UHashTok p2)234 static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
235     return (UBool)(p1.integer != p2.integer);
236 }
237 
238 
239 static void
storeMappingData(void)240 storeMappingData(void){
241 
242     int32_t pos = UHASH_FIRST;
243     const UHashElement* element = NULL;
244     ValueStruct* value  = NULL;
245     int32_t codepoint = 0;
246     int32_t elementCount = 0;
247     int32_t writtenElementCount = 0;
248     int32_t mappingLength = 1; /* minimum mapping length */
249     int32_t oldMappingLength = 0;
250     uint16_t trieWord =0;
251     int32_t limitIndex = 0;
252 
253     if (hashTable == NULL) {
254         return;
255     }
256     elementCount = uhash_count(hashTable);
257 
258 	/*initialize the mapping data */
259     mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);
260 
261     while(writtenElementCount < elementCount){
262 
263         while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
264 
265             codepoint = element->key.integer;
266             value = (ValueStruct*)element->value.pointer;
267 
268             /* store the start of indexes */
269             if(oldMappingLength != mappingLength){
270                 /* Assume that index[] is used according to the enums defined */
271                 if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
272                     indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
273                 }
274                 if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
275                    mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
276 
277                     limitIndex = currentIndex;
278 
279                 }
280                 oldMappingLength = mappingLength;
281             }
282 
283             if(value->length == mappingLength){
284                 uint32_t savedTrieWord = 0;
285                 trieWord = currentIndex << 2;
286                 /* turn on the 2nd bit to signal that the following bits contain an index */
287                 trieWord += 0x02;
288 
289                 if(trieWord > _SPREP_TYPE_THRESHOLD){
290                     fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
291                     exit(U_ILLEGAL_CHAR_FOUND);
292                 }
293                 /* figure out if the code point has type already stored */
294                 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
295                 if(savedTrieWord!=0){
296                     if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
297                         /* turn on the first bit in trie word */
298                         trieWord += 0x01;
299                     }else{
300                         /*
301                          * the codepoint has value something other than prohibited
302                          * and a mapping .. error!
303                          */
304                         fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
305                         exit(U_ILLEGAL_ARGUMENT_ERROR);
306                     }
307                 }
308 
309                 /* now set the value in the trie */
310                 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
311                     fprintf(stderr,"Could not set the value for code point.\n");
312                     exit(U_ILLEGAL_ARGUMENT_ERROR);
313                 }
314 
315                 /* written the trie word for the codepoint... increment the count*/
316                 writtenElementCount++;
317 
318                 /* sanity check are we exceeding the max number allowed */
319                 if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
320                     fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n",
321                         currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
322                     exit(U_INDEX_OUTOFBOUNDS_ERROR);
323                 }
324 
325                 /* copy the mapping data */
326                 /* write the length */
327                 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
328                      /* the cast here is safe since we donot expect the length to be > 65535 */
329                      mappingData[currentIndex++] = (uint16_t) mappingLength;
330                 }
331                 /* copy the contents to mappindData array */
332                 u_memmove(mappingData+currentIndex, value->mapping, value->length);
333                 currentIndex += value->length;
334                 if (currentIndex > mappingDataCapacity) {
335                     /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */
336                     fprintf(stderr, "gensprep, fatal error at %s, %d.  Aborting.\n", __FILE__, __LINE__);
337                     exit(U_INTERNAL_PROGRAM_ERROR);
338                 }
339             }
340         }
341         mappingLength++;
342         pos = -1;
343     }
344     /* set the last length for range check */
345     if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
346         indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
347     }else{
348         indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
349     }
350 
351 }
352 
setOptions(int32_t options)353 extern void setOptions(int32_t options){
354     indexes[_SPREP_OPTIONS] = options;
355 }
356 extern void
storeMapping(uint32_t codepoint,uint32_t * mapping,int32_t length,UStringPrepType type,UErrorCode * status)357 storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
358              UStringPrepType type, UErrorCode* status){
359 
360 
361     UChar* map = NULL;
362     int16_t adjustedLen=0, i, j;
363     uint16_t trieWord = 0;
364     ValueStruct *value = NULL;
365     uint32_t savedTrieWord = 0;
366 
367     /* initialize the hashtable */
368     if(hashTable==NULL){
369         hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
370         uhash_setValueDeleter(hashTable, valueDeleter);
371     }
372 
373     /* figure out if the code point has type already stored */
374     savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
375     if(savedTrieWord!=0){
376         if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
377             /* turn on the first bit in trie word */
378             trieWord += 0x01;
379         }else{
380             /*
381              * the codepoint has value something other than prohibited
382              * and a mapping .. error!
383              */
384             fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
385             exit(U_ILLEGAL_ARGUMENT_ERROR);
386         }
387     }
388 
389     /* figure out the real length */
390     for(i=0; i<length; i++){
391         adjustedLen += U16_LENGTH(mapping[i]);
392     }
393 
394     if(adjustedLen == 0){
395         trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
396         /* make sure that the value of trieWord is less than the threshold */
397         if(trieWord < _SPREP_TYPE_THRESHOLD){
398             /* now set the value in the trie */
399             if(!utrie_set32(sprepTrie,codepoint,trieWord)){
400                 fprintf(stderr,"Could not set the value for code point.\n");
401                 exit(U_ILLEGAL_ARGUMENT_ERROR);
402             }
403             /* value is set so just return */
404             return;
405         }else{
406             fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
407             exit(U_ILLEGAL_CHAR_FOUND);
408         }
409     }
410 
411     if(adjustedLen == 1){
412         /* calculate the delta */
413         int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
414         if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
415 
416             trieWord = delta;
417             trieWord <<= 2;
418 
419 
420             /* make sure that the second bit is OFF */
421             if((trieWord & 0x02) != 0 ){
422                 fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
423                 exit(U_INTERNAL_PROGRAM_ERROR);
424             }
425             /* make sure that the value of trieWord is less than the threshold */
426             if(trieWord < _SPREP_TYPE_THRESHOLD){
427                 /* now set the value in the trie */
428                 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
429                     fprintf(stderr,"Could not set the value for code point.\n");
430                     exit(U_ILLEGAL_ARGUMENT_ERROR);
431                 }
432                 /* value is set so just return */
433                 return;
434             }
435         }
436         /*
437          * if the delta is not in the given range or if the trieWord is larger than the threshold
438          * just fall through for storing the mapping in the mapping table
439          */
440     }
441 
442     map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
443 
444     for (i=0, j=0; i<length; i++) {
445         U16_APPEND_UNSAFE(map, j, mapping[i]);
446     }
447 
448     value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
449     value->mapping = map;
450     value->type    = type;
451     value->length  = adjustedLen;
452     if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
453         mappingDataCapacity++;
454     }
455     if(maxLength < value->length){
456         maxLength = value->length;
457     }
458     uhash_iput(hashTable,codepoint,value,status);
459     mappingDataCapacity += adjustedLen;
460 
461     if(U_FAILURE(*status)){
462         fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
463         exit(*status);
464     }
465 }
466 
467 
468 extern void
storeRange(uint32_t start,uint32_t end,UStringPrepType type,UErrorCode * status)469 storeRange(uint32_t start, uint32_t end, UStringPrepType type, UErrorCode* status){
470     (void)status; // suppress compiler warnings about unused variable
471     uint16_t trieWord = 0;
472 
473     if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
474         fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
475         exit(U_ILLEGAL_CHAR_FOUND);
476     }
477     trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
478     if(start == end){
479         uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
480         if(savedTrieWord>0){
481             if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
482                 /*
483                  * A mapping is stored in the trie word
484                  * and the only other possible type that a
485                  * code point can have is USPREP_PROHIBITED
486                  *
487                  */
488 
489                 /* turn on the 0th bit in the savedTrieWord */
490                 savedTrieWord += 0x01;
491 
492                 /* the downcast is safe since we only save 16 bit values */
493                 trieWord = (uint16_t)savedTrieWord;
494 
495                 /* make sure that the value of trieWord is less than the threshold */
496                 if(trieWord < _SPREP_TYPE_THRESHOLD){
497                     /* now set the value in the trie */
498                     if(!utrie_set32(sprepTrie,start,trieWord)){
499                         fprintf(stderr,"Could not set the value for code point.\n");
500                         exit(U_ILLEGAL_ARGUMENT_ERROR);
501                     }
502                     /* value is set so just return */
503                     return;
504                 }else{
505                     fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
506                     exit(U_ILLEGAL_CHAR_FOUND);
507                 }
508 
509             }else if(savedTrieWord != trieWord){
510                 fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
511                 exit(U_ILLEGAL_ARGUMENT_ERROR);
512             }
513             /* if savedTrieWord == trieWord .. fall through and set the value */
514         }
515         if(!utrie_set32(sprepTrie,start,trieWord)){
516             fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
517             exit(U_ILLEGAL_ARGUMENT_ERROR);
518         }
519     }else{
520         if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
521             fprintf(stderr,"Value for certain codepoint already set.\n");
522             exit(U_ILLEGAL_CHAR_FOUND);
523         }
524     }
525 
526 }
527 
528 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
529 static uint32_t U_CALLCONV
getFoldedValue(UNewTrie * trie,UChar32 start,int32_t offset)530 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
531     uint32_t value;
532     UChar32 limit=0;
533     UBool inBlockZero;
534 
535     limit=start+0x400;
536     while(start<limit) {
537         value=utrie_get32(trie, start, &inBlockZero);
538         if(inBlockZero) {
539             start+=UTRIE_DATA_BLOCK_LENGTH;
540         } else if(value!=0) {
541             return (uint32_t)offset;
542         } else {
543             ++start;
544         }
545     }
546     return 0;
547 
548 }
549 
550 #endif /* #if !UCONFIG_NO_IDNA */
551 
552 extern void
generateData(const char * dataDir,const char * bundleName)553 generateData(const char *dataDir, const char* bundleName) {
554     static uint8_t sprepTrieBlock[100000];
555 
556     UNewDataMemory *pData;
557     UErrorCode errorCode=U_ZERO_ERROR;
558     int32_t size, dataLength;
559     char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
560 
561 #if UCONFIG_NO_IDNA
562 
563     size=0;
564 
565 #else
566 
567     int32_t sprepTrieSize;
568 
569     /* sort and add mapping data */
570     storeMappingData();
571 
572     sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
573     if(U_FAILURE(errorCode)) {
574         fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
575         exit(errorCode);
576     }
577 
578     size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
579     if(beVerbose) {
580         printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
581         printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
582         printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
583         printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
584         printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
585     }
586 
587 #endif
588 
589     fileName[0]=0;
590     uprv_strcat(fileName,bundleName);
591     /* write the data */
592     pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
593                        haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
594     if(U_FAILURE(errorCode)) {
595         fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
596         exit(errorCode);
597     }
598 
599 #if !UCONFIG_NO_IDNA
600 
601     indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
602     indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
603 
604     udata_writeBlock(pData, indexes, sizeof(indexes));
605     udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
606     udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
607 
608 
609 #endif
610 
611     /* finish up */
612     dataLength=udata_finish(pData, &errorCode);
613     if(U_FAILURE(errorCode)) {
614         fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
615         exit(errorCode);
616     }
617 
618     if(dataLength!=size) {
619         fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
620             (long)dataLength, (long)size);
621         exit(U_INTERNAL_PROGRAM_ERROR);
622     }
623 
624 #if !UCONFIG_NO_IDNA
625     /* done with writing the data .. close the hashtable */
626     if (hashTable != NULL) {
627         uhash_close(hashTable);
628     }
629 #endif
630 
631     uprv_free(fileName);
632 }
633 
634 #if !UCONFIG_NO_IDNA
635 
636 extern void
cleanUpData(void)637 cleanUpData(void) {
638     uprv_free(mappingData);
639     utrie_close(sprepTrie);
640     uprv_free(sprepTrie);
641 }
642 
643 #endif /* #if !UCONFIG_NO_IDNA */
644 
645 /*
646  * Hey, Emacs, please set the following:
647  *
648  * Local Variables:
649  * indent-tabs-mode: nil
650  * End:
651  *
652  */
653