• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 1996-2007, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************
7  *
8  *  uconv_bld.c:
9  *
10  *  Defines functions that are used in the creation/initialization/deletion
11  *  of converters and related structures.
12  *  uses ucnv_io.h routines to access disk information
13  *  is used by ucnv.h to implement public API create/delete/flushCache routines
14  * Modification History:
15  *
16  *   Date        Name        Description
17  *
18  *   06/20/2000  helena      OS/400 port changes; mostly typecast.
19  *   06/29/2000  helena      Major rewrite of the callback interface.
20 */
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_CONVERSION
25 
26 #include "unicode/putil.h"
27 #include "unicode/udata.h"
28 #include "unicode/ucnv.h"
29 #include "unicode/uloc.h"
30 #include "utracimp.h"
31 #include "ucnv_io.h"
32 #include "ucnv_bld.h"
33 #include "ucnvmbcs.h"
34 #include "ucnv_ext.h"
35 #include "ucnv_cnv.h"
36 #include "ucnv_imp.h"
37 #include "uhash.h"
38 #include "umutex.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include "ucln_cmn.h"
42 #include "ustr_cnv.h"
43 
44 
45 
46 #if 0
47 #include <stdio.h>
48 extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
49 #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
50 #else
51 # define UCNV_DEBUG_LOG(x,y,z)
52 #endif
53 
54 static const UConverterSharedData * const
55 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
56     NULL, NULL,
57 
58 #if UCONFIG_NO_LEGACY_CONVERSION
59     NULL,
60 #else
61     &_MBCSData,
62 #endif
63 
64     &_Latin1Data,
65     &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
66     NULL,
67 
68 #if UCONFIG_NO_LEGACY_CONVERSION
69     NULL,
70     NULL, NULL, NULL, NULL, NULL, NULL,
71     NULL, NULL, NULL, NULL, NULL, NULL,
72     NULL,
73 #else
74     &_ISO2022Data,
75     &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
76     &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
77     &_HZData,
78 #endif
79 
80     &_SCSUData,
81 
82 #if UCONFIG_NO_LEGACY_CONVERSION
83     NULL,
84 #else
85     &_ISCIIData,
86 #endif
87 
88     &_ASCIIData,
89     &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData
90 };
91 
92 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
93    Also the name should be in lower case and all spaces, dashes and underscores
94    removed
95 */
96 static struct {
97   const char *name;
98   const UConverterType type;
99 } const cnvNameType[] = {
100   { "bocu1", UCNV_BOCU1 },
101   { "cesu8", UCNV_CESU8 },
102 #if !UCONFIG_NO_LEGACY_CONVERSION
103   { "hz",UCNV_HZ },
104 #endif
105   { "imapmailboxname", UCNV_IMAP_MAILBOX },
106 #if !UCONFIG_NO_LEGACY_CONVERSION
107   { "iscii", UCNV_ISCII },
108   { "iso2022", UCNV_ISO_2022 },
109 #endif
110   { "iso88591", UCNV_LATIN_1 },
111 #if !UCONFIG_NO_LEGACY_CONVERSION
112   { "lmbcs1", UCNV_LMBCS_1 },
113   { "lmbcs11",UCNV_LMBCS_11 },
114   { "lmbcs16",UCNV_LMBCS_16 },
115   { "lmbcs17",UCNV_LMBCS_17 },
116   { "lmbcs18",UCNV_LMBCS_18 },
117   { "lmbcs19",UCNV_LMBCS_19 },
118   { "lmbcs2", UCNV_LMBCS_2 },
119   { "lmbcs3", UCNV_LMBCS_3 },
120   { "lmbcs4", UCNV_LMBCS_4 },
121   { "lmbcs5", UCNV_LMBCS_5 },
122   { "lmbcs6", UCNV_LMBCS_6 },
123   { "lmbcs8", UCNV_LMBCS_8 },
124 #endif
125   { "scsu", UCNV_SCSU },
126   { "usascii", UCNV_US_ASCII },
127   { "utf16", UCNV_UTF16 },
128   { "utf16be", UCNV_UTF16_BigEndian },
129   { "utf16le", UCNV_UTF16_LittleEndian },
130 #if U_IS_BIG_ENDIAN
131   { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
132   { "utf16platformendian", UCNV_UTF16_BigEndian },
133 #else
134   { "utf16oppositeendian", UCNV_UTF16_BigEndian},
135   { "utf16platformendian", UCNV_UTF16_LittleEndian },
136 #endif
137   { "utf32", UCNV_UTF32 },
138   { "utf32be", UCNV_UTF32_BigEndian },
139   { "utf32le", UCNV_UTF32_LittleEndian },
140 #if U_IS_BIG_ENDIAN
141   { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
142   { "utf32platformendian", UCNV_UTF32_BigEndian },
143 #else
144   { "utf32oppositeendian", UCNV_UTF32_BigEndian },
145   { "utf32platformendian", UCNV_UTF32_LittleEndian },
146 #endif
147   { "utf7", UCNV_UTF7 },
148   { "utf8", UCNV_UTF8 }
149 };
150 
151 
152 /*initializes some global variables */
153 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
154 static UMTX        cnvCacheMutex = NULL;  /* Mutex for synchronizing cnv cache access. */
155                                           /*  Note:  the global mutex is used for      */
156                                           /*         reference count updates.          */
157 
158 static const char **gAvailableConverters = NULL;
159 static uint16_t gAvailableConverterCount = 0;
160 
161 /* This contains the resolved converter name. So no further alias lookup is needed again. */
162 static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
163 static const char *gDefaultConverterName = NULL;
164 
165 /*
166 If the default converter is an algorithmic converter, this is the cached value.
167 We don't cache a full UConverter and clone it because ucnv_clone doesn't have
168 less overhead than an algorithmic open. We don't cache non-algorithmic converters
169 because ucnv_flushCache must be able to unload the default converter and its table.
170 */
171 static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;
172 
173 /* Does gDefaultConverterName have a converter option and require extra parsing? */
174 static UBool gDefaultConverterContainsOption;
175 
176 
177 static const char DATA_TYPE[] = "cnv";
178 
179 /* ucnv_cleanup - delete all storage held by the converter cache, except any  */
180 /*                in use by open converters.                                  */
181 /*                Not thread safe.                                            */
182 /*                Not supported API.                                          */
ucnv_cleanup(void)183 static UBool U_CALLCONV ucnv_cleanup(void) {
184     ucnv_flushCache();
185     if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
186         uhash_close(SHARED_DATA_HASHTABLE);
187         SHARED_DATA_HASHTABLE = NULL;
188     }
189 
190     /* Called from ucnv_flushCache because it allocates the hashtable */
191     /*ucnv_flushAvailableConverterCache();*/
192 
193     gDefaultConverterName = NULL;
194     gDefaultConverterNameBuffer[0] = 0;
195     gDefaultConverterContainsOption = FALSE;
196     gDefaultAlgorithmicSharedData = NULL;
197 
198     umtx_destroy(&cnvCacheMutex);    /* Don't worry about destroying the mutex even  */
199                                      /*  if the hash table still exists.  The mutex  */
200                                      /*  will lazily re-init  itself if needed.      */
201     return (SHARED_DATA_HASHTABLE == NULL);
202 }
203 
204 static UBool U_CALLCONV
isCnvAcceptable(void * context,const char * type,const char * name,const UDataInfo * pInfo)205 isCnvAcceptable(void *context,
206              const char *type, const char *name,
207              const UDataInfo *pInfo) {
208     return (UBool)(
209         pInfo->size>=20 &&
210         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
211         pInfo->charsetFamily==U_CHARSET_FAMILY &&
212         pInfo->sizeofUChar==U_SIZEOF_UCHAR &&
213         pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
214         pInfo->dataFormat[1]==0x6e &&
215         pInfo->dataFormat[2]==0x76 &&
216         pInfo->dataFormat[3]==0x74 &&
217         pInfo->formatVersion[0]==6);  /* Everything will be version 6 */
218 }
219 
220 /**
221  * Un flatten shared data from a UDATA..
222  */
223 static UConverterSharedData*
ucnv_data_unFlattenClone(UConverterLoadArgs * pArgs,UDataMemory * pData,UErrorCode * status)224 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status)
225 {
226     /* UDataInfo info; -- necessary only if some converters have different formatVersion */
227     const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
228     const UConverterStaticData *source = (const UConverterStaticData *) raw;
229     UConverterSharedData *data;
230     UConverterType type = (UConverterType)source->conversionType;
231 
232     if(U_FAILURE(*status))
233         return NULL;
234 
235     if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
236         converterData[type] == NULL ||
237         converterData[type]->referenceCounter != 1 ||
238         source->structSize != sizeof(UConverterStaticData))
239     {
240         *status = U_INVALID_TABLE_FORMAT;
241         return NULL;
242     }
243 
244     data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
245     if(data == NULL) {
246         *status = U_MEMORY_ALLOCATION_ERROR;
247         return NULL;
248     }
249 
250     /* copy initial values from the static structure for this type */
251     uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
252 
253 #if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */
254     /*
255      * It would be much more efficient if the table were a direct member, not a pointer.
256      * However, that would add to the size of all UConverterSharedData objects
257      * even if they do not use this table (especially algorithmic ones).
258      * If this changes, then the static templates from converterData[type]
259      * need more entries.
260      *
261      * In principle, it would be cleaner if the load() function below
262      * allocated the table.
263      */
264     data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable));
265     if(data->table == NULL) {
266         uprv_free(data);
267         *status = U_MEMORY_ALLOCATION_ERROR;
268         return NULL;
269     }
270     uprv_memset(data->table, 0, sizeof(UConverterTable));
271 #endif
272 
273     data->staticData = source;
274 
275     data->sharedDataCached = FALSE;
276 
277     /* fill in fields from the loaded data */
278     data->dataMemory = (void*)pData; /* for future use */
279 
280     if(data->impl->load != NULL) {
281         data->impl->load(data, pArgs, raw + source->structSize, status);
282         if(U_FAILURE(*status)) {
283             uprv_free(data->table);
284             uprv_free(data);
285             return NULL;
286         }
287     }
288     return data;
289 }
290 
291 /*Takes an alias name gets an actual converter file name
292  *goes to disk and opens it.
293  *allocates the memory and returns a new UConverter object
294  */
createConverterFromFile(UConverterLoadArgs * pArgs,UErrorCode * err)295 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
296 {
297     UDataMemory *data;
298     UConverterSharedData *sharedData;
299 
300     UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
301 
302     if (U_FAILURE (*err)) {
303         UTRACE_EXIT_STATUS(*err);
304         return NULL;
305     }
306 
307     UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
308 
309     data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err);
310     if(U_FAILURE(*err))
311     {
312         UTRACE_EXIT_STATUS(*err);
313         return NULL;
314     }
315 
316     sharedData = ucnv_data_unFlattenClone(pArgs, data, err);
317     if(U_FAILURE(*err))
318     {
319         udata_close(data);
320         UTRACE_EXIT_STATUS(*err);
321         return NULL;
322     }
323 
324     /*
325      * TODO Store pkg in a field in the shared data so that delta-only converters
326      * can load base converters from the same package.
327      * If the pkg name is longer than the field, then either do not load the converter
328      * in the first place, or just set the pkg field to "".
329      */
330 
331     UTRACE_EXIT_PTR_STATUS(sharedData, *err);
332     return sharedData;
333 }
334 
335 /*returns a converter type from a string
336  */
337 static const UConverterSharedData *
getAlgorithmicTypeFromName(const char * realName)338 getAlgorithmicTypeFromName(const char *realName)
339 {
340     uint32_t mid, start, limit;
341     uint32_t lastMid;
342     int result;
343     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
344 
345     /* Lower case and remove ignoreable characters. */
346     ucnv_io_stripForCompare(strippedName, realName);
347 
348     /* do a binary search for the alias */
349     start = 0;
350     limit = sizeof(cnvNameType)/sizeof(cnvNameType[0]);
351     mid = limit;
352     lastMid = UINT32_MAX;
353 
354     for (;;) {
355         mid = (uint32_t)((start + limit) / 2);
356         if (lastMid == mid) {   /* Have we moved? */
357             break;  /* We haven't moved, and it wasn't found. */
358         }
359         lastMid = mid;
360         result = uprv_strcmp(strippedName, cnvNameType[mid].name);
361 
362         if (result < 0) {
363             limit = mid;
364         } else if (result > 0) {
365             start = mid;
366         } else {
367             return converterData[cnvNameType[mid].type];
368         }
369     }
370 
371     return NULL;
372 }
373 
374 /*
375 * Based on the number of known converters, this determines how many times larger
376 * the shared data hash table should be. When on small platforms, or just a couple
377 * of converters are used, this number should be 2. When memory is plentiful, or
378 * when ucnv_countAvailable is ever used with a lot of available converters,
379 * this should be 4.
380 * Larger numbers reduce the number of hash collisions, but use more memory.
381 */
382 #define UCNV_CACHE_LOAD_FACTOR 2
383 
384 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
385 /*   Will always be called with the cnvCacheMutex alrady being held   */
386 /*     by the calling function.                                       */
387 /* Stores the shared data in the SHARED_DATA_HASHTABLE
388  * @param data The shared data
389  */
390 static void
ucnv_shareConverterData(UConverterSharedData * data)391 ucnv_shareConverterData(UConverterSharedData * data)
392 {
393     UErrorCode err = U_ZERO_ERROR;
394     /*Lazy evaluates the Hashtable itself */
395     /*void *sanity = NULL;*/
396 
397     if (SHARED_DATA_HASHTABLE == NULL)
398     {
399         SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
400                             ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
401                             &err);
402         ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
403 
404         if (U_FAILURE(err))
405             return;
406     }
407 
408     /* ### check to see if the element is not already there! */
409 
410     /*
411     sanity =   ucnv_getSharedConverterData (data->staticData->name);
412     if(sanity != NULL)
413     {
414     UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
415     }
416     UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
417     */
418 
419     /* Mark it shared */
420     data->sharedDataCached = TRUE;
421 
422     uhash_put(SHARED_DATA_HASHTABLE,
423             (void*) data->staticData->name, /* Okay to cast away const as long as
424             keyDeleter == NULL */
425             data,
426             &err);
427     UCNV_DEBUG_LOG("put", data->staticData->name,data);
428 
429 }
430 
431 /*  Look up a converter name in the shared data cache.                    */
432 /*    cnvCacheMutex must be held by the caller to protect the hash table. */
433 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there)
434  * @param name The name of the shared data
435  * @return the shared data from the SHARED_DATA_HASHTABLE
436  */
437 static UConverterSharedData *
ucnv_getSharedConverterData(const char * name)438 ucnv_getSharedConverterData(const char *name)
439 {
440     /*special case when no Table has yet been created we return NULL */
441     if (SHARED_DATA_HASHTABLE == NULL)
442     {
443         return NULL;
444     }
445     else
446     {
447         UConverterSharedData *rc;
448 
449         rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name);
450         UCNV_DEBUG_LOG("get",name,rc);
451         return rc;
452     }
453 }
454 
455 /*frees the string of memory blocks associates with a sharedConverter
456  *if and only if the referenceCounter == 0
457  */
458 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to
459  * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and
460  * returns TRUE,
461  * otherwise returns FALSE
462  * @param sharedConverterData The shared data
463  * @return if not it frees all the memory stemming from sharedConverterData and
464  * returns TRUE, otherwise returns FALSE
465  */
466 static UBool
ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)467 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
468 {
469     UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
470     UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData);
471 
472     if (deadSharedData->referenceCounter > 0) {
473         UTRACE_EXIT_VALUE((int32_t)FALSE);
474         return FALSE;
475     }
476 
477     if (deadSharedData->impl->unload != NULL) {
478         deadSharedData->impl->unload(deadSharedData);
479     }
480 
481     if(deadSharedData->dataMemory != NULL)
482     {
483         UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory;
484         udata_close(data);
485     }
486 
487     if(deadSharedData->table != NULL)
488     {
489         uprv_free(deadSharedData->table);
490     }
491 
492 #if 0
493     /* if the static data is actually owned by the shared data */
494     /* enable if we ever have this situation. */
495     if(deadSharedData->staticDataOwned == TRUE) /* see ucnv_bld.h */
496     {
497         uprv_free((void*)deadSharedData->staticData);
498     }
499 #endif
500 
501 #if 0
502     /* Zap it ! */
503     uprv_memset(deadSharedData->0, sizeof(*deadSharedData));
504 #endif
505 
506     uprv_free(deadSharedData);
507 
508     UTRACE_EXIT_VALUE((int32_t)TRUE);
509     return TRUE;
510 }
511 
512 /**
513  * Load a non-algorithmic converter.
514  * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
515  */
516 UConverterSharedData *
ucnv_load(UConverterLoadArgs * pArgs,UErrorCode * err)517 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) {
518     UConverterSharedData *mySharedConverterData;
519 
520     if(err == NULL || U_FAILURE(*err)) {
521         return NULL;
522     }
523 
524     if(pArgs->pkg != NULL && *pArgs->pkg != 0) {
525         /* application-provided converters are not currently cached */
526         return createConverterFromFile(pArgs, err);
527     }
528 
529     mySharedConverterData = ucnv_getSharedConverterData(pArgs->name);
530     if (mySharedConverterData == NULL)
531     {
532         /*Not cached, we need to stream it in from file */
533         mySharedConverterData = createConverterFromFile(pArgs, err);
534         if (U_FAILURE (*err) || (mySharedConverterData == NULL))
535         {
536             return NULL;
537         }
538         else
539         {
540             /* share it with other library clients */
541             ucnv_shareConverterData(mySharedConverterData);
542         }
543     }
544     else
545     {
546         /* The data for this converter was already in the cache.            */
547         /* Update the reference counter on the shared data: one more client */
548         mySharedConverterData->referenceCounter++;
549     }
550 
551     return mySharedConverterData;
552 }
553 
554 /**
555  * Unload a non-algorithmic converter.
556  * It must be sharedData->referenceCounter != ~0
557  * and this function must be called inside umtx_lock(&cnvCacheMutex).
558  */
559 void
ucnv_unload(UConverterSharedData * sharedData)560 ucnv_unload(UConverterSharedData *sharedData) {
561     if(sharedData != NULL) {
562         if (sharedData->referenceCounter > 0) {
563             sharedData->referenceCounter--;
564         }
565 
566         if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) {
567             ucnv_deleteSharedConverterData(sharedData);
568         }
569     }
570 }
571 
572 void
ucnv_unloadSharedDataIfReady(UConverterSharedData * sharedData)573 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
574 {
575     /*
576     Checking whether it's an algorithic converter is okay
577     in multithreaded applications because the value never changes.
578     Don't check referenceCounter for any other value.
579     */
580     if(sharedData != NULL && sharedData->referenceCounter != ~0) {
581         umtx_lock(&cnvCacheMutex);
582         ucnv_unload(sharedData);
583         umtx_unlock(&cnvCacheMutex);
584     }
585 }
586 
587 void
ucnv_incrementRefCount(UConverterSharedData * sharedData)588 ucnv_incrementRefCount(UConverterSharedData *sharedData)
589 {
590     if(sharedData != NULL && sharedData->referenceCounter != ~0) {
591         umtx_lock(&cnvCacheMutex);
592         sharedData->referenceCounter++;
593         umtx_unlock(&cnvCacheMutex);
594     }
595 }
596 
597 static void
parseConverterOptions(const char * inName,char * cnvName,char * locale,uint32_t * pFlags,UErrorCode * err)598 parseConverterOptions(const char *inName,
599                       char *cnvName,
600                       char *locale,
601                       uint32_t *pFlags,
602                       UErrorCode *err)
603 {
604     char c;
605     int32_t len = 0;
606 
607     /* copy the converter name itself to cnvName */
608     while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
609         if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) {
610             *err = U_ILLEGAL_ARGUMENT_ERROR;    /* bad name */
611             *cnvName=0;
612             return;
613         }
614         *cnvName++=c;
615         inName++;
616     }
617     *cnvName=0;
618 
619     /* parse options. No more name copying should occur. */
620     while((c=*inName)!=0) {
621         if(c==UCNV_OPTION_SEP_CHAR) {
622             ++inName;
623         }
624 
625         /* inName is behind an option separator */
626         if(uprv_strncmp(inName, "locale=", 7)==0) {
627             /* do not modify locale itself in case we have multiple locale options */
628             char *dest=locale;
629 
630             /* copy the locale option value */
631             inName+=7;
632             len=0;
633             while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
634                 ++inName;
635 
636                 if(++len>=ULOC_FULLNAME_CAPACITY) {
637                     *err=U_ILLEGAL_ARGUMENT_ERROR;    /* bad name */
638                     *locale=0;
639                     return;
640                 }
641 
642                 *dest++=c;
643             }
644             *dest=0;
645         } else if(uprv_strncmp(inName, "version=", 8)==0) {
646             /* copy the version option value into bits 3..0 of *pFlags */
647             inName+=8;
648             c=*inName;
649             if(c==0) {
650                 *pFlags&=~UCNV_OPTION_VERSION;
651                 return;
652             } else if((uint8_t)(c-'0')<10) {
653                 *pFlags=(*pFlags&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0');
654                 ++inName;
655             }
656         } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) {
657             inName+=8;
658             *pFlags|=UCNV_OPTION_SWAP_LFNL;
659         /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */
660         } else {
661             /* ignore any other options until we define some */
662             while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) {
663             }
664             if(c==0) {
665                 return;
666             }
667         }
668     }
669 }
670 
671 /*Logic determines if the converter is Algorithmic AND/OR cached
672  *depending on that:
673  * -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
674  * -Get it from a Hashtable (Data=X, Cached=TRUE)
675  * -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
676  * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
677  */
678 UConverterSharedData *
ucnv_loadSharedData(const char * converterName,UConverterLookupData * lookup,UErrorCode * err)679 ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UErrorCode * err) {
680     UConverterLookupData stackLookup;
681     UConverterSharedData *mySharedConverterData = NULL;
682     UErrorCode internalErrorCode = U_ZERO_ERROR;
683     UBool mayContainOption = TRUE;
684     UBool checkForAlgorithmic = TRUE;
685 
686     if (U_FAILURE (*err)) {
687         return NULL;
688     }
689 
690     if(lookup == NULL) {
691         lookup = &stackLookup;
692     }
693 
694     lookup->locale[0] = 0;
695     lookup->options = 0;
696 
697     /* In case "name" is NULL we want to open the default converter. */
698     if (converterName == NULL) {
699         /* Call ucnv_getDefaultName first to query the name from the OS. */
700         lookup->realName = ucnv_getDefaultName();
701         if (lookup->realName == NULL) {
702             *err = U_MISSING_RESOURCE_ERROR;
703             return NULL;
704         }
705         mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData;
706         checkForAlgorithmic = FALSE;
707         mayContainOption = gDefaultConverterContainsOption;
708         /* the default converter name is already canonical */
709     }
710     else if((converterName[0] == 'U' ?
711             (                           converterName[1] == 'T' && converterName[2] == 'F') :
712             (converterName[0] == 'u' && converterName[1] == 't' && converterName[2] == 'f'))
713         &&
714         (converterName[3] == '-' ?
715             (converterName[4] == '8' && converterName[5] == 0) :
716             (converterName[3] == '8' && converterName[4] == 0)))
717     {
718         /* fastpath for UTF-8 */
719         return (UConverterSharedData *)converterData[UCNV_UTF8];
720     }
721     else {
722         /* separate the converter name from the options */
723         parseConverterOptions(converterName, lookup->cnvName, lookup->locale, &lookup->options, err);
724         if (U_FAILURE(*err)) {
725             /* Very bad name used. */
726             return NULL;
727         }
728 
729         /* get the canonical converter name */
730         lookup->realName = ucnv_io_getConverterName(lookup->cnvName, &mayContainOption, &internalErrorCode);
731         if (U_FAILURE(internalErrorCode) || lookup->realName == NULL) {
732             /*
733             * set the input name in case the converter was added
734             * without updating the alias table, or when there is no alias table
735             */
736             lookup->realName = lookup->cnvName;
737         }
738     }
739 
740     /* separate the converter name from the options */
741     if(mayContainOption && lookup->realName != lookup->cnvName) {
742         parseConverterOptions(lookup->realName, lookup->cnvName, lookup->locale, &lookup->options, err);
743         lookup->realName = lookup->cnvName;
744     }
745 
746     /* get the shared data for an algorithmic converter, if it is one */
747     if (checkForAlgorithmic) {
748         mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(lookup->realName);
749     }
750     if (mySharedConverterData == NULL)
751     {
752         /* it is a data-based converter, get its shared data.               */
753         /* Hold the cnvCacheMutex through the whole process of checking the */
754         /*   converter data cache, and adding new entries to the cache      */
755         /*   to prevent other threads from modifying the cache during the   */
756         /*   process.                                                       */
757         UConverterLoadArgs args={ 0 };
758 
759         args.size=sizeof(UConverterLoadArgs);
760         args.nestedLoads=1;
761         args.options=lookup->options;
762         args.pkg=NULL;
763         args.name=lookup->realName;
764 
765         umtx_lock(&cnvCacheMutex);
766         mySharedConverterData = ucnv_load(&args, err);
767         umtx_unlock(&cnvCacheMutex);
768         if (U_FAILURE (*err) || (mySharedConverterData == NULL))
769         {
770             return NULL;
771         }
772     }
773 
774     return mySharedConverterData;
775 }
776 
777 UConverter *
ucnv_createConverter(UConverter * myUConverter,const char * converterName,UErrorCode * err)778 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err)
779 {
780     UConverterLookupData stackLookup;
781     UConverterSharedData *mySharedConverterData;
782 
783     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
784 
785     if(U_SUCCESS(*err)) {
786         UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName);
787 
788         mySharedConverterData = ucnv_loadSharedData(converterName, &stackLookup, err);
789 
790         if(U_SUCCESS(*err)) {
791             myUConverter = ucnv_createConverterFromSharedData(
792                 myUConverter, mySharedConverterData,
793                 stackLookup.realName, stackLookup.locale, stackLookup.options,
794                 err);
795 
796             if(U_SUCCESS(*err)) {
797                 UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
798                 return myUConverter;
799             } else {
800                 ucnv_unloadSharedDataIfReady(mySharedConverterData);
801             }
802         }
803     }
804 
805     /* exit with error */
806     UTRACE_EXIT_STATUS(*err);
807     return NULL;
808 }
809 
810 UConverter *
ucnv_createAlgorithmicConverter(UConverter * myUConverter,UConverterType type,const char * locale,uint32_t options,UErrorCode * err)811 ucnv_createAlgorithmicConverter(UConverter *myUConverter,
812                                 UConverterType type,
813                                 const char *locale, uint32_t options,
814                                 UErrorCode *err) {
815     UConverter *cnv;
816     const UConverterSharedData *sharedData;
817     UBool isAlgorithmicConverter;
818 
819     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC);
820     UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type);
821 
822     if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) {
823         *err = U_ILLEGAL_ARGUMENT_ERROR;
824         UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
825         return NULL;
826     }
827 
828     sharedData = converterData[type];
829     umtx_lock(&cnvCacheMutex);
830     isAlgorithmicConverter = (UBool)(sharedData == NULL || sharedData->referenceCounter != ~0);
831     umtx_unlock(&cnvCacheMutex);
832     if (isAlgorithmicConverter) {
833         /* not a valid type, or not an algorithmic converter */
834         *err = U_ILLEGAL_ARGUMENT_ERROR;
835         UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
836         return NULL;
837     }
838 
839     cnv = ucnv_createConverterFromSharedData(myUConverter, (UConverterSharedData *)sharedData, "",
840                 locale != NULL ? locale : "", options, err);
841 
842     UTRACE_EXIT_PTR_STATUS(cnv, *err);
843     return cnv;
844 }
845 
846 UConverter*
ucnv_createConverterFromPackage(const char * packageName,const char * converterName,UErrorCode * err)847 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err)
848 {
849     char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH], locale[ULOC_FULLNAME_CAPACITY];
850     UConverter *myUConverter;
851     UConverterSharedData *mySharedConverterData;
852 
853     UConverterLoadArgs args={ 0 };
854 
855     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE);
856 
857     if(U_FAILURE(*err)) {
858         UTRACE_EXIT_STATUS(*err);
859         return NULL;
860     }
861 
862     UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName);
863 
864     args.size=sizeof(UConverterLoadArgs);
865     args.nestedLoads=1;
866     args.pkg=packageName;
867 
868     /* first, get the options out of the converterName string */
869     parseConverterOptions(converterName, cnvName, locale, &args.options, err);
870     if (U_FAILURE(*err)) {
871         /* Very bad name used. */
872         UTRACE_EXIT_STATUS(*err);
873         return NULL;
874     }
875     args.name=cnvName;
876 
877     /* open the data, unflatten the shared structure */
878     mySharedConverterData = createConverterFromFile(&args, err);
879 
880     if (U_FAILURE(*err)) {
881         UTRACE_EXIT_STATUS(*err);
882         return NULL;
883     }
884 
885     /* create the actual converter */
886     myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, cnvName, locale, args.options, err);
887 
888     if (U_FAILURE(*err)) {
889         ucnv_close(myUConverter);
890         UTRACE_EXIT_STATUS(*err);
891         return NULL;
892     }
893 
894     UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
895     return myUConverter;
896 }
897 
898 
899 UConverter*
ucnv_createConverterFromSharedData(UConverter * myUConverter,UConverterSharedData * mySharedConverterData,const char * realName,const char * locale,uint32_t options,UErrorCode * err)900 ucnv_createConverterFromSharedData(UConverter *myUConverter,
901                                    UConverterSharedData *mySharedConverterData,
902                                    const char *realName, const char *locale, uint32_t options,
903                                    UErrorCode *err)
904 {
905     UBool isCopyLocal;
906 
907     if(myUConverter == NULL)
908     {
909         myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
910         if(myUConverter == NULL)
911         {
912             *err = U_MEMORY_ALLOCATION_ERROR;
913             return NULL;
914         }
915         isCopyLocal = FALSE;
916     } else {
917         isCopyLocal = TRUE;
918     }
919 
920     /* initialize the converter */
921     uprv_memset(myUConverter, 0, sizeof(UConverter));
922     myUConverter->isCopyLocal = isCopyLocal;
923     /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */
924     myUConverter->sharedData = mySharedConverterData;
925     myUConverter->options = options;
926     myUConverter->preFromUFirstCP = U_SENTINEL;
927     myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK;
928     myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK;
929     myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus;
930     myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar;
931     myUConverter->subChar1 = mySharedConverterData->staticData->subChar1;
932     myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
933     myUConverter->subChars = (uint8_t *)myUConverter->subUChars;
934     uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
935 
936     if(mySharedConverterData->impl->open != NULL) {
937         mySharedConverterData->impl->open(myUConverter, realName, locale, options, err);
938         if(U_FAILURE(*err)) {
939             ucnv_close(myUConverter);
940             return NULL;
941         }
942     }
943 
944     return myUConverter;
945 }
946 
947 static void
ucnv_flushAvailableConverterCache()948 ucnv_flushAvailableConverterCache() {
949     if (gAvailableConverters) {
950         umtx_lock(&cnvCacheMutex);
951         gAvailableConverterCount = 0;
952         uprv_free((char **)gAvailableConverters);
953         gAvailableConverters = NULL;
954         umtx_unlock(&cnvCacheMutex);
955     }
956 }
957 
958 /*Frees all shared immutable objects that aren't referred to (reference count = 0)
959  */
960 U_CAPI int32_t U_EXPORT2
ucnv_flushCache()961 ucnv_flushCache ()
962 {
963     UConverterSharedData *mySharedData = NULL;
964     int32_t pos;
965     int32_t tableDeletedNum = 0;
966     const UHashElement *e;
967     UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;
968     int32_t i, remaining;
969 
970     UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE);
971 
972     /* Close the default converter without creating a new one so that everything will be flushed. */
973     ucnv_close(u_getDefaultConverter(&status));
974 
975     /*if shared data hasn't even been lazy evaluated yet
976     * return 0
977     */
978     if (SHARED_DATA_HASHTABLE == NULL) {
979         UTRACE_EXIT_VALUE((int32_t)0);
980         return 0;
981     }
982 
983     /*creates an enumeration to iterate through every element in the
984     * table
985     *
986     * Synchronization:  holding cnvCacheMutex will prevent any other thread from
987     *                   accessing or modifying the hash table during the iteration.
988     *                   The reference count of an entry may be decremented by
989     *                   ucnv_close while the iteration is in process, but this is
990     *                   benign.  It can't be incremented (in ucnv_createConverter())
991     *                   because the sequence of looking up in the cache + incrementing
992     *                   is protected by cnvCacheMutex.
993     */
994     umtx_lock(&cnvCacheMutex);
995     /*
996      * double loop: A delta/extension-only converter has a pointer to its base table's
997      * shared data; the first iteration of the outer loop may see the delta converter
998      * before the base converter, and unloading the delta converter may get the base
999      * converter's reference counter down to 0.
1000      */
1001     i = 0;
1002     do {
1003         remaining = 0;
1004         pos = -1;
1005         while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
1006         {
1007             mySharedData = (UConverterSharedData *) e->value.pointer;
1008             /*deletes only if reference counter == 0 */
1009             if (mySharedData->referenceCounter == 0)
1010             {
1011                 tableDeletedNum++;
1012 
1013                 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
1014 
1015                 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
1016                 mySharedData->sharedDataCached = FALSE;
1017                 ucnv_deleteSharedConverterData (mySharedData);
1018             } else {
1019                 ++remaining;
1020             }
1021         }
1022     } while(++i == 1 && remaining > 0);
1023     umtx_unlock(&cnvCacheMutex);
1024 
1025     UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining);
1026 
1027     ucnv_flushAvailableConverterCache();
1028 
1029     UTRACE_EXIT_VALUE(tableDeletedNum);
1030     return tableDeletedNum;
1031 }
1032 
1033 /* available converters list --------------------------------------------------- */
1034 
haveAvailableConverterList(UErrorCode * pErrorCode)1035 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) {
1036     int needInit;
1037     UMTX_CHECK(&cnvCacheMutex, (gAvailableConverters == NULL), needInit);
1038     if (needInit) {
1039         UConverter tempConverter;
1040         UEnumeration *allConvEnum = NULL;
1041         uint16_t idx;
1042         uint16_t localConverterCount;
1043         uint16_t allConverterCount;
1044         UErrorCode localStatus;
1045         const char *converterName;
1046         const char **localConverterList;
1047 
1048         allConvEnum = ucnv_openAllNames(pErrorCode);
1049         allConverterCount = uenum_count(allConvEnum, pErrorCode);
1050         if (U_FAILURE(*pErrorCode)) {
1051             return FALSE;
1052         }
1053 
1054         /* We can't have more than "*converterTable" converters to open */
1055         localConverterList = (const char **) uprv_malloc(allConverterCount * sizeof(char*));
1056         if (!localConverterList) {
1057             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1058             return FALSE;
1059         }
1060 
1061         /* Open the default converter to make sure that it has first dibs in the hash table. */
1062         localStatus = U_ZERO_ERROR;
1063         ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus));
1064 
1065         localConverterCount = 0;
1066 
1067         for (idx = 0; idx < allConverterCount; idx++) {
1068             localStatus = U_ZERO_ERROR;
1069             converterName = uenum_next(allConvEnum, NULL, &localStatus);
1070             ucnv_close(ucnv_createConverter(&tempConverter, converterName, &localStatus));
1071             if (U_SUCCESS(localStatus)) {
1072                 localConverterList[localConverterCount++] = converterName;
1073             }
1074         }
1075         uenum_close(allConvEnum);
1076 
1077         umtx_lock(&cnvCacheMutex);
1078         if (gAvailableConverters == NULL) {
1079             gAvailableConverters = localConverterList;
1080             gAvailableConverterCount = localConverterCount;
1081             ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
1082         }
1083         else {
1084             uprv_free((char **)localConverterList);
1085         }
1086         umtx_unlock(&cnvCacheMutex);
1087     }
1088     return TRUE;
1089 }
1090 
1091 U_CFUNC uint16_t
ucnv_bld_countAvailableConverters(UErrorCode * pErrorCode)1092 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) {
1093     if (haveAvailableConverterList(pErrorCode)) {
1094         return gAvailableConverterCount;
1095     }
1096     return 0;
1097 }
1098 
1099 U_CFUNC const char *
ucnv_bld_getAvailableConverter(uint16_t n,UErrorCode * pErrorCode)1100 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
1101     if (haveAvailableConverterList(pErrorCode)) {
1102         if (n < gAvailableConverterCount) {
1103             return gAvailableConverters[n];
1104         }
1105         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1106     }
1107     return NULL;
1108 }
1109 
1110 /* default converter name --------------------------------------------------- */
1111 
1112 /*
1113 Copy the canonical converter name.
1114 ucnv_getDefaultName must be thread safe, which can call this function.
1115 
1116 ucnv_setDefaultName calls this function and it doesn't have to be
1117 thread safe because there is no reliable/safe way to reset the
1118 converter in use in all threads. If you did reset the converter, you
1119 would not be sure that retrieving a default converter for one string
1120 would be the same type of default converter for a successive string.
1121 Since the name is a returned via ucnv_getDefaultName without copying,
1122 you shouldn't be modifying or deleting the string from a separate thread.
1123 */
1124 static U_INLINE void
internalSetName(const char * name,UErrorCode * status)1125 internalSetName(const char *name, UErrorCode *status) {
1126     UConverterLookupData lookup;
1127     int32_t length=(int32_t)(uprv_strlen(name));
1128     UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL);
1129     const UConverterSharedData *algorithmicSharedData;
1130 
1131     lookup.locale[0] = 0;
1132     lookup.options = 0;
1133     lookup.realName = name;
1134     if(containsOption) {
1135         parseConverterOptions(lookup.realName, lookup.cnvName, lookup.locale, &lookup.options, status);
1136         lookup.realName = lookup.cnvName;
1137     }
1138     algorithmicSharedData = getAlgorithmicTypeFromName(lookup.realName);
1139 
1140     umtx_lock(&cnvCacheMutex);
1141 
1142     gDefaultAlgorithmicSharedData = algorithmicSharedData;
1143     gDefaultConverterContainsOption = containsOption;
1144     uprv_memcpy(gDefaultConverterNameBuffer, name, length);
1145     gDefaultConverterNameBuffer[length]=0;
1146     gDefaultConverterName = gDefaultConverterNameBuffer;
1147 
1148     ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
1149 
1150     umtx_unlock(&cnvCacheMutex);
1151 }
1152 
1153 /*
1154  * In order to be really thread-safe, the get function would have to take
1155  * a buffer parameter and copy the current string inside a mutex block.
1156  * This implementation only tries to be really thread-safe while
1157  * setting the name.
1158  * It assumes that setting a pointer is atomic.
1159  */
1160 
1161 U_CAPI const char*  U_EXPORT2
ucnv_getDefaultName()1162 ucnv_getDefaultName() {
1163     /* local variable to be thread-safe */
1164     const char *name;
1165 
1166     /*
1167     Multiple calls to ucnv_getDefaultName must be thread safe,
1168     but ucnv_setDefaultName is not thread safe.
1169     */
1170     UMTX_CHECK(&cnvCacheMutex, gDefaultConverterName, name);
1171     if(name==NULL) {
1172         UErrorCode errorCode = U_ZERO_ERROR;
1173         UConverter *cnv = NULL;
1174 
1175         name = uprv_getDefaultCodepage();
1176 
1177         /* if the name is there, test it out and get the canonical name with options */
1178         if(name != NULL) {
1179             cnv = ucnv_open(name, &errorCode);
1180             if(U_SUCCESS(errorCode) && cnv != NULL) {
1181                 name = ucnv_getName(cnv, &errorCode);
1182             }
1183         }
1184 
1185         if(name == NULL || name[0] == 0
1186             || U_FAILURE(errorCode) || cnv == NULL
1187             || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer))
1188         {
1189             /* Panic time, let's use a fallback. */
1190 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
1191             name = "US-ASCII";
1192             /* there is no 'algorithmic' converter for EBCDIC */
1193 #elif defined(OS390)
1194             name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING;
1195 #else
1196             name = "ibm-37_P100-1995";
1197 #endif
1198         }
1199 
1200         internalSetName(name, &errorCode);
1201 
1202         /* The close may make the current name go away. */
1203         ucnv_close(cnv);
1204     }
1205 
1206     return name;
1207 }
1208 
1209 /*
1210 This function is not thread safe, and it can't be thread safe.
1211 See internalSetName or the API reference for details.
1212 */
1213 U_CAPI void U_EXPORT2
ucnv_setDefaultName(const char * converterName)1214 ucnv_setDefaultName(const char *converterName) {
1215     if(converterName==NULL) {
1216         /* reset to the default codepage */
1217         gDefaultConverterName=NULL;
1218     } else {
1219         UErrorCode errorCode = U_ZERO_ERROR;
1220         UConverter *cnv = NULL;
1221         const char *name = NULL;
1222 
1223         /* if the name is there, test it out and get the canonical name with options */
1224         cnv = ucnv_open(converterName, &errorCode);
1225         if(U_SUCCESS(errorCode) && cnv != NULL) {
1226             name = ucnv_getName(cnv, &errorCode);
1227         }
1228 
1229         if(U_SUCCESS(errorCode) && name!=NULL) {
1230             internalSetName(name, &errorCode);
1231         }
1232         /* else this converter is bad to use. Don't change it to a bad value. */
1233 
1234         /* The close may make the current name go away. */
1235         ucnv_close(cnv);
1236     }
1237 }
1238 
1239 /* data swapping ------------------------------------------------------------ */
1240 
1241 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */
1242 
1243 #if !UCONFIG_NO_LEGACY_CONVERSION
1244 
1245 U_CAPI int32_t U_EXPORT2
ucnv_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1246 ucnv_swap(const UDataSwapper *ds,
1247           const void *inData, int32_t length, void *outData,
1248           UErrorCode *pErrorCode) {
1249     const UDataInfo *pInfo;
1250     int32_t headerSize;
1251 
1252     const uint8_t *inBytes;
1253     uint8_t *outBytes;
1254 
1255     uint32_t offset, count, staticDataSize;
1256     int32_t size;
1257 
1258     const UConverterStaticData *inStaticData;
1259     UConverterStaticData *outStaticData;
1260 
1261     const _MBCSHeader *inMBCSHeader;
1262     _MBCSHeader *outMBCSHeader;
1263     _MBCSHeader mbcsHeader;
1264     uint32_t mbcsHeaderLength;
1265     UBool noFromU=FALSE;
1266 
1267     uint8_t outputType;
1268 
1269     int32_t maxFastUChar, mbcsIndexLength;
1270 
1271     const int32_t *inExtIndexes;
1272     int32_t extOffset;
1273 
1274     /* udata_swapDataHeader checks the arguments */
1275     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1276     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1277         return 0;
1278     }
1279 
1280     /* check data format and format version */
1281     pInfo=(const UDataInfo *)((const char *)inData+4);
1282     if(!(
1283         pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
1284         pInfo->dataFormat[1]==0x6e &&
1285         pInfo->dataFormat[2]==0x76 &&
1286         pInfo->dataFormat[3]==0x74 &&
1287         pInfo->formatVersion[0]==6 &&
1288         pInfo->formatVersion[1]>=2
1289     )) {
1290         udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
1291                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1292                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1293                          pInfo->formatVersion[0], pInfo->formatVersion[1]);
1294         *pErrorCode=U_UNSUPPORTED_ERROR;
1295         return 0;
1296     }
1297 
1298     inBytes=(const uint8_t *)inData+headerSize;
1299     outBytes=(uint8_t *)outData+headerSize;
1300 
1301     /* read the initial UConverterStaticData structure after the UDataInfo header */
1302     inStaticData=(const UConverterStaticData *)inBytes;
1303     outStaticData=(UConverterStaticData *)outBytes;
1304 
1305     if(length<0) {
1306         staticDataSize=ds->readUInt32(inStaticData->structSize);
1307     } else {
1308         length-=headerSize;
1309         if( length<sizeof(UConverterStaticData) ||
1310             (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
1311         ) {
1312             udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
1313                              length);
1314             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1315             return 0;
1316         }
1317     }
1318 
1319     if(length>=0) {
1320         /* swap the static data */
1321         if(inStaticData!=outStaticData) {
1322             uprv_memcpy(outStaticData, inStaticData, staticDataSize);
1323         }
1324 
1325         ds->swapArray32(ds, &inStaticData->structSize, 4,
1326                            &outStaticData->structSize, pErrorCode);
1327         ds->swapArray32(ds, &inStaticData->codepage, 4,
1328                            &outStaticData->codepage, pErrorCode);
1329 
1330         ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name),
1331                             outStaticData->name, pErrorCode);
1332         if(U_FAILURE(*pErrorCode)) {
1333             udata_printError(ds, "ucnv_swap(): error swapping converter name\n");
1334             return 0;
1335         }
1336     }
1337 
1338     inBytes+=staticDataSize;
1339     outBytes+=staticDataSize;
1340     if(length>=0) {
1341         length-=(int32_t)staticDataSize;
1342     }
1343 
1344     /* check for supported conversionType values */
1345     if(inStaticData->conversionType==UCNV_MBCS) {
1346         /* swap MBCS data */
1347         inMBCSHeader=(const _MBCSHeader *)inBytes;
1348         outMBCSHeader=(_MBCSHeader *)outBytes;
1349 
1350         if(0<=length && length<sizeof(_MBCSHeader)) {
1351             udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
1352                                 length);
1353             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1354             return 0;
1355         }
1356         if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
1357             mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
1358         } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
1359                   ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
1360                    MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
1361         ) {
1362             mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
1363             noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
1364         } else {
1365             udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
1366                              inMBCSHeader->version[0], inMBCSHeader->version[1]);
1367             *pErrorCode=U_UNSUPPORTED_ERROR;
1368             return 0;
1369         }
1370 
1371         uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4);
1372         mbcsHeader.countStates=         ds->readUInt32(inMBCSHeader->countStates);
1373         mbcsHeader.countToUFallbacks=   ds->readUInt32(inMBCSHeader->countToUFallbacks);
1374         mbcsHeader.offsetToUCodeUnits=  ds->readUInt32(inMBCSHeader->offsetToUCodeUnits);
1375         mbcsHeader.offsetFromUTable=    ds->readUInt32(inMBCSHeader->offsetFromUTable);
1376         mbcsHeader.offsetFromUBytes=    ds->readUInt32(inMBCSHeader->offsetFromUBytes);
1377         mbcsHeader.flags=               ds->readUInt32(inMBCSHeader->flags);
1378         mbcsHeader.fromUBytesLength=    ds->readUInt32(inMBCSHeader->fromUBytesLength);
1379         /* mbcsHeader.options have been read above */
1380 
1381         extOffset=(int32_t)(mbcsHeader.flags>>8);
1382         outputType=(uint8_t)mbcsHeader.flags;
1383         if(noFromU && outputType==MBCS_OUTPUT_1) {
1384             udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
1385             *pErrorCode=U_UNSUPPORTED_ERROR;
1386             return 0;
1387         }
1388 
1389         /* make sure that the output type is known */
1390         switch(outputType) {
1391         case MBCS_OUTPUT_1:
1392         case MBCS_OUTPUT_2:
1393         case MBCS_OUTPUT_3:
1394         case MBCS_OUTPUT_4:
1395         case MBCS_OUTPUT_3_EUC:
1396         case MBCS_OUTPUT_4_EUC:
1397         case MBCS_OUTPUT_2_SISO:
1398         case MBCS_OUTPUT_EXT_ONLY:
1399             /* OK */
1400             break;
1401         default:
1402             udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
1403                              outputType);
1404             *pErrorCode=U_UNSUPPORTED_ERROR;
1405             return 0;
1406         }
1407 
1408         /* calculate the length of the MBCS data */
1409 
1410         /*
1411          * utf8Friendly MBCS files (mbcsHeader.version 4.3)
1412          * contain an additional mbcsIndex table:
1413          *   uint16_t[(maxFastUChar+1)>>6];
1414          * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
1415          */
1416         maxFastUChar=0;
1417         mbcsIndexLength=0;
1418         if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 &&
1419             mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0
1420         ) {
1421             maxFastUChar=(maxFastUChar<<8)|0xff;
1422             mbcsIndexLength=((maxFastUChar+1)>>6)*2;  /* number of bytes */
1423         }
1424 
1425         if(extOffset==0) {
1426             size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
1427             if(!noFromU) {
1428                 size+=(int32_t)mbcsHeader.fromUBytesLength;
1429             }
1430 
1431             /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
1432             inExtIndexes=NULL;
1433         } else {
1434             /* there is extension data after the base data, see ucnv_ext.h */
1435             if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
1436                 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
1437                                  length);
1438                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1439                 return 0;
1440             }
1441 
1442             inExtIndexes=(const int32_t *)(inBytes+extOffset);
1443             size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]);
1444         }
1445 
1446         if(length>=0) {
1447             if(length<size) {
1448                 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
1449                                  length);
1450                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1451                 return 0;
1452             }
1453 
1454             /* copy the data for inaccessible bytes */
1455             if(inBytes!=outBytes) {
1456                 uprv_memcpy(outBytes, inBytes, size);
1457             }
1458 
1459             /* swap the MBCSHeader, except for the version field */
1460             count=mbcsHeaderLength*4;
1461             ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
1462                                &outMBCSHeader->countStates, pErrorCode);
1463 
1464             if(outputType==MBCS_OUTPUT_EXT_ONLY) {
1465                 /*
1466                  * extension-only file,
1467                  * contains a base name instead of normal base table data
1468                  */
1469 
1470                 /* swap the base name, between the header and the extension data */
1471                 const char *inBaseName=(const char *)inBytes+count;
1472                 char *outBaseName=(char *)outBytes+count;
1473                 ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
1474                                     outBaseName, pErrorCode);
1475             } else {
1476                 /* normal file with base table data */
1477 
1478                 /* swap the state table, 1kB per state */
1479                 offset=count;
1480                 count=mbcsHeader.countStates*1024;
1481                 ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1482                                    outBytes+offset, pErrorCode);
1483 
1484                 /* swap the toUFallbacks[] */
1485                 offset+=count;
1486                 count=mbcsHeader.countToUFallbacks*8;
1487                 ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1488                                    outBytes+offset, pErrorCode);
1489 
1490                 /* swap the unicodeCodeUnits[] */
1491                 offset=mbcsHeader.offsetToUCodeUnits;
1492                 count=mbcsHeader.offsetFromUTable-offset;
1493                 ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1494                                    outBytes+offset, pErrorCode);
1495 
1496                 /* offset to the stage 1 table, independent of the outputType */
1497                 offset=mbcsHeader.offsetFromUTable;
1498 
1499                 if(outputType==MBCS_OUTPUT_1) {
1500                     /* SBCS: swap the fromU tables, all 16 bits wide */
1501                     count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength;
1502                     ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1503                                        outBytes+offset, pErrorCode);
1504                 } else {
1505                     /* otherwise: swap the stage tables separately */
1506 
1507                     /* stage 1 table: uint16_t[0x440 or 0x40] */
1508                     if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
1509                         count=0x440*2; /* for all of Unicode */
1510                     } else {
1511                         count=0x40*2; /* only BMP */
1512                     }
1513                     ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1514                                        outBytes+offset, pErrorCode);
1515 
1516                     /* stage 2 table: uint32_t[] */
1517                     offset+=count;
1518                     count=mbcsHeader.offsetFromUBytes-offset;
1519                     ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1520                                        outBytes+offset, pErrorCode);
1521 
1522                     /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
1523                     offset=mbcsHeader.offsetFromUBytes;
1524                     count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
1525                     switch(outputType) {
1526                     case MBCS_OUTPUT_2:
1527                     case MBCS_OUTPUT_3_EUC:
1528                     case MBCS_OUTPUT_2_SISO:
1529                         ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1530                                            outBytes+offset, pErrorCode);
1531                         break;
1532                     case MBCS_OUTPUT_4:
1533                         ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1534                                            outBytes+offset, pErrorCode);
1535                         break;
1536                     default:
1537                         /* just uint8_t[], nothing to swap */
1538                         break;
1539                     }
1540 
1541                     if(mbcsIndexLength!=0) {
1542                         offset+=count;
1543                         count=mbcsIndexLength;
1544                         ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1545                                            outBytes+offset, pErrorCode);
1546                     }
1547                 }
1548             }
1549 
1550             if(extOffset!=0) {
1551                 /* swap the extension data */
1552                 inBytes+=extOffset;
1553                 outBytes+=extOffset;
1554 
1555                 /* swap toUTable[] */
1556                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]);
1557                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]);
1558                 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
1559 
1560                 /* swap toUUChars[] */
1561                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]);
1562                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]);
1563                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1564 
1565                 /* swap fromUTableUChars[] */
1566                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]);
1567                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]);
1568                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1569 
1570                 /* swap fromUTableValues[] */
1571                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]);
1572                 /* same length as for fromUTableUChars[] */
1573                 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
1574 
1575                 /* no need to swap fromUBytes[] */
1576 
1577                 /* swap fromUStage12[] */
1578                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]);
1579                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]);
1580                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1581 
1582                 /* swap fromUStage3[] */
1583                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]);
1584                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]);
1585                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1586 
1587                 /* swap fromUStage3b[] */
1588                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]);
1589                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]);
1590                 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
1591 
1592                 /* swap indexes[] */
1593                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]);
1594                 ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode);
1595             }
1596         }
1597     } else {
1598         udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
1599                          inStaticData->conversionType);
1600         *pErrorCode=U_UNSUPPORTED_ERROR;
1601         return 0;
1602     }
1603 
1604     return headerSize+(int32_t)staticDataSize+size;
1605 }
1606 
1607 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1608 
1609 #endif
1610