• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 *
10 *  ucnv_io.c:
11 *  initializes global variables and defines functions pertaining to converter
12 *  name resolution aspect of the conversion code.
13 *
14 *   new implementation:
15 *
16 *   created on: 1999nov22
17 *   created by: Markus W. Scherer
18 *
19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
20 *   with aliases for converter names.
21 *
22 *   Date        Name        Description
23 *   11/22/1999  markus      Created
24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
25 *                           Now an alias can map to different converters
26 *                           depending on the specified standard.
27 *******************************************************************************
28 */
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_CONVERSION
33 
34 #include "unicode/ucnv.h"
35 #include "unicode/udata.h"
36 
37 #include "umutex.h"
38 #include "uarrsort.h"
39 #include "udataswp.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include "ucnv_io.h"
43 #include "uenumimp.h"
44 #include "ucln_cmn.h"
45 
46 /* Format of cnvalias.icu -----------------------------------------------------
47  *
48  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
49  * This binary form contains several tables. All indexes are to uint16_t
50  * units, and not to the bytes (uint8_t units). Addressing everything on
51  * 16-bit boundaries allows us to store more information with small index
52  * numbers, which are also 16-bit in size. The majority of the table (except
53  * the string table) are 16-bit numbers.
54  *
55  * First there is the size of the Table of Contents (TOC). The TOC
56  * entries contain the size of each section. In order to find the offset
57  * you just need to sum up the previous offsets.
58  * The TOC length and entries are an array of uint32_t values.
59  * The first section after the TOC starts immediately after the TOC.
60  *
61  * 1) This section contains a list of converters. This list contains indexes
62  * into the string table for the converter name. The index of this list is
63  * also used by other sections, which are mentioned later on.
64  * This list is not sorted.
65  *
66  * 2) This section contains a list of tags. This list contains indexes
67  * into the string table for the tag name. The index of this list is
68  * also used by other sections, which are mentioned later on.
69  * This list is in priority order of standards.
70  *
71  * 3) This section contains a list of sorted unique aliases. This
72  * list contains indexes into the string table for the alias name. The
73  * index of this list is also used by other sections, like the 4th section.
74  * The index for the 3rd and 4th section is used to get the
75  * alias -> converter name mapping. Section 3 and 4 form a two column table.
76  * Some of the most significant bits of each index may contain other
77  * information (see findConverter for details).
78  *
79  * 4) This section contains a list of mapped converter names. Consider this
80  * as a table that maps the 3rd section to the 1st section. This list contains
81  * indexes into the 1st section. The index of this list is the same index in
82  * the 3rd section. There is also some extra information in the high bits of
83  * each converter index in this table. Currently it's only used to say that
84  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
85  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
86  * the predigested form of the 5th section so that an alias lookup can be fast.
87  *
88  * 5) This section contains a 2D array with indexes to the 6th section. This
89  * section is the full form of all alias mappings. The column index is the
90  * index into the converter list (column header). The row index is the index
 * to tag list (row header). This 2D array is the top part of a 3D array. The
92  * third dimension is in the 6th section.
93  *
 * 6) This is a blob of variable-length arrays. Each array starts with a size,
95  * and is followed by indexes to alias names in the string table. This is
96  * the third dimension to the section 5. No other section should be referencing
97  * this section.
98  *
99  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
100  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
101  * what type of string normalization is used among other potential things in the
102  * future.
103  *
104  * 8) This is the string table. All strings are indexed on an even address.
105  * There are two reasons for this. First many chip architectures locate strings
106  * faster on even address boundaries. Second, since all indexes are 16-bit
107  * numbers, this string table can be 128KB in size instead of 64KB when we
108  * only have strings starting on an even address.
109  *
110  * 9) When present this is a set of prenormalized strings from section 8. This
111  * table contains normalized strings with the dashes and spaces stripped out,
112  * and all strings lowercased. In the future, the options in section 7 may state
113  * other types of normalization.
114  *
115  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
116  * has a unique alias among all converters. That same alias can
117  * be mentioned in other standards on different converters,
118  * but only one alias per tag can be unique.
119  *
120  *
121  *              Converter Names (Usually in TR22 form)
122  *           -------------------------------------------.
123  *     T    /                                          /|
124  *     a   /                                          / |
125  *     g  /                                          /  |
126  *     s /                                          /   |
127  *      /                                          /    |
128  *      ------------------------------------------/     |
129  *    A |                                         |     |
130  *    l |                                         |     |
131  *    i |                                         |    /
132  *    a |                                         |   /
133  *    s |                                         |  /
134  *    e |                                         | /
135  *    s |                                         |/
136  *      -------------------------------------------
137  *
138  *
139  *
140  * Here is what it really looks like. It's like swiss cheese.
141  * There are holes. Some converters aren't recognized by
142  * a standard, or they are really old converters that the
143  * standard doesn't recognize anymore.
144  *
145  *              Converter Names (Usually in TR22 form)
146  *           -------------------------------------------.
147  *     T    /##########################################/|
148  *     a   /     #            #                       /#
149  *     g  /  #      ##     ##     ### # ### ### ### #/
150  *     s / #             #####  ####        ##  ## #/#
151  *      / ### # # ##  #  #   #          ### # #   #/##
152  *      ------------------------------------------/# #
153  *    A |### # # ##  #  #   #          ### # #   #|# #
154  *    l |# # #    #     #               ## #     #|# #
155  *    i |# # #    #     #                #       #|#
156  *    a |#                                       #|#
157  *    s |                                        #|#
158  *    e
159  *    s
160  *
161  */
162 
/**
 * Per-enumeration state used by the UEnumeration API callbacks in this file.
 */
typedef struct UAliasContext {
    /* Offset of the alias list inside gMainTable.taggedAliasLists; 0 means "no list". */
    uint32_t listOffset;
    /* Index of the next alias that the enumeration hands out from that list. */
    uint32_t listIdx;
} UAliasContext;
170 
171 static const char DATA_NAME[] = "cnvalias";
172 static const char DATA_TYPE[] = "icu";
173 
174 static UDataMemory *gAliasData=NULL;
175 
/*
 * Indexes into the uint32_t table of contents at the start of cnvalias.icu.
 * Entry 0 is the TOC length itself; the following entries are the sizes of
 * the individual sections.
 */
enum {
    tocLengthIndex             = 0,
    converterListIndex         = 1,
    tagListIndex               = 2,
    aliasListIndex             = 3,
    untaggedConvArrayIndex     = 4,
    taggedAliasArrayIndex      = 5,
    taggedAliasListsIndex      = 6,
    tableOptionsIndex          = 7,
    stringTableIndex           = 8,
    normalizedStringTableIndex = 9,
    offsetsCount,    /* length of the swapper's temporary offsets[] */
    minTocLength = 8 /* min. tocLength in the file, does not count the tocLengthIndex! */
};
190 
191 static const UConverterAliasOptions defaultTableOptions = {
192     UCNV_IO_UNNORMALIZED,
193     0 /* containsCnvOptionInfo */
194 };
195 static UConverterAlias gMainTable;
196 
/*
 * Turn a 16-bit-unit index into a pointer into the (normalized) string table.
 * The whole expansion is parenthesized so that postfix operators applied to
 * the macro result, e.g. GET_STRING(i)[0], act on the char pointer and not
 * on the operand of the cast.
 */
#define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
#define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
199 
200 static UBool U_CALLCONV
isAcceptable(void * context,const char * type,const char * name,const UDataInfo * pInfo)201 isAcceptable(void *context,
202              const char *type, const char *name,
203              const UDataInfo *pInfo) {
204     return (UBool)(
205         pInfo->size>=20 &&
206         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
207         pInfo->charsetFamily==U_CHARSET_FAMILY &&
208         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
209         pInfo->dataFormat[1]==0x76 &&
210         pInfo->dataFormat[2]==0x41 &&
211         pInfo->dataFormat[3]==0x6c &&
212         pInfo->formatVersion[0]==3);
213 }
214 
ucnv_io_cleanup(void)215 static UBool U_CALLCONV ucnv_io_cleanup(void)
216 {
217     if (gAliasData) {
218         udata_close(gAliasData);
219         gAliasData = NULL;
220     }
221 
222     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
223 
224     return TRUE;                   /* Everything was cleaned up */
225 }
226 
227 static UBool
haveAliasData(UErrorCode * pErrorCode)228 haveAliasData(UErrorCode *pErrorCode) {
229     int needInit;
230 
231     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
232         return FALSE;
233     }
234 
235     UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
236 
237     /* load converter alias data from file if necessary */
238     if (needInit) {
239         UDataMemory *data;
240         const uint16_t *table;
241         const uint32_t *sectionSizes;
242         uint32_t tableStart;
243         uint32_t currOffset;
244 
245         data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
246         if(U_FAILURE(*pErrorCode)) {
247             return FALSE;
248         }
249 
250         sectionSizes = (const uint32_t *)udata_getMemory(data);
251         table = (const uint16_t *)sectionSizes;
252 
253         tableStart      = sectionSizes[0];
254         if (tableStart < minTocLength) {
255             *pErrorCode = U_INVALID_FORMAT_ERROR;
256             udata_close(data);
257             return FALSE;
258         }
259 
260         umtx_lock(NULL);
261         if(gAliasData==NULL) {
262             gAliasData = data;
263             data=NULL;
264 
265             gMainTable.converterListSize      = sectionSizes[1];
266             gMainTable.tagListSize            = sectionSizes[2];
267             gMainTable.aliasListSize          = sectionSizes[3];
268             gMainTable.untaggedConvArraySize  = sectionSizes[4];
269             gMainTable.taggedAliasArraySize   = sectionSizes[5];
270             gMainTable.taggedAliasListsSize   = sectionSizes[6];
271             gMainTable.optionTableSize        = sectionSizes[7];
272             gMainTable.stringTableSize        = sectionSizes[8];
273 
274             if (tableStart > 8) {
275                 gMainTable.normalizedStringTableSize = sectionSizes[9];
276             }
277 
278             currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
279             gMainTable.converterList = table + currOffset;
280 
281             currOffset += gMainTable.converterListSize;
282             gMainTable.tagList = table + currOffset;
283 
284             currOffset += gMainTable.tagListSize;
285             gMainTable.aliasList = table + currOffset;
286 
287             currOffset += gMainTable.aliasListSize;
288             gMainTable.untaggedConvArray = table + currOffset;
289 
290             currOffset += gMainTable.untaggedConvArraySize;
291             gMainTable.taggedAliasArray = table + currOffset;
292 
293             /* aliasLists is a 1's based array, but it has a padding character */
294             currOffset += gMainTable.taggedAliasArraySize;
295             gMainTable.taggedAliasLists = table + currOffset;
296 
297             currOffset += gMainTable.taggedAliasListsSize;
298             if (gMainTable.optionTableSize > 0
299                 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
300             {
301                 /* Faster table */
302                 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
303             }
304             else {
305                 /* Smaller table, or I can't handle this normalization mode!
306                 Use the original slower table lookup. */
307                 gMainTable.optionTable = &defaultTableOptions;
308             }
309 
310             currOffset += gMainTable.optionTableSize;
311             gMainTable.stringTable = table + currOffset;
312 
313             currOffset += gMainTable.stringTableSize;
314             gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
315                 ? gMainTable.stringTable : (table + currOffset));
316 
317             ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
318         }
319         umtx_unlock(NULL);
320 
321         /* if a different thread set it first, then close the extra data */
322         if(data!=NULL) {
323             udata_close(data); /* NULL if it was set correctly */
324         }
325     }
326 
327     return TRUE;
328 }
329 
330 static U_INLINE UBool
isAlias(const char * alias,UErrorCode * pErrorCode)331 isAlias(const char *alias, UErrorCode *pErrorCode) {
332     if(alias==NULL) {
333         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
334         return FALSE;
335     }
336     return (UBool)(*alias!=0);
337 }
338 
getTagNumber(const char * tagname)339 static uint32_t getTagNumber(const char *tagname) {
340     if (gMainTable.tagList) {
341         uint32_t tagNum;
342         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
343             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
344                 return tagNum;
345             }
346         }
347     }
348 
349     return UINT32_MAX;
350 }
351 
/* character types relevant for ucnv_compareNames() */
enum {
    IGNORE,   /* neither a letter nor a digit */
    ZERO,     /* the digit 0 */
    NONZERO,  /* one of the digits 1..9 */
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/*
 * character types for ASCII 00..7F
 * Letters of either case map to the code of the lowercase letter.
 */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Character type of c according to asciiTypes[]; bytes outside 00..7F are
 * IGNOREd. The argument is parenthesized everywhere so that a compound
 * expression is converted as a whole before it is used as the table index.
 * Note that c is evaluated more than once.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)IGNORE)
373 
374 /* character types for EBCDIC 80..FF */
375 static const uint8_t ebcdicTypes[128] = {
376     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
377     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
378     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
381     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
382     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
383     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
384 };
385 
386 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
387 
388 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
389 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
390 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
391 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
392 #else
393 #   error U_CHARSET_FAMILY is not valid
394 #endif
395 
396 /* @see ucnv_compareNames */
397 U_CFUNC char * U_EXPORT2
ucnv_io_stripASCIIForCompare(char * dst,const char * name)398 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
399     char *dstItr = dst;
400     uint8_t type, nextType;
401     char c1;
402     UBool afterDigit = FALSE;
403 
404     while ((c1 = *name++) != 0) {
405         type = GET_ASCII_TYPE(c1);
406         switch (type) {
407         case IGNORE:
408             afterDigit = FALSE;
409             continue; /* ignore all but letters and digits */
410         case ZERO:
411             if (!afterDigit) {
412                 nextType = GET_ASCII_TYPE(*name);
413                 if (nextType == ZERO || nextType == NONZERO) {
414                     continue; /* ignore leading zero before another digit */
415                 }
416             }
417             break;
418         case NONZERO:
419             afterDigit = TRUE;
420             break;
421         default:
422             c1 = (char)type; /* lowercased letter */
423             afterDigit = FALSE;
424             break;
425         }
426         *dstItr++ = c1;
427     }
428     *dstItr = 0;
429     return dst;
430 }
431 
432 U_CFUNC char * U_EXPORT2
ucnv_io_stripEBCDICForCompare(char * dst,const char * name)433 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
434     char *dstItr = dst;
435     uint8_t type, nextType;
436     char c1;
437     UBool afterDigit = FALSE;
438 
439     while ((c1 = *name++) != 0) {
440         type = GET_EBCDIC_TYPE(c1);
441         switch (type) {
442         case IGNORE:
443             afterDigit = FALSE;
444             continue; /* ignore all but letters and digits */
445         case ZERO:
446             if (!afterDigit) {
447                 nextType = GET_EBCDIC_TYPE(*name);
448                 if (nextType == ZERO || nextType == NONZERO) {
449                     continue; /* ignore leading zero before another digit */
450                 }
451             }
452             break;
453         case NONZERO:
454             afterDigit = TRUE;
455             break;
456         default:
457             c1 = (char)type; /* lowercased letter */
458             afterDigit = FALSE;
459             break;
460         }
461         *dstItr++ = c1;
462     }
463     *dstItr = 0;
464     return dst;
465 }
466 
467 /**
468  * Do a fuzzy compare of two converter/alias names.
469  * The comparison is case-insensitive, ignores leading zeroes if they are not
470  * followed by further digits, and ignores all but letters and digits.
471  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
472  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
473  * at http://www.unicode.org/reports/tr22/
474  *
475  * This is a symmetrical (commutative) operation; order of arguments
476  * is insignificant.  This is an important property for sorting the
477  * list (when the list is preprocessed into binary form) and for
478  * performing binary searches on it at run time.
479  *
480  * @param name1 a converter name or alias, zero-terminated
481  * @param name2 a converter name or alias, zero-terminated
482  * @return 0 if the names match, or a negative value if the name1
483  * lexically precedes name2, or a positive value if the name1
484  * lexically follows name2.
485  *
486  * @see ucnv_io_stripForCompare
487  */
488 U_CAPI int U_EXPORT2
ucnv_compareNames(const char * name1,const char * name2)489 ucnv_compareNames(const char *name1, const char *name2) {
490     int rc;
491     uint8_t type, nextType;
492     char c1, c2;
493     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
494 
495     for (;;) {
496         while ((c1 = *name1++) != 0) {
497             type = GET_CHAR_TYPE(c1);
498             switch (type) {
499             case IGNORE:
500                 afterDigit1 = FALSE;
501                 continue; /* ignore all but letters and digits */
502             case ZERO:
503                 if (!afterDigit1) {
504                     nextType = GET_CHAR_TYPE(*name1);
505                     if (nextType == ZERO || nextType == NONZERO) {
506                         continue; /* ignore leading zero before another digit */
507                     }
508                 }
509                 break;
510             case NONZERO:
511                 afterDigit1 = TRUE;
512                 break;
513             default:
514                 c1 = (char)type; /* lowercased letter */
515                 afterDigit1 = FALSE;
516                 break;
517             }
518             break; /* deliver c1 */
519         }
520         while ((c2 = *name2++) != 0) {
521             type = GET_CHAR_TYPE(c2);
522             switch (type) {
523             case IGNORE:
524                 afterDigit2 = FALSE;
525                 continue; /* ignore all but letters and digits */
526             case ZERO:
527                 if (!afterDigit2) {
528                     nextType = GET_CHAR_TYPE(*name2);
529                     if (nextType == ZERO || nextType == NONZERO) {
530                         continue; /* ignore leading zero before another digit */
531                     }
532                 }
533                 break;
534             case NONZERO:
535                 afterDigit2 = TRUE;
536                 break;
537             default:
538                 c2 = (char)type; /* lowercased letter */
539                 afterDigit2 = FALSE;
540                 break;
541             }
542             break; /* deliver c2 */
543         }
544 
545         /* If we reach the ends of both strings then they match */
546         if ((c1|c2)==0) {
547             return 0;
548         }
549 
550         /* Case-insensitive comparison */
551         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
552         if (rc != 0) {
553             return rc;
554         }
555     }
556 }
557 
558 /*
559  * search for an alias
560  * return the converter number index for gConverterList
561  */
562 static U_INLINE uint32_t
findConverter(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)563 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
564     uint32_t mid, start, limit;
565     uint32_t lastMid;
566     int result;
567     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
568     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
569 
570     if (!isUnnormalized) {
571         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
572             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
573             return UINT32_MAX;
574         }
575 
576         /* Lower case and remove ignoreable characters. */
577         ucnv_io_stripForCompare(strippedName, alias);
578         alias = strippedName;
579     }
580 
581     /* do a binary search for the alias */
582     start = 0;
583     limit = gMainTable.untaggedConvArraySize;
584     mid = limit;
585     lastMid = UINT32_MAX;
586 
587     for (;;) {
588         mid = (uint32_t)((start + limit) / 2);
589         if (lastMid == mid) {   /* Have we moved? */
590             break;  /* We haven't moved, and it wasn't found. */
591         }
592         lastMid = mid;
593         if (isUnnormalized) {
594             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
595         }
596         else {
597             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
598         }
599 
600         if (result < 0) {
601             limit = mid;
602         } else if (result > 0) {
603             start = mid;
604         } else {
605             /* Since the gencnval tool folds duplicates into one entry,
606              * this alias in gAliasList is unique, but different standards
607              * may map an alias to different converters.
608              */
609             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
610                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
611             }
612             /* State whether the canonical converter name contains an option.
613             This information is contained in this list in order to maintain backward & forward compatibility. */
614             if (containsOption) {
615                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
616                 *containsOption = (UBool)((containsCnvOptionInfo
617                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
618                     || !containsCnvOptionInfo);
619             }
620             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
621         }
622     }
623 
624     return UINT32_MAX;
625 }
626 
627 /*
628  * Is this alias in this list?
629  * alias and listOffset should be non-NULL.
630  */
631 static U_INLINE UBool
isAliasInList(const char * alias,uint32_t listOffset)632 isAliasInList(const char *alias, uint32_t listOffset) {
633     if (listOffset) {
634         uint32_t currAlias;
635         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
636         /* +1 to skip listCount */
637         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
638         for (currAlias = 0; currAlias < listCount; currAlias++) {
639             if (currList[currAlias]
640                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
641             {
642                 return TRUE;
643             }
644         }
645     }
646     return FALSE;
647 }
648 
649 /*
650  * Search for an standard name of an alias (what is the default name
651  * that this standard uses?)
652  * return the listOffset for gTaggedAliasLists. If it's 0,
653  * the it couldn't be found, but the parameters are valid.
654  */
655 static uint32_t
findTaggedAliasListsOffset(const char * alias,const char * standard,UErrorCode * pErrorCode)656 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
657     uint32_t idx;
658     uint32_t listOffset;
659     uint32_t convNum;
660     UErrorCode myErr = U_ZERO_ERROR;
661     uint32_t tagNum = getTagNumber(standard);
662 
663     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
664     convNum = findConverter(alias, NULL, &myErr);
665     if (myErr != U_ZERO_ERROR) {
666         *pErrorCode = myErr;
667     }
668 
669     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
670         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
671         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
672             return listOffset;
673         }
674         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
675             /* Uh Oh! They used an ambiguous alias.
676                We have to search the whole swiss cheese starting
677                at the highest standard affinity.
678                This may take a while.
679             */
680             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
681                 listOffset = gMainTable.taggedAliasArray[idx];
682                 if (listOffset && isAliasInList(alias, listOffset)) {
683                     uint32_t currTagNum = idx/gMainTable.converterListSize;
684                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
685                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
686                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
687                         return tempListOffset;
688                     }
689                     /* else keep on looking */
690                     /* We could speed this up by starting on the next row
691                        because an alias is unique per row, right now.
692                        This would change if alias versioning appears. */
693                 }
694             }
695             /* The standard doesn't know about the alias */
696         }
697         /* else no default name */
698         return 0;
699     }
700     /* else converter or tag not found */
701 
702     return UINT32_MAX;
703 }
704 
705 /* Return the canonical name */
706 static uint32_t
findTaggedConverterNum(const char * alias,const char * standard,UErrorCode * pErrorCode)707 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
708     uint32_t idx;
709     uint32_t listOffset;
710     uint32_t convNum;
711     UErrorCode myErr = U_ZERO_ERROR;
712     uint32_t tagNum = getTagNumber(standard);
713 
714     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
715     convNum = findConverter(alias, NULL, &myErr);
716     if (myErr != U_ZERO_ERROR) {
717         *pErrorCode = myErr;
718     }
719 
720     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
721         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
722         if (listOffset && isAliasInList(alias, listOffset)) {
723             return convNum;
724         }
725         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
726             /* Uh Oh! They used an ambiguous alias.
727                We have to search one slice of the swiss cheese.
728                We search only in the requested tag, not the whole thing.
729                This may take a while.
730             */
731             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
732             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
733             for (idx = convStart; idx < convLimit; idx++) {
734                 listOffset = gMainTable.taggedAliasArray[idx];
735                 if (listOffset && isAliasInList(alias, listOffset)) {
736                     return idx-convStart;
737                 }
738             }
739             /* The standard doesn't know about the alias */
740         }
741         /* else no canonical name */
742     }
743     /* else converter or tag not found */
744 
745     return UINT32_MAX;
746 }
747 
748 
749 
750 U_CFUNC const char *
ucnv_io_getConverterName(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)751 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
752     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
753         uint32_t convNum = findConverter(alias, containsOption, pErrorCode);
754         if (convNum < gMainTable.converterListSize) {
755             return GET_STRING(gMainTable.converterList[convNum]);
756         }
757         /* else converter not found */
758     }
759     return NULL;
760 }
761 
762 static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration * enumerator,UErrorCode * pErrorCode)763 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
764     int32_t value = 0;
765     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
766     uint32_t listOffset = myContext->listOffset;
767 
768     if (listOffset) {
769         value = gMainTable.taggedAliasLists[listOffset];
770     }
771     return value;
772 }
773 
774 static const char* U_CALLCONV
ucnv_io_nextStandardAliases(UEnumeration * enumerator,int32_t * resultLength,UErrorCode * pErrorCode)775 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
776                             int32_t* resultLength,
777                             UErrorCode *pErrorCode)
778 {
779     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
780     uint32_t listOffset = myContext->listOffset;
781 
782     if (listOffset) {
783         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
784         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
785 
786         if (myContext->listIdx < listCount) {
787             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
788             if (resultLength) {
789                 *resultLength = (int32_t)uprv_strlen(myStr);
790             }
791             return myStr;
792         }
793     }
794     /* Either we accessed a zero length list, or we enumerated too far. */
795     if (resultLength) {
796         *resultLength = 0;
797     }
798     return NULL;
799 }
800 
801 static void U_CALLCONV
ucnv_io_resetStandardAliases(UEnumeration * enumerator,UErrorCode * pErrorCode)802 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
803     ((UAliasContext *)(enumerator->context))->listIdx = 0;
804 }
805 
806 static void U_CALLCONV
ucnv_io_closeUEnumeration(UEnumeration * enumerator)807 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
808     uprv_free(enumerator->context);
809     uprv_free(enumerator);
810 }
811 
812 /* Enumerate the aliases for the specified converter and standard tag */
813 static const UEnumeration gEnumAliases = {
814     NULL,
815     NULL,
816     ucnv_io_closeUEnumeration,
817     ucnv_io_countStandardAliases,
818     uenum_unextDefault,
819     ucnv_io_nextStandardAliases,
820     ucnv_io_resetStandardAliases
821 };
822 
823 U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char * convName,const char * standard,UErrorCode * pErrorCode)824 ucnv_openStandardNames(const char *convName,
825                        const char *standard,
826                        UErrorCode *pErrorCode)
827 {
828     UEnumeration *myEnum = NULL;
829     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
830         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
831 
832         /* When listOffset == 0, we want to acknowledge that the
833            converter name and standard are okay, but there
834            is nothing to enumerate. */
835         if (listOffset < gMainTable.taggedAliasListsSize) {
836             UAliasContext *myContext;
837 
838             myEnum = uprv_malloc(sizeof(UEnumeration));
839             if (myEnum == NULL) {
840                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
841                 return NULL;
842             }
843             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
844             myContext = uprv_malloc(sizeof(UAliasContext));
845             if (myContext == NULL) {
846                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
847                 uprv_free(myEnum);
848                 return NULL;
849             }
850             myContext->listOffset = listOffset;
851             myContext->listIdx = 0;
852             myEnum->context = myContext;
853         }
854         /* else converter or tag not found */
855     }
856     return myEnum;
857 }
858 
859 static uint16_t
ucnv_io_countAliases(const char * alias,UErrorCode * pErrorCode)860 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
861     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
862         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
863         if (convNum < gMainTable.converterListSize) {
864             /* tagListNum - 1 is the ALL tag */
865             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
866 
867             if (listOffset) {
868                 return gMainTable.taggedAliasLists[listOffset];
869             }
870             /* else this shouldn't happen. internal program error */
871         }
872         /* else converter not found */
873     }
874     return 0;
875 }
876 
877 static uint16_t
ucnv_io_getAliases(const char * alias,uint16_t start,const char ** aliases,UErrorCode * pErrorCode)878 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
879     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
880         uint32_t currAlias;
881         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
882         if (convNum < gMainTable.converterListSize) {
883             /* tagListNum - 1 is the ALL tag */
884             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
885 
886             if (listOffset) {
887                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
888                 /* +1 to skip listCount */
889                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
890 
891                 for (currAlias = start; currAlias < listCount; currAlias++) {
892                     aliases[currAlias] = GET_STRING(currList[currAlias]);
893                 }
894             }
895             /* else this shouldn't happen. internal program error */
896         }
897         /* else converter not found */
898     }
899     return 0;
900 }
901 
902 static const char *
ucnv_io_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)903 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
904     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
905         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
906         if (convNum < gMainTable.converterListSize) {
907             /* tagListNum - 1 is the ALL tag */
908             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
909 
910             if (listOffset) {
911                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
912                 /* +1 to skip listCount */
913                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
914 
915                 if (n < listCount)  {
916                     return GET_STRING(currList[n]);
917                 }
918                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
919             }
920             /* else this shouldn't happen. internal program error */
921         }
922         /* else converter not found */
923     }
924     return NULL;
925 }
926 
927 static uint16_t
ucnv_io_countStandards(UErrorCode * pErrorCode)928 ucnv_io_countStandards(UErrorCode *pErrorCode) {
929     if (haveAliasData(pErrorCode)) {
930         /* Don't include the empty list */
931         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
932     }
933 
934     return 0;
935 }
936 
937 U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n,UErrorCode * pErrorCode)938 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
939     if (haveAliasData(pErrorCode)) {
940         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
941             return GET_STRING(gMainTable.tagList[n]);
942         }
943         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
944     }
945 
946     return NULL;
947 }
948 
949 U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char * alias,const char * standard,UErrorCode * pErrorCode)950 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
951     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
952         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
953 
954         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
955             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
956 
957             /* Get the preferred name from this list */
958             if (currList[0]) {
959                 return GET_STRING(currList[0]);
960             }
961             /* else someone screwed up the alias table. */
962             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
963         }
964     }
965 
966     return NULL;
967 }
968 
969 U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char * alias,UErrorCode * pErrorCode)970 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
971 {
972     return ucnv_io_countAliases(alias, pErrorCode);
973 }
974 
975 
976 U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)977 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
978 {
979     return ucnv_io_getAlias(alias, n, pErrorCode);
980 }
981 
982 U_CAPI void U_EXPORT2
ucnv_getAliases(const char * alias,const char ** aliases,UErrorCode * pErrorCode)983 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
984 {
985     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
986 }
987 
988 U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void)989 ucnv_countStandards(void)
990 {
991     UErrorCode err = U_ZERO_ERROR;
992     return ucnv_io_countStandards(&err);
993 }
994 
995 U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char * alias,const char * standard,UErrorCode * pErrorCode)996 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
997     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
998         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
999 
1000         if (convNum < gMainTable.converterListSize) {
1001             return GET_STRING(gMainTable.converterList[convNum]);
1002         }
1003     }
1004 
1005     return NULL;
1006 }
1007 
1008 static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration * enumerator,UErrorCode * pErrorCode)1009 ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1010     return gMainTable.converterListSize;
1011 }
1012 
1013 static const char* U_CALLCONV
ucnv_io_nextAllConverters(UEnumeration * enumerator,int32_t * resultLength,UErrorCode * pErrorCode)1014 ucnv_io_nextAllConverters(UEnumeration *enumerator,
1015                             int32_t* resultLength,
1016                             UErrorCode *pErrorCode)
1017 {
1018     uint16_t *myContext = (uint16_t *)(enumerator->context);
1019 
1020     if (*myContext < gMainTable.converterListSize) {
1021         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1022         if (resultLength) {
1023             *resultLength = (int32_t)uprv_strlen(myStr);
1024         }
1025         return myStr;
1026     }
1027     /* Either we accessed a zero length list, or we enumerated too far. */
1028     if (resultLength) {
1029         *resultLength = 0;
1030     }
1031     return NULL;
1032 }
1033 
1034 static void U_CALLCONV
ucnv_io_resetAllConverters(UEnumeration * enumerator,UErrorCode * pErrorCode)1035 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1036     *((uint16_t *)(enumerator->context)) = 0;
1037 }
1038 
1039 static const UEnumeration gEnumAllConverters = {
1040     NULL,
1041     NULL,
1042     ucnv_io_closeUEnumeration,
1043     ucnv_io_countAllConverters,
1044     uenum_unextDefault,
1045     ucnv_io_nextAllConverters,
1046     ucnv_io_resetAllConverters
1047 };
1048 
1049 U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode * pErrorCode)1050 ucnv_openAllNames(UErrorCode *pErrorCode) {
1051     UEnumeration *myEnum = NULL;
1052     if (haveAliasData(pErrorCode)) {
1053         uint16_t *myContext;
1054 
1055         myEnum = uprv_malloc(sizeof(UEnumeration));
1056         if (myEnum == NULL) {
1057             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1058             return NULL;
1059         }
1060         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1061         myContext = uprv_malloc(sizeof(uint16_t));
1062         if (myContext == NULL) {
1063             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1064             uprv_free(myEnum);
1065             return NULL;
1066         }
1067         *myContext = 0;
1068         myEnum->context = myContext;
1069     }
1070     return myEnum;
1071 }
1072 
1073 U_CFUNC uint16_t
ucnv_io_countKnownConverters(UErrorCode * pErrorCode)1074 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1075     if (haveAliasData(pErrorCode)) {
1076         return (uint16_t)gMainTable.converterListSize;
1077     }
1078     return 0;
1079 }
1080 
1081 /* alias table swapping ----------------------------------------------------- */
1082 
1083 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1084 
1085 /*
1086  * row of a temporary array
1087  *
1088  * gets platform-endian charset string indexes and sorting indexes;
1089  * after sorting this array by strings, the actual arrays are permutated
1090  * according to the sorting indexes
1091  */
1092 typedef struct TempRow {
1093     uint16_t strIndex, sortIndex;
1094 } TempRow;
1095 
1096 typedef struct TempAliasTable {
1097     const char *chars;
1098     TempRow *rows;
1099     uint16_t *resort;
1100     StripForCompareFn *stripForCompare;
1101 } TempAliasTable;
1102 
1103 enum {
1104     STACK_ROW_CAPACITY=500
1105 };
1106 
1107 static int32_t
io_compareRows(const void * context,const void * left,const void * right)1108 io_compareRows(const void *context, const void *left, const void *right) {
1109     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1110          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1111 
1112     TempAliasTable *tempTable=(TempAliasTable *)context;
1113     const char *chars=tempTable->chars;
1114 
1115     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1116                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1117 }
1118 
1119 U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1120 ucnv_swapAliases(const UDataSwapper *ds,
1121                  const void *inData, int32_t length, void *outData,
1122                  UErrorCode *pErrorCode) {
1123     const UDataInfo *pInfo;
1124     int32_t headerSize;
1125 
1126     const uint16_t *inTable;
1127     const uint32_t *inSectionSizes;
1128     uint32_t toc[offsetsCount];
1129     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1130     uint32_t i, count, tocLength, topOffset;
1131 
1132     TempRow rows[STACK_ROW_CAPACITY];
1133     uint16_t resort[STACK_ROW_CAPACITY];
1134     TempAliasTable tempTable;
1135 
1136     /* udata_swapDataHeader checks the arguments */
1137     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1138     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1139         return 0;
1140     }
1141 
1142     /* check data format and format version */
1143     pInfo=(const UDataInfo *)((const char *)inData+4);
1144     if(!(
1145         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
1146         pInfo->dataFormat[1]==0x76 &&
1147         pInfo->dataFormat[2]==0x41 &&
1148         pInfo->dataFormat[3]==0x6c &&
1149         pInfo->formatVersion[0]==3
1150     )) {
1151         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1152                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1153                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1154                          pInfo->formatVersion[0]);
1155         *pErrorCode=U_UNSUPPORTED_ERROR;
1156         return 0;
1157     }
1158 
1159     /* an alias table must contain at least the table of contents array */
1160     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1161         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1162                          length-headerSize);
1163         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1164         return 0;
1165     }
1166 
1167     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1168     inTable=(const uint16_t *)inSectionSizes;
1169     uprv_memset(toc, 0, sizeof(toc));
1170     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1171     if(tocLength<minTocLength || offsetsCount<=tocLength) {
1172         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1173         *pErrorCode=U_INVALID_FORMAT_ERROR;
1174         return 0;
1175     }
1176 
1177     /* read the known part of the table of contents */
1178     for(i=converterListIndex; i<=tocLength; ++i) {
1179         toc[i]=ds->readUInt32(inSectionSizes[i]);
1180     }
1181 
1182     /* compute offsets */
1183     uprv_memset(offsets, 0, sizeof(offsets));
1184     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1185     for(i=tagListIndex; i<=tocLength; ++i) {
1186         offsets[i]=offsets[i-1]+toc[i-1];
1187     }
1188 
1189     /* compute the overall size of the after-header data, in numbers of 16-bit units */
1190     topOffset=offsets[i-1]+toc[i-1];
1191 
1192     if(length>=0) {
1193         uint16_t *outTable;
1194         const uint16_t *p, *p2;
1195         uint16_t *q, *q2;
1196         uint16_t oldIndex;
1197 
1198         if((length-headerSize)<(2*(int32_t)topOffset)) {
1199             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1200                              length-headerSize);
1201             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1202             return 0;
1203         }
1204 
1205         outTable=(uint16_t *)((char *)outData+headerSize);
1206 
1207         /* swap the entire table of contents */
1208         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1209 
1210         /* swap unormalized strings & normalized strings */
1211         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1212                              outTable+offsets[stringTableIndex], pErrorCode);
1213         if(U_FAILURE(*pErrorCode)) {
1214             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1215             return 0;
1216         }
1217 
1218         if(ds->inCharset==ds->outCharset) {
1219             /* no need to sort, just swap all 16-bit values together */
1220             ds->swapArray16(ds,
1221                             inTable+offsets[converterListIndex],
1222                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1223                             outTable+offsets[converterListIndex],
1224                             pErrorCode);
1225         } else {
1226             /* allocate the temporary table for sorting */
1227             count=toc[aliasListIndex];
1228 
1229             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1230 
1231             if(count<=STACK_ROW_CAPACITY) {
1232                 tempTable.rows=rows;
1233                 tempTable.resort=resort;
1234             } else {
1235                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1236                 if(tempTable.rows==NULL) {
1237                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1238                                      count);
1239                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1240                     return 0;
1241                 }
1242                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1243             }
1244 
1245             if(ds->outCharset==U_ASCII_FAMILY) {
1246                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1247             } else /* U_EBCDIC_FAMILY */ {
1248                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1249             }
1250 
1251             /*
1252              * Sort unique aliases+mapped names.
1253              *
1254              * We need to sort the list again by outCharset strings because they
1255              * sort differently for different charset families.
1256              * First we set up a temporary table with the string indexes and
1257              * sorting indexes and sort that.
1258              * Then we permutate and copy/swap the actual values.
1259              */
1260             p=inTable+offsets[aliasListIndex];
1261             q=outTable+offsets[aliasListIndex];
1262 
1263             p2=inTable+offsets[untaggedConvArrayIndex];
1264             q2=outTable+offsets[untaggedConvArrayIndex];
1265 
1266             for(i=0; i<count; ++i) {
1267                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1268                 tempTable.rows[i].sortIndex=(uint16_t)i;
1269             }
1270 
1271             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1272                            io_compareRows, &tempTable,
1273                            FALSE, pErrorCode);
1274 
1275             if(U_SUCCESS(*pErrorCode)) {
1276                 /* copy/swap/permutate items */
1277                 if(p!=q) {
1278                     for(i=0; i<count; ++i) {
1279                         oldIndex=tempTable.rows[i].sortIndex;
1280                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1281                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1282                     }
1283                 } else {
1284                     /*
1285                      * If we swap in-place, then the permutation must use another
1286                      * temporary array (tempTable.resort)
1287                      * before the results are copied to the outBundle.
1288                      */
1289                     uint16_t *r=tempTable.resort;
1290 
1291                     for(i=0; i<count; ++i) {
1292                         oldIndex=tempTable.rows[i].sortIndex;
1293                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1294                     }
1295                     uprv_memcpy(q, r, 2*count);
1296 
1297                     for(i=0; i<count; ++i) {
1298                         oldIndex=tempTable.rows[i].sortIndex;
1299                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1300                     }
1301                     uprv_memcpy(q2, r, 2*count);
1302                 }
1303             }
1304 
1305             if(tempTable.rows!=rows) {
1306                 uprv_free(tempTable.rows);
1307             }
1308 
1309             if(U_FAILURE(*pErrorCode)) {
1310                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1311                                  count);
1312                 return 0;
1313             }
1314 
1315             /* swap remaining 16-bit values */
1316             ds->swapArray16(ds,
1317                             inTable+offsets[converterListIndex],
1318                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1319                             outTable+offsets[converterListIndex],
1320                             pErrorCode);
1321             ds->swapArray16(ds,
1322                             inTable+offsets[taggedAliasArrayIndex],
1323                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1324                             outTable+offsets[taggedAliasArrayIndex],
1325                             pErrorCode);
1326         }
1327     }
1328 
1329     return headerSize+2*(int32_t)topOffset;
1330 }
1331 
1332 #endif
1333 
1334 /*
1335  * Hey, Emacs, please set the following:
1336  *
1337  * Local Variables:
1338  * indent-tabs-mode: nil
1339  * End:
1340  *
1341  */
1342