• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2012, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 *
10 *  ucnv_io.cpp:
11 *  initializes global variables and defines functions pertaining to converter
12 *  name resolution aspect of the conversion code.
13 *
14 *   new implementation:
15 *
16 *   created on: 1999nov22
17 *   created by: Markus W. Scherer
18 *
19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
20 *   with aliases for converter names.
21 *
22 *   Date        Name        Description
23 *   11/22/1999  markus      Created
24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
25 *                           Now an alias can map to different converters
26 *                           depending on the specified standard.
27 *******************************************************************************
28 */
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_CONVERSION
33 
34 #include "unicode/ucnv.h"
35 #include "unicode/udata.h"
36 
37 #include "umutex.h"
38 #include "uarrsort.h"
39 #include "udataswp.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include "ucnv_io.h"
43 #include "uenumimp.h"
44 #include "ucln_cmn.h"
45 
46 /* Format of cnvalias.icu -----------------------------------------------------
47  *
48  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
49  * This binary form contains several tables. All indexes are to uint16_t
50  * units, and not to the bytes (uint8_t units). Addressing everything on
51  * 16-bit boundaries allows us to store more information with small index
52  * numbers, which are also 16-bit in size. The majority of the table (except
53  * the string table) are 16-bit numbers.
54  *
55  * First there is the size of the Table of Contents (TOC). The TOC
56  * entries contain the size of each section. In order to find the offset
57  * you just need to sum up the previous offsets.
58  * The TOC length and entries are an array of uint32_t values.
59  * The first section after the TOC starts immediately after the TOC.
60  *
61  * 1) This section contains a list of converters. This list contains indexes
62  * into the string table for the converter name. The index of this list is
63  * also used by other sections, which are mentioned later on.
64  * This list is not sorted.
65  *
66  * 2) This section contains a list of tags. This list contains indexes
67  * into the string table for the tag name. The index of this list is
68  * also used by other sections, which are mentioned later on.
69  * This list is in priority order of standards.
70  *
71  * 3) This section contains a list of sorted unique aliases. This
72  * list contains indexes into the string table for the alias name. The
73  * index of this list is also used by other sections, like the 4th section.
74  * The index for the 3rd and 4th section is used to get the
75  * alias -> converter name mapping. Section 3 and 4 form a two column table.
76  * Some of the most significant bits of each index may contain other
77  * information (see findConverter for details).
78  *
79  * 4) This section contains a list of mapped converter names. Consider this
80  * as a table that maps the 3rd section to the 1st section. This list contains
81  * indexes into the 1st section. The index of this list is the same index in
82  * the 3rd section. There is also some extra information in the high bits of
83  * each converter index in this table. Currently it's only used to say that
84  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
85  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
86  * the predigested form of the 5th section so that an alias lookup can be fast.
87  *
88  * 5) This section contains a 2D array with indexes to the 6th section. This
89  * section is the full form of all alias mappings. The column index is the
90  * index into the converter list (column header). The row index is the index
91  * to the tag list (row header). This 2D array is the top part of a 3D array. The
92  * third dimension is in the 6th section.
93  *
94  * 6) This is a blob of variable length arrays. Each array starts with a size,
95  * and is followed by indexes to alias names in the string table. This is
96  * the third dimension to the section 5. No other section should be referencing
97  * this section.
98  *
99  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
100  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
101  * what type of string normalization is used among other potential things in the
102  * future.
103  *
104  * 8) This is the string table. All strings are indexed on an even address.
105  * There are two reasons for this. First many chip architectures locate strings
106  * faster on even address boundaries. Second, since all indexes are 16-bit
107  * numbers, this string table can be 128KB in size instead of 64KB when we
108  * only have strings starting on an even address.
109  *
110  * 9) When present this is a set of prenormalized strings from section 8. This
111  * table contains normalized strings with the dashes and spaces stripped out,
112  * and all strings lowercased. In the future, the options in section 7 may state
113  * other types of normalization.
114  *
115  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
116  * has a unique alias among all converters. That same alias can
117  * be mentioned in other standards on different converters,
118  * but only one alias per tag can be unique.
119  *
120  *
121  *              Converter Names (Usually in TR22 form)
122  *           -------------------------------------------.
123  *     T    /                                          /|
124  *     a   /                                          / |
125  *     g  /                                          /  |
126  *     s /                                          /   |
127  *      /                                          /    |
128  *      ------------------------------------------/     |
129  *    A |                                         |     |
130  *    l |                                         |     |
131  *    i |                                         |    /
132  *    a |                                         |   /
133  *    s |                                         |  /
134  *    e |                                         | /
135  *    s |                                         |/
136  *      -------------------------------------------
137  *
138  *
139  *
140  * Here is what it really looks like. It's like swiss cheese.
141  * There are holes. Some converters aren't recognized by
142  * a standard, or they are really old converters that the
143  * standard doesn't recognize anymore.
144  *
145  *              Converter Names (Usually in TR22 form)
146  *           -------------------------------------------.
147  *     T    /##########################################/|
148  *     a   /     #            #                       /#
149  *     g  /  #      ##     ##     ### # ### ### ### #/
150  *     s / #             #####  ####        ##  ## #/#
151  *      / ### # # ##  #  #   #          ### # #   #/##
152  *      ------------------------------------------/# #
153  *    A |### # # ##  #  #   #          ### # #   #|# #
154  *    l |# # #    #     #               ## #     #|# #
155  *    i |# # #    #     #                #       #|#
156  *    a |#                                       #|#
157  *    s |                                        #|#
158  *    e
159  *    s
160  *
161  */
162 
163 /**
164  * Used by the UEnumeration API
165  */
166 typedef struct UAliasContext {
    /* Offset into gMainTable.taggedAliasLists of the list being enumerated;
       the entry at this offset holds the number of aliases in the list.
       0 means there is no list (the enumeration is empty). */
167     uint32_t listOffset;
    /* Index, within that list, of the next alias to hand out. */
168     uint32_t listIdx;
169 } UAliasContext;
170 
/* Name and type of the data item that haveAliasData() opens with udata_openChoice(). */
171 static const char DATA_NAME[] = "cnvalias";
172 static const char DATA_TYPE[] = "icu";
173 
/* The loaded alias data; NULL until haveAliasData() has loaded it.
   Assigned inside umtx_lock(NULL) and closed again by ucnv_io_cleanup(). */
174 static UDataMemory *gAliasData=NULL;
175 
/*
 * Indexes into the table of contents, the uint32_t array at the start of the
 * data. Entry 0 is the TOC length; entries 1..9 are the sizes of the
 * sections, numbered as in the format description above.
 * haveAliasData() reads the same entries using the literal values 1..9.
 */
176 enum {
177     tocLengthIndex=0,
178     converterListIndex=1,
179     tagListIndex=2,
180     aliasListIndex=3,
181     untaggedConvArrayIndex=4,
182     taggedAliasArrayIndex=5,
183     taggedAliasListsIndex=6,
184     tableOptionsIndex=7,
185     stringTableIndex=8,
186     normalizedStringTableIndex=9,
187     offsetsCount,    /* length of the swapper's temporary offsets[] */
188     minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
189 };
190 
/* Options used by haveAliasData() when the data has no options section, or
   one whose normalization type this code does not know. */
191 static const UConverterAliasOptions defaultTableOptions = {
192     UCNV_IO_UNNORMALIZED,
193     0 /* containsCnvOptionInfo */
194 };
/* Section pointers and sizes of the loaded alias data; filled in by
   haveAliasData() and zeroed by ucnv_io_cleanup(). */
195 static UConverterAlias gMainTable;
196 
/* Turn a string-table index into a C string pointer. The index is added to
   the table pointer directly, so it counts table units, not bytes. */
197 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
/* Same, but for the table of pre-normalized strings. */
198 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
199 
200 static UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)201 isAcceptable(void * /*context*/,
202              const char * /*type*/, const char * /*name*/,
203              const UDataInfo *pInfo) {
204     return (UBool)(
205         pInfo->size>=20 &&
206         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
207         pInfo->charsetFamily==U_CHARSET_FAMILY &&
208         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
209         pInfo->dataFormat[1]==0x76 &&
210         pInfo->dataFormat[2]==0x41 &&
211         pInfo->dataFormat[3]==0x6c &&
212         pInfo->formatVersion[0]==3);
213 }
214 
ucnv_io_cleanup(void)215 static UBool U_CALLCONV ucnv_io_cleanup(void)
216 {
217     if (gAliasData) {
218         udata_close(gAliasData);
219         gAliasData = NULL;
220     }
221 
222     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
223 
224     return TRUE;                   /* Everything was cleaned up */
225 }
226 
227 static UBool
haveAliasData(UErrorCode * pErrorCode)228 haveAliasData(UErrorCode *pErrorCode) {
229     int needInit;
230 
231     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
232         return FALSE;
233     }
234 
235     UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
236 
237     /* load converter alias data from file if necessary */
238     if (needInit) {
239         UDataMemory *data;
240         const uint16_t *table;
241         const uint32_t *sectionSizes;
242         uint32_t tableStart;
243         uint32_t currOffset;
244 
245         data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
246         if(U_FAILURE(*pErrorCode)) {
247             return FALSE;
248         }
249 
250         sectionSizes = (const uint32_t *)udata_getMemory(data);
251         table = (const uint16_t *)sectionSizes;
252 
253         tableStart      = sectionSizes[0];
254         if (tableStart < minTocLength) {
255             *pErrorCode = U_INVALID_FORMAT_ERROR;
256             udata_close(data);
257             return FALSE;
258         }
259 
260         umtx_lock(NULL);
261         if(gAliasData==NULL) {
262             gMainTable.converterListSize      = sectionSizes[1];
263             gMainTable.tagListSize            = sectionSizes[2];
264             gMainTable.aliasListSize          = sectionSizes[3];
265             gMainTable.untaggedConvArraySize  = sectionSizes[4];
266             gMainTable.taggedAliasArraySize   = sectionSizes[5];
267             gMainTable.taggedAliasListsSize   = sectionSizes[6];
268             gMainTable.optionTableSize        = sectionSizes[7];
269             gMainTable.stringTableSize        = sectionSizes[8];
270 
271             if (tableStart > 8) {
272                 gMainTable.normalizedStringTableSize = sectionSizes[9];
273             }
274 
275             currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
276             gMainTable.converterList = table + currOffset;
277 
278             currOffset += gMainTable.converterListSize;
279             gMainTable.tagList = table + currOffset;
280 
281             currOffset += gMainTable.tagListSize;
282             gMainTable.aliasList = table + currOffset;
283 
284             currOffset += gMainTable.aliasListSize;
285             gMainTable.untaggedConvArray = table + currOffset;
286 
287             currOffset += gMainTable.untaggedConvArraySize;
288             gMainTable.taggedAliasArray = table + currOffset;
289 
290             /* aliasLists is a 1's based array, but it has a padding character */
291             currOffset += gMainTable.taggedAliasArraySize;
292             gMainTable.taggedAliasLists = table + currOffset;
293 
294             currOffset += gMainTable.taggedAliasListsSize;
295             if (gMainTable.optionTableSize > 0
296                 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
297             {
298                 /* Faster table */
299                 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
300             }
301             else {
302                 /* Smaller table, or I can't handle this normalization mode!
303                 Use the original slower table lookup. */
304                 gMainTable.optionTable = &defaultTableOptions;
305             }
306 
307             currOffset += gMainTable.optionTableSize;
308             gMainTable.stringTable = table + currOffset;
309 
310             currOffset += gMainTable.stringTableSize;
311             gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
312                 ? gMainTable.stringTable : (table + currOffset));
313 
314             ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
315 
316             gAliasData = data;
317             data=NULL;
318         }
319         umtx_unlock(NULL);
320 
321         /* if a different thread set it first, then close the extra data */
322         if(data!=NULL) {
323             udata_close(data); /* NULL if it was set correctly */
324         }
325     }
326 
327     return TRUE;
328 }
329 
330 static inline UBool
isAlias(const char * alias,UErrorCode * pErrorCode)331 isAlias(const char *alias, UErrorCode *pErrorCode) {
332     if(alias==NULL) {
333         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
334         return FALSE;
335     }
336     return (UBool)(*alias!=0);
337 }
338 
getTagNumber(const char * tagname)339 static uint32_t getTagNumber(const char *tagname) {
340     if (gMainTable.tagList) {
341         uint32_t tagNum;
342         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
343             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
344                 return tagNum;
345             }
346         }
347     }
348 
349     return UINT32_MAX;
350 }
351 
352 /* character types relevant for ucnv_compareNames() */
/* The type tables below store one of these values per character. */
353 enum {
354     IGNORE,   /* 0: character is skipped; the tables spell this as a literal 0 */
355     ZERO,     /* the digit zero */
356     NONZERO,  /* one of the digits 1..9 */
357     MINLETTER /* any values from here on are lowercase letter mappings */
358 };
359 
360 /* character types for ASCII 00..7F */
361 static const uint8_t asciiTypes[128] = {
362     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0F */
363     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1F */
364     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2F */
365     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, /* 30..3F: digits */
366     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 40..4F: A..O map to a..o */
367     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, /* 50..5F: P..Z map to p..z */
368     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 60..6F: a..o map to themselves */
369     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 /* 70..7F: p..z map to themselves */
370 };
371 
/*
 * Character type of an ASCII char: the asciiTypes entry for values 00..7F,
 * IGNORE for anything that is negative when viewed as int8_t (80..FF).
 * The argument is fully parenthesized so that a compound expression is cast
 * as a whole. Note that it is evaluated more than once.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)IGNORE)
373 
374 /* character types for EBCDIC 80..FF */
/* Entry i describes the EBCDIC byte 0x80+i; see GET_EBCDIC_TYPE below. */
375 static const uint8_t ebcdicTypes[128] = {
376     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, /* 80..8F: 81..89 map to themselves */
377     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, /* 90..9F: 91..99 map to themselves */
378     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, /* A0..AF: A2..A9 map to themselves */
379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0..BF */
380     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, /* C0..CF: C1..C9 map to 81..89 */
381     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, /* D0..DF: D1..D9 map to 91..99 */
382     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, /* E0..EF: E2..E9 map to A2..A9 */
383     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 /* F0..FF: digits */
384 };
385 
/* Character type of an EBCDIC char: the ebcdicTypes entry for bytes that are
   negative when viewed as int8_t (80..FF), IGNORE for everything else. */
386 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
387 
/* GET_CHAR_TYPE uses the table that matches the charset family of this build. */
388 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
389 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
390 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
391 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
392 #else
393 #   error U_CHARSET_FAMILY is not valid
394 #endif
395 
396 /* @see ucnv_compareNames */
397 U_CFUNC char * U_EXPORT2
ucnv_io_stripASCIIForCompare(char * dst,const char * name)398 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
399     char *dstItr = dst;
400     uint8_t type, nextType;
401     char c1;
402     UBool afterDigit = FALSE;
403 
404     while ((c1 = *name++) != 0) {
405         type = GET_ASCII_TYPE(c1);
406         switch (type) {
407         case IGNORE:
408             afterDigit = FALSE;
409             continue; /* ignore all but letters and digits */
410         case ZERO:
411             if (!afterDigit) {
412                 nextType = GET_ASCII_TYPE(*name);
413                 if (nextType == ZERO || nextType == NONZERO) {
414                     continue; /* ignore leading zero before another digit */
415                 }
416             }
417             break;
418         case NONZERO:
419             afterDigit = TRUE;
420             break;
421         default:
422             c1 = (char)type; /* lowercased letter */
423             afterDigit = FALSE;
424             break;
425         }
426         *dstItr++ = c1;
427     }
428     *dstItr = 0;
429     return dst;
430 }
431 
432 U_CFUNC char * U_EXPORT2
ucnv_io_stripEBCDICForCompare(char * dst,const char * name)433 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
434     char *dstItr = dst;
435     uint8_t type, nextType;
436     char c1;
437     UBool afterDigit = FALSE;
438 
439     while ((c1 = *name++) != 0) {
440         type = GET_EBCDIC_TYPE(c1);
441         switch (type) {
442         case IGNORE:
443             afterDigit = FALSE;
444             continue; /* ignore all but letters and digits */
445         case ZERO:
446             if (!afterDigit) {
447                 nextType = GET_EBCDIC_TYPE(*name);
448                 if (nextType == ZERO || nextType == NONZERO) {
449                     continue; /* ignore leading zero before another digit */
450                 }
451             }
452             break;
453         case NONZERO:
454             afterDigit = TRUE;
455             break;
456         default:
457             c1 = (char)type; /* lowercased letter */
458             afterDigit = FALSE;
459             break;
460         }
461         *dstItr++ = c1;
462     }
463     *dstItr = 0;
464     return dst;
465 }
466 
467 /**
468  * Do a fuzzy compare of two converter/alias names.
469  * The comparison is case-insensitive, ignores leading zeroes if they are not
470  * followed by further digits, and ignores all but letters and digits.
471  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
472  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
473  * at http://www.unicode.org/reports/tr22/
474  *
475  * This is a symmetrical (commutative) operation; order of arguments
476  * is insignificant.  This is an important property for sorting the
477  * list (when the list is preprocessed into binary form) and for
478  * performing binary searches on it at run time.
479  *
480  * @param name1 a converter name or alias, zero-terminated
481  * @param name2 a converter name or alias, zero-terminated
482  * @return 0 if the names match, or a negative value if the name1
483  * lexically precedes name2, or a positive value if the name1
484  * lexically follows name2.
485  *
486  * @see ucnv_io_stripForCompare
487  */
488 U_CAPI int U_EXPORT2
ucnv_compareNames(const char * name1,const char * name2)489 ucnv_compareNames(const char *name1, const char *name2) {
490     int rc;
491     uint8_t type, nextType;
492     char c1, c2;
493     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
494 
495     for (;;) {
496         while ((c1 = *name1++) != 0) {
497             type = GET_CHAR_TYPE(c1);
498             switch (type) {
499             case IGNORE:
500                 afterDigit1 = FALSE;
501                 continue; /* ignore all but letters and digits */
502             case ZERO:
503                 if (!afterDigit1) {
504                     nextType = GET_CHAR_TYPE(*name1);
505                     if (nextType == ZERO || nextType == NONZERO) {
506                         continue; /* ignore leading zero before another digit */
507                     }
508                 }
509                 break;
510             case NONZERO:
511                 afterDigit1 = TRUE;
512                 break;
513             default:
514                 c1 = (char)type; /* lowercased letter */
515                 afterDigit1 = FALSE;
516                 break;
517             }
518             break; /* deliver c1 */
519         }
520         while ((c2 = *name2++) != 0) {
521             type = GET_CHAR_TYPE(c2);
522             switch (type) {
523             case IGNORE:
524                 afterDigit2 = FALSE;
525                 continue; /* ignore all but letters and digits */
526             case ZERO:
527                 if (!afterDigit2) {
528                     nextType = GET_CHAR_TYPE(*name2);
529                     if (nextType == ZERO || nextType == NONZERO) {
530                         continue; /* ignore leading zero before another digit */
531                     }
532                 }
533                 break;
534             case NONZERO:
535                 afterDigit2 = TRUE;
536                 break;
537             default:
538                 c2 = (char)type; /* lowercased letter */
539                 afterDigit2 = FALSE;
540                 break;
541             }
542             break; /* deliver c2 */
543         }
544 
545         /* If we reach the ends of both strings then they match */
546         if ((c1|c2)==0) {
547             return 0;
548         }
549 
550         /* Case-insensitive comparison */
551         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
552         if (rc != 0) {
553             return rc;
554         }
555     }
556 }
557 
558 /*
559  * search for an alias
560  * return the converter number index for gConverterList
561  */
562 static inline uint32_t
findConverter(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)563 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
564     uint32_t mid, start, limit;
565     uint32_t lastMid;
566     int result;
567     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
568     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
569 
570     if (!isUnnormalized) {
571         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
572             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
573             return UINT32_MAX;
574         }
575 
576         /* Lower case and remove ignoreable characters. */
577         ucnv_io_stripForCompare(strippedName, alias);
578         alias = strippedName;
579     }
580 
581     /* do a binary search for the alias */
582     start = 0;
583     limit = gMainTable.untaggedConvArraySize;
584     mid = limit;
585     lastMid = UINT32_MAX;
586 
587     for (;;) {
588         mid = (uint32_t)((start + limit) / 2);
589         if (lastMid == mid) {   /* Have we moved? */
590             break;  /* We haven't moved, and it wasn't found. */
591         }
592         lastMid = mid;
593         if (isUnnormalized) {
594             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
595         }
596         else {
597             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
598         }
599 
600         if (result < 0) {
601             limit = mid;
602         } else if (result > 0) {
603             start = mid;
604         } else {
605             /* Since the gencnval tool folds duplicates into one entry,
606              * this alias in gAliasList is unique, but different standards
607              * may map an alias to different converters.
608              */
609             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
610                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
611             }
612             /* State whether the canonical converter name contains an option.
613             This information is contained in this list in order to maintain backward & forward compatibility. */
614             if (containsOption) {
615                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
616                 *containsOption = (UBool)((containsCnvOptionInfo
617                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
618                     || !containsCnvOptionInfo);
619             }
620             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
621         }
622     }
623 
624     return UINT32_MAX;
625 }
626 
627 /*
628  * Is this alias in this list?
629  * alias and listOffset should be non-NULL.
630  */
631 static inline UBool
isAliasInList(const char * alias,uint32_t listOffset)632 isAliasInList(const char *alias, uint32_t listOffset) {
633     if (listOffset) {
634         uint32_t currAlias;
635         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
636         /* +1 to skip listCount */
637         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
638         for (currAlias = 0; currAlias < listCount; currAlias++) {
639             if (currList[currAlias]
640                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
641             {
642                 return TRUE;
643             }
644         }
645     }
646     return FALSE;
647 }
648 
649 /*
650  * Search for an standard name of an alias (what is the default name
651  * that this standard uses?)
652  * return the listOffset for gTaggedAliasLists. If it's 0,
653  * the it couldn't be found, but the parameters are valid.
654  */
655 static uint32_t
findTaggedAliasListsOffset(const char * alias,const char * standard,UErrorCode * pErrorCode)656 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
657     uint32_t idx;
658     uint32_t listOffset;
659     uint32_t convNum;
660     UErrorCode myErr = U_ZERO_ERROR;
661     uint32_t tagNum = getTagNumber(standard);
662 
663     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
664     convNum = findConverter(alias, NULL, &myErr);
665     if (myErr != U_ZERO_ERROR) {
666         *pErrorCode = myErr;
667     }
668 
669     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
670         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
671         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
672             return listOffset;
673         }
674         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
675             /* Uh Oh! They used an ambiguous alias.
676                We have to search the whole swiss cheese starting
677                at the highest standard affinity.
678                This may take a while.
679             */
680             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
681                 listOffset = gMainTable.taggedAliasArray[idx];
682                 if (listOffset && isAliasInList(alias, listOffset)) {
683                     uint32_t currTagNum = idx/gMainTable.converterListSize;
684                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
685                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
686                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
687                         return tempListOffset;
688                     }
689                     /* else keep on looking */
690                     /* We could speed this up by starting on the next row
691                        because an alias is unique per row, right now.
692                        This would change if alias versioning appears. */
693                 }
694             }
695             /* The standard doesn't know about the alias */
696         }
697         /* else no default name */
698         return 0;
699     }
700     /* else converter or tag not found */
701 
702     return UINT32_MAX;
703 }
704 
705 /* Return the canonical name */
706 static uint32_t
findTaggedConverterNum(const char * alias,const char * standard,UErrorCode * pErrorCode)707 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
708     uint32_t idx;
709     uint32_t listOffset;
710     uint32_t convNum;
711     UErrorCode myErr = U_ZERO_ERROR;
712     uint32_t tagNum = getTagNumber(standard);
713 
714     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
715     convNum = findConverter(alias, NULL, &myErr);
716     if (myErr != U_ZERO_ERROR) {
717         *pErrorCode = myErr;
718     }
719 
720     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
721         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
722         if (listOffset && isAliasInList(alias, listOffset)) {
723             return convNum;
724         }
725         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
726             /* Uh Oh! They used an ambiguous alias.
727                We have to search one slice of the swiss cheese.
728                We search only in the requested tag, not the whole thing.
729                This may take a while.
730             */
731             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
732             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
733             for (idx = convStart; idx < convLimit; idx++) {
734                 listOffset = gMainTable.taggedAliasArray[idx];
735                 if (listOffset && isAliasInList(alias, listOffset)) {
736                     return idx-convStart;
737                 }
738             }
739             /* The standard doesn't know about the alias */
740         }
741         /* else no canonical name */
742     }
743     /* else converter or tag not found */
744 
745     return UINT32_MAX;
746 }
747 
748 
749 
750 U_CFUNC const char *
ucnv_io_getConverterName(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)751 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
752     const char *aliasTmp = alias;
753     int32_t i = 0;
754     for (i = 0; i < 2; i++) {
755         if (i == 1) {
756             /*
757              * After the first unsuccess converter lookup, check to see if
758              * the name begins with 'x-'. If it does, strip it off and try
759              * again.  This behaviour is similar to how ICU4J does it.
760              */
761             if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') {
762                 aliasTmp = aliasTmp+2;
763             } else {
764                 break;
765             }
766         }
767         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
768             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
769             if (convNum < gMainTable.converterListSize) {
770                 return GET_STRING(gMainTable.converterList[convNum]);
771             }
772             /* else converter not found */
773         } else {
774             break;
775         }
776     }
777 
778     return NULL;
779 }
780 
781 static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration * enumerator,UErrorCode *)782 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
783     int32_t value = 0;
784     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
785     uint32_t listOffset = myContext->listOffset;
786 
787     if (listOffset) {
788         value = gMainTable.taggedAliasLists[listOffset];
789     }
790     return value;
791 }
792 
793 static const char* U_CALLCONV
ucnv_io_nextStandardAliases(UEnumeration * enumerator,int32_t * resultLength,UErrorCode *)794 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
795                             int32_t* resultLength,
796                             UErrorCode * /*pErrorCode*/)
797 {
798     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
799     uint32_t listOffset = myContext->listOffset;
800 
801     if (listOffset) {
802         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
803         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
804 
805         if (myContext->listIdx < listCount) {
806             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
807             if (resultLength) {
808                 *resultLength = (int32_t)uprv_strlen(myStr);
809             }
810             return myStr;
811         }
812     }
813     /* Either we accessed a zero length list, or we enumerated too far. */
814     if (resultLength) {
815         *resultLength = 0;
816     }
817     return NULL;
818 }
819 
820 static void U_CALLCONV
ucnv_io_resetStandardAliases(UEnumeration * enumerator,UErrorCode *)821 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
822     ((UAliasContext *)(enumerator->context))->listIdx = 0;
823 }
824 
825 static void U_CALLCONV
ucnv_io_closeUEnumeration(UEnumeration * enumerator)826 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
827     uprv_free(enumerator->context);
828     uprv_free(enumerator);
829 }
830 
831 /* Enumerate the aliases for the specified converter and standard tag */
832 static const UEnumeration gEnumAliases = {
833     NULL,
834     NULL,
835     ucnv_io_closeUEnumeration,
836     ucnv_io_countStandardAliases,
837     uenum_unextDefault,
838     ucnv_io_nextStandardAliases,
839     ucnv_io_resetStandardAliases
840 };
841 
842 U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char * convName,const char * standard,UErrorCode * pErrorCode)843 ucnv_openStandardNames(const char *convName,
844                        const char *standard,
845                        UErrorCode *pErrorCode)
846 {
847     UEnumeration *myEnum = NULL;
848     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
849         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
850 
851         /* When listOffset == 0, we want to acknowledge that the
852            converter name and standard are okay, but there
853            is nothing to enumerate. */
854         if (listOffset < gMainTable.taggedAliasListsSize) {
855             UAliasContext *myContext;
856 
857             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
858             if (myEnum == NULL) {
859                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
860                 return NULL;
861             }
862             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
863             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
864             if (myContext == NULL) {
865                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
866                 uprv_free(myEnum);
867                 return NULL;
868             }
869             myContext->listOffset = listOffset;
870             myContext->listIdx = 0;
871             myEnum->context = myContext;
872         }
873         /* else converter or tag not found */
874     }
875     return myEnum;
876 }
877 
878 static uint16_t
ucnv_io_countAliases(const char * alias,UErrorCode * pErrorCode)879 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
880     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
881         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
882         if (convNum < gMainTable.converterListSize) {
883             /* tagListNum - 1 is the ALL tag */
884             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
885 
886             if (listOffset) {
887                 return gMainTable.taggedAliasLists[listOffset];
888             }
889             /* else this shouldn't happen. internal program error */
890         }
891         /* else converter not found */
892     }
893     return 0;
894 }
895 
896 static uint16_t
ucnv_io_getAliases(const char * alias,uint16_t start,const char ** aliases,UErrorCode * pErrorCode)897 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
898     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
899         uint32_t currAlias;
900         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
901         if (convNum < gMainTable.converterListSize) {
902             /* tagListNum - 1 is the ALL tag */
903             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
904 
905             if (listOffset) {
906                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
907                 /* +1 to skip listCount */
908                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
909 
910                 for (currAlias = start; currAlias < listCount; currAlias++) {
911                     aliases[currAlias] = GET_STRING(currList[currAlias]);
912                 }
913             }
914             /* else this shouldn't happen. internal program error */
915         }
916         /* else converter not found */
917     }
918     return 0;
919 }
920 
921 static const char *
ucnv_io_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)922 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
923     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
924         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
925         if (convNum < gMainTable.converterListSize) {
926             /* tagListNum - 1 is the ALL tag */
927             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
928 
929             if (listOffset) {
930                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
931                 /* +1 to skip listCount */
932                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
933 
934                 if (n < listCount)  {
935                     return GET_STRING(currList[n]);
936                 }
937                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
938             }
939             /* else this shouldn't happen. internal program error */
940         }
941         /* else converter not found */
942     }
943     return NULL;
944 }
945 
946 static uint16_t
ucnv_io_countStandards(UErrorCode * pErrorCode)947 ucnv_io_countStandards(UErrorCode *pErrorCode) {
948     if (haveAliasData(pErrorCode)) {
949         /* Don't include the empty list */
950         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
951     }
952 
953     return 0;
954 }
955 
956 U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n,UErrorCode * pErrorCode)957 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
958     if (haveAliasData(pErrorCode)) {
959         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
960             return GET_STRING(gMainTable.tagList[n]);
961         }
962         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
963     }
964 
965     return NULL;
966 }
967 
968 U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char * alias,const char * standard,UErrorCode * pErrorCode)969 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
970     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
971         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
972 
973         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
974             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
975 
976             /* Get the preferred name from this list */
977             if (currList[0]) {
978                 return GET_STRING(currList[0]);
979             }
980             /* else someone screwed up the alias table. */
981             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
982         }
983     }
984 
985     return NULL;
986 }
987 
988 U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char * alias,UErrorCode * pErrorCode)989 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
990 {
991     return ucnv_io_countAliases(alias, pErrorCode);
992 }
993 
994 
995 U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)996 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
997 {
998     return ucnv_io_getAlias(alias, n, pErrorCode);
999 }
1000 
1001 U_CAPI void U_EXPORT2
ucnv_getAliases(const char * alias,const char ** aliases,UErrorCode * pErrorCode)1002 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
1003 {
1004     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
1005 }
1006 
1007 U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void)1008 ucnv_countStandards(void)
1009 {
1010     UErrorCode err = U_ZERO_ERROR;
1011     return ucnv_io_countStandards(&err);
1012 }
1013 
1014 U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char * alias,const char * standard,UErrorCode * pErrorCode)1015 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
1016     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1017         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
1018 
1019         if (convNum < gMainTable.converterListSize) {
1020             return GET_STRING(gMainTable.converterList[convNum]);
1021         }
1022     }
1023 
1024     return NULL;
1025 }
1026 
1027 static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration *,UErrorCode *)1028 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
1029     return gMainTable.converterListSize;
1030 }
1031 
1032 static const char* U_CALLCONV
ucnv_io_nextAllConverters(UEnumeration * enumerator,int32_t * resultLength,UErrorCode *)1033 ucnv_io_nextAllConverters(UEnumeration *enumerator,
1034                             int32_t* resultLength,
1035                             UErrorCode * /*pErrorCode*/)
1036 {
1037     uint16_t *myContext = (uint16_t *)(enumerator->context);
1038 
1039     if (*myContext < gMainTable.converterListSize) {
1040         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1041         if (resultLength) {
1042             *resultLength = (int32_t)uprv_strlen(myStr);
1043         }
1044         return myStr;
1045     }
1046     /* Either we accessed a zero length list, or we enumerated too far. */
1047     if (resultLength) {
1048         *resultLength = 0;
1049     }
1050     return NULL;
1051 }
1052 
1053 static void U_CALLCONV
ucnv_io_resetAllConverters(UEnumeration * enumerator,UErrorCode *)1054 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
1055     *((uint16_t *)(enumerator->context)) = 0;
1056 }
1057 
1058 static const UEnumeration gEnumAllConverters = {
1059     NULL,
1060     NULL,
1061     ucnv_io_closeUEnumeration,
1062     ucnv_io_countAllConverters,
1063     uenum_unextDefault,
1064     ucnv_io_nextAllConverters,
1065     ucnv_io_resetAllConverters
1066 };
1067 
1068 U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode * pErrorCode)1069 ucnv_openAllNames(UErrorCode *pErrorCode) {
1070     UEnumeration *myEnum = NULL;
1071     if (haveAliasData(pErrorCode)) {
1072         uint16_t *myContext;
1073 
1074         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
1075         if (myEnum == NULL) {
1076             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1077             return NULL;
1078         }
1079         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1080         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
1081         if (myContext == NULL) {
1082             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1083             uprv_free(myEnum);
1084             return NULL;
1085         }
1086         *myContext = 0;
1087         myEnum->context = myContext;
1088     }
1089     return myEnum;
1090 }
1091 
1092 U_CFUNC uint16_t
ucnv_io_countKnownConverters(UErrorCode * pErrorCode)1093 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1094     if (haveAliasData(pErrorCode)) {
1095         return (uint16_t)gMainTable.converterListSize;
1096     }
1097     return 0;
1098 }
1099 
1100 /* alias table swapping ----------------------------------------------------- */
1101 
1102 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1103 
1104 /*
1105  * row of a temporary array
1106  *
1107  * gets platform-endian charset string indexes and sorting indexes;
1108  * after sorting this array by strings, the actual arrays are permutated
1109  * according to the sorting indexes
1110  */
1111 typedef struct TempRow {
1112     uint16_t strIndex, sortIndex;
1113 } TempRow;
1114 
1115 typedef struct TempAliasTable {
1116     const char *chars;
1117     TempRow *rows;
1118     uint16_t *resort;
1119     StripForCompareFn *stripForCompare;
1120 } TempAliasTable;
1121 
/* Largest row count that ucnv_swapAliases() sorts using its stack arrays. */
enum { STACK_ROW_CAPACITY = 500 };
1125 
1126 static int32_t
io_compareRows(const void * context,const void * left,const void * right)1127 io_compareRows(const void *context, const void *left, const void *right) {
1128     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1129          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1130 
1131     TempAliasTable *tempTable=(TempAliasTable *)context;
1132     const char *chars=tempTable->chars;
1133 
1134     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1135                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1136 }
1137 
1138 U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1139 ucnv_swapAliases(const UDataSwapper *ds,
1140                  const void *inData, int32_t length, void *outData,
1141                  UErrorCode *pErrorCode) {
1142     const UDataInfo *pInfo;
1143     int32_t headerSize;
1144 
1145     const uint16_t *inTable;
1146     const uint32_t *inSectionSizes;
1147     uint32_t toc[offsetsCount];
1148     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1149     uint32_t i, count, tocLength, topOffset;
1150 
1151     TempRow rows[STACK_ROW_CAPACITY];
1152     uint16_t resort[STACK_ROW_CAPACITY];
1153     TempAliasTable tempTable;
1154 
1155     /* udata_swapDataHeader checks the arguments */
1156     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1157     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1158         return 0;
1159     }
1160 
1161     /* check data format and format version */
1162     pInfo=(const UDataInfo *)((const char *)inData+4);
1163     if(!(
1164         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
1165         pInfo->dataFormat[1]==0x76 &&
1166         pInfo->dataFormat[2]==0x41 &&
1167         pInfo->dataFormat[3]==0x6c &&
1168         pInfo->formatVersion[0]==3
1169     )) {
1170         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1171                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1172                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1173                          pInfo->formatVersion[0]);
1174         *pErrorCode=U_UNSUPPORTED_ERROR;
1175         return 0;
1176     }
1177 
1178     /* an alias table must contain at least the table of contents array */
1179     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1180         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1181                          length-headerSize);
1182         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1183         return 0;
1184     }
1185 
1186     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1187     inTable=(const uint16_t *)inSectionSizes;
1188     uprv_memset(toc, 0, sizeof(toc));
1189     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1190     if(tocLength<minTocLength || offsetsCount<=tocLength) {
1191         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1192         *pErrorCode=U_INVALID_FORMAT_ERROR;
1193         return 0;
1194     }
1195 
1196     /* read the known part of the table of contents */
1197     for(i=converterListIndex; i<=tocLength; ++i) {
1198         toc[i]=ds->readUInt32(inSectionSizes[i]);
1199     }
1200 
1201     /* compute offsets */
1202     uprv_memset(offsets, 0, sizeof(offsets));
1203     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1204     for(i=tagListIndex; i<=tocLength; ++i) {
1205         offsets[i]=offsets[i-1]+toc[i-1];
1206     }
1207 
1208     /* compute the overall size of the after-header data, in numbers of 16-bit units */
1209     topOffset=offsets[i-1]+toc[i-1];
1210 
1211     if(length>=0) {
1212         uint16_t *outTable;
1213         const uint16_t *p, *p2;
1214         uint16_t *q, *q2;
1215         uint16_t oldIndex;
1216 
1217         if((length-headerSize)<(2*(int32_t)topOffset)) {
1218             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1219                              length-headerSize);
1220             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1221             return 0;
1222         }
1223 
1224         outTable=(uint16_t *)((char *)outData+headerSize);
1225 
1226         /* swap the entire table of contents */
1227         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1228 
1229         /* swap unormalized strings & normalized strings */
1230         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1231                              outTable+offsets[stringTableIndex], pErrorCode);
1232         if(U_FAILURE(*pErrorCode)) {
1233             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1234             return 0;
1235         }
1236 
1237         if(ds->inCharset==ds->outCharset) {
1238             /* no need to sort, just swap all 16-bit values together */
1239             ds->swapArray16(ds,
1240                             inTable+offsets[converterListIndex],
1241                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1242                             outTable+offsets[converterListIndex],
1243                             pErrorCode);
1244         } else {
1245             /* allocate the temporary table for sorting */
1246             count=toc[aliasListIndex];
1247 
1248             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1249 
1250             if(count<=STACK_ROW_CAPACITY) {
1251                 tempTable.rows=rows;
1252                 tempTable.resort=resort;
1253             } else {
1254                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1255                 if(tempTable.rows==NULL) {
1256                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1257                                      count);
1258                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1259                     return 0;
1260                 }
1261                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1262             }
1263 
1264             if(ds->outCharset==U_ASCII_FAMILY) {
1265                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1266             } else /* U_EBCDIC_FAMILY */ {
1267                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1268             }
1269 
1270             /*
1271              * Sort unique aliases+mapped names.
1272              *
1273              * We need to sort the list again by outCharset strings because they
1274              * sort differently for different charset families.
1275              * First we set up a temporary table with the string indexes and
1276              * sorting indexes and sort that.
1277              * Then we permutate and copy/swap the actual values.
1278              */
1279             p=inTable+offsets[aliasListIndex];
1280             q=outTable+offsets[aliasListIndex];
1281 
1282             p2=inTable+offsets[untaggedConvArrayIndex];
1283             q2=outTable+offsets[untaggedConvArrayIndex];
1284 
1285             for(i=0; i<count; ++i) {
1286                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1287                 tempTable.rows[i].sortIndex=(uint16_t)i;
1288             }
1289 
1290             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1291                            io_compareRows, &tempTable,
1292                            FALSE, pErrorCode);
1293 
1294             if(U_SUCCESS(*pErrorCode)) {
1295                 /* copy/swap/permutate items */
1296                 if(p!=q) {
1297                     for(i=0; i<count; ++i) {
1298                         oldIndex=tempTable.rows[i].sortIndex;
1299                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1300                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1301                     }
1302                 } else {
1303                     /*
1304                      * If we swap in-place, then the permutation must use another
1305                      * temporary array (tempTable.resort)
1306                      * before the results are copied to the outBundle.
1307                      */
1308                     uint16_t *r=tempTable.resort;
1309 
1310                     for(i=0; i<count; ++i) {
1311                         oldIndex=tempTable.rows[i].sortIndex;
1312                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1313                     }
1314                     uprv_memcpy(q, r, 2*count);
1315 
1316                     for(i=0; i<count; ++i) {
1317                         oldIndex=tempTable.rows[i].sortIndex;
1318                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1319                     }
1320                     uprv_memcpy(q2, r, 2*count);
1321                 }
1322             }
1323 
1324             if(tempTable.rows!=rows) {
1325                 uprv_free(tempTable.rows);
1326             }
1327 
1328             if(U_FAILURE(*pErrorCode)) {
1329                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1330                                  count);
1331                 return 0;
1332             }
1333 
1334             /* swap remaining 16-bit values */
1335             ds->swapArray16(ds,
1336                             inTable+offsets[converterListIndex],
1337                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1338                             outTable+offsets[converterListIndex],
1339                             pErrorCode);
1340             ds->swapArray16(ds,
1341                             inTable+offsets[taggedAliasArrayIndex],
1342                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1343                             outTable+offsets[taggedAliasArrayIndex],
1344                             pErrorCode);
1345         }
1346     }
1347 
1348     return headerSize+2*(int32_t)topOffset;
1349 }
1350 
1351 #endif
1352 
1353 /*
1354  * Hey, Emacs, please set the following:
1355  *
1356  * Local Variables:
1357  * indent-tabs-mode: nil
1358  * End:
1359  *
1360  */
1361