1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1999-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 *
10 * ucnv_io.c:
11 * initializes global variables and defines functions pertaining to converter
12 * name resolution aspect of the conversion code.
13 *
14 * new implementation:
15 *
16 * created on: 1999nov22
17 * created by: Markus W. Scherer
18 *
19 * Use the binary cnvalias.icu (created from convrtrs.txt) to work
20 * with aliases for converter names.
21 *
22 * Date Name Description
23 * 11/22/1999 markus Created
24 * 06/28/2002 grhoten Major overhaul of the converter alias design.
25 * Now an alias can map to different converters
26 * depending on the specified standard.
27 *******************************************************************************
28 */
29
30 #include "unicode/utypes.h"
31
32 #if !UCONFIG_NO_CONVERSION
33
34 #include "unicode/ucnv.h"
35 #include "unicode/udata.h"
36
37 #include "umutex.h"
38 #include "uarrsort.h"
39 #include "udataswp.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include "ucnv_io.h"
43 #include "uenumimp.h"
44 #include "ucln_cmn.h"
45
46 /* Format of cnvalias.icu -----------------------------------------------------
47 *
48 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
49 * This binary form contains several tables. All indexes are to uint16_t
50 * units, and not to the bytes (uint8_t units). Addressing everything on
51 * 16-bit boundaries allows us to store more information with small index
52 * numbers, which are also 16-bit in size. The majority of the table (except
53 * the string table) are 16-bit numbers.
54 *
55 * First there is the size of the Table of Contents (TOC). The TOC
56 * entries contain the size of each section. In order to find the offset
57 * you just need to sum up the previous offsets.
58 * The TOC length and entries are an array of uint32_t values.
59 * The first section after the TOC starts immediately after the TOC.
60 *
61 * 1) This section contains a list of converters. This list contains indexes
62 * into the string table for the converter name. The index of this list is
63 * also used by other sections, which are mentioned later on.
64 * This list is not sorted.
65 *
66 * 2) This section contains a list of tags. This list contains indexes
67 * into the string table for the tag name. The index of this list is
68 * also used by other sections, which are mentioned later on.
69 * This list is in priority order of standards.
70 *
71 * 3) This section contains a list of sorted unique aliases. This
72 * list contains indexes into the string table for the alias name. The
73 * index of this list is also used by other sections, like the 4th section.
74 * The index for the 3rd and 4th section is used to get the
75 * alias -> converter name mapping. Section 3 and 4 form a two column table.
76 * Some of the most significant bits of each index may contain other
77 * information (see findConverter for details).
78 *
79 * 4) This section contains a list of mapped converter names. Consider this
80 * as a table that maps the 3rd section to the 1st section. This list contains
81 * indexes into the 1st section. The index of this list is the same index in
82 * the 3rd section. There is also some extra information in the high bits of
83 * each converter index in this table. Currently it's only used to say that
84 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
85 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
86 * the predigested form of the 5th section so that an alias lookup can be fast.
87 *
88 * 5) This section contains a 2D array with indexes to the 6th section. This
89 * section is the full form of all alias mappings. The column index is the
90 * index into the converter list (column header). The row index is the index
 * to tag list (row header). This 2D array is the top part of a 3D array. The
92 * third dimension is in the 6th section.
93 *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
95 * and is followed by indexes to alias names in the string table. This is
96 * the third dimension to the section 5. No other section should be referencing
97 * this section.
98 *
99 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
100 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
101 * what type of string normalization is used among other potential things in the
102 * future.
103 *
104 * 8) This is the string table. All strings are indexed on an even address.
105 * There are two reasons for this. First many chip architectures locate strings
106 * faster on even address boundaries. Second, since all indexes are 16-bit
107 * numbers, this string table can be 128KB in size instead of 64KB when we
108 * only have strings starting on an even address.
109 *
110 * 9) When present this is a set of prenormalized strings from section 8. This
111 * table contains normalized strings with the dashes and spaces stripped out,
112 * and all strings lowercased. In the future, the options in section 7 may state
113 * other types of normalization.
114 *
115 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
116 * has a unique alias among all converters. That same alias can
117 * be mentioned in other standards on different converters,
118 * but only one alias per tag can be unique.
119 *
120 *
121 * Converter Names (Usually in TR22 form)
122 * -------------------------------------------.
123 * T / /|
124 * a / / |
125 * g / / |
126 * s / / |
127 * / / |
128 * ------------------------------------------/ |
129 * A | | |
130 * l | | |
131 * i | | /
132 * a | | /
133 * s | | /
134 * e | | /
135 * s | |/
136 * -------------------------------------------
137 *
138 *
139 *
140 * Here is what it really looks like. It's like swiss cheese.
141 * There are holes. Some converters aren't recognized by
142 * a standard, or they are really old converters that the
143 * standard doesn't recognize anymore.
144 *
145 * Converter Names (Usually in TR22 form)
146 * -------------------------------------------.
147 * T /##########################################/|
148 * a / # # /#
149 * g / # ## ## ### # ### ### ### #/
150 * s / # ##### #### ## ## #/#
151 * / ### # # ## # # # ### # # #/##
152 * ------------------------------------------/# #
153 * A |### # # ## # # # ### # # #|# #
154 * l |# # # # # ## # #|# #
155 * i |# # # # # # #|#
156 * a |# #|#
157 * s | #|#
158 * e
159 * s
160 *
161 */
162
/**
 * Per-enumeration state used by the UEnumeration API callbacks in this file.
 */
typedef struct UAliasContext {
    /* Offset of the alias list inside gMainTable.taggedAliasLists;
       0 means there is nothing to enumerate. */
    uint32_t listOffset;
    /* Index of the next alias that the "next" callback hands out. */
    uint32_t listIdx;
} UAliasContext;
170
171 static const char DATA_NAME[] = "cnvalias";
172 static const char DATA_TYPE[] = "icu";
173
174 static UDataMemory *gAliasData=NULL;
175
/* Indexes into the uint32_t table of contents at the start of cnvalias.icu. */
enum {
    tocLengthIndex = 0,         /* 0 */
    converterListIndex,         /* 1 */
    tagListIndex,               /* 2 */
    aliasListIndex,             /* 3 */
    untaggedConvArrayIndex,     /* 4 */
    taggedAliasArrayIndex,      /* 5 */
    taggedAliasListsIndex,      /* 6 */
    tableOptionsIndex,          /* 7 */
    stringTableIndex,           /* 8 */
    normalizedStringTableIndex, /* 9 */
    offsetsCount,               /* length of the swapper's temporary offsets[] */
    minTocLength = 8            /* min. tocLength in the file, does not count the tocLengthIndex! */
};
190
191 static const UConverterAliasOptions defaultTableOptions = {
192 UCNV_IO_UNNORMALIZED,
193 0 /* containsCnvOptionInfo */
194 };
195 static UConverterAlias gMainTable;
196
197 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
198 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
199
200 static UBool U_CALLCONV
isAcceptable(void * context,const char * type,const char * name,const UDataInfo * pInfo)201 isAcceptable(void *context,
202 const char *type, const char *name,
203 const UDataInfo *pInfo) {
204 return (UBool)(
205 pInfo->size>=20 &&
206 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
207 pInfo->charsetFamily==U_CHARSET_FAMILY &&
208 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
209 pInfo->dataFormat[1]==0x76 &&
210 pInfo->dataFormat[2]==0x41 &&
211 pInfo->dataFormat[3]==0x6c &&
212 pInfo->formatVersion[0]==3);
213 }
214
ucnv_io_cleanup(void)215 static UBool U_CALLCONV ucnv_io_cleanup(void)
216 {
217 if (gAliasData) {
218 udata_close(gAliasData);
219 gAliasData = NULL;
220 }
221
222 uprv_memset(&gMainTable, 0, sizeof(gMainTable));
223
224 return TRUE; /* Everything was cleaned up */
225 }
226
227 static UBool
haveAliasData(UErrorCode * pErrorCode)228 haveAliasData(UErrorCode *pErrorCode) {
229 int needInit;
230
231 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
232 return FALSE;
233 }
234
235 UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
236
237 /* load converter alias data from file if necessary */
238 if (needInit) {
239 UDataMemory *data;
240 const uint16_t *table;
241 const uint32_t *sectionSizes;
242 uint32_t tableStart;
243 uint32_t currOffset;
244
245 data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
246 if(U_FAILURE(*pErrorCode)) {
247 return FALSE;
248 }
249
250 sectionSizes = (const uint32_t *)udata_getMemory(data);
251 table = (const uint16_t *)sectionSizes;
252
253 tableStart = sectionSizes[0];
254 if (tableStart < minTocLength) {
255 *pErrorCode = U_INVALID_FORMAT_ERROR;
256 udata_close(data);
257 return FALSE;
258 }
259
260 umtx_lock(NULL);
261 if(gAliasData==NULL) {
262 gAliasData = data;
263 data=NULL;
264
265 gMainTable.converterListSize = sectionSizes[1];
266 gMainTable.tagListSize = sectionSizes[2];
267 gMainTable.aliasListSize = sectionSizes[3];
268 gMainTable.untaggedConvArraySize = sectionSizes[4];
269 gMainTable.taggedAliasArraySize = sectionSizes[5];
270 gMainTable.taggedAliasListsSize = sectionSizes[6];
271 gMainTable.optionTableSize = sectionSizes[7];
272 gMainTable.stringTableSize = sectionSizes[8];
273
274 if (tableStart > 8) {
275 gMainTable.normalizedStringTableSize = sectionSizes[9];
276 }
277
278 currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
279 gMainTable.converterList = table + currOffset;
280
281 currOffset += gMainTable.converterListSize;
282 gMainTable.tagList = table + currOffset;
283
284 currOffset += gMainTable.tagListSize;
285 gMainTable.aliasList = table + currOffset;
286
287 currOffset += gMainTable.aliasListSize;
288 gMainTable.untaggedConvArray = table + currOffset;
289
290 currOffset += gMainTable.untaggedConvArraySize;
291 gMainTable.taggedAliasArray = table + currOffset;
292
293 /* aliasLists is a 1's based array, but it has a padding character */
294 currOffset += gMainTable.taggedAliasArraySize;
295 gMainTable.taggedAliasLists = table + currOffset;
296
297 currOffset += gMainTable.taggedAliasListsSize;
298 if (gMainTable.optionTableSize > 0
299 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
300 {
301 /* Faster table */
302 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
303 }
304 else {
305 /* Smaller table, or I can't handle this normalization mode!
306 Use the original slower table lookup. */
307 gMainTable.optionTable = &defaultTableOptions;
308 }
309
310 currOffset += gMainTable.optionTableSize;
311 gMainTable.stringTable = table + currOffset;
312
313 currOffset += gMainTable.stringTableSize;
314 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
315 ? gMainTable.stringTable : (table + currOffset));
316
317 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
318 }
319 umtx_unlock(NULL);
320
321 /* if a different thread set it first, then close the extra data */
322 if(data!=NULL) {
323 udata_close(data); /* NULL if it was set correctly */
324 }
325 }
326
327 return TRUE;
328 }
329
330 static U_INLINE UBool
isAlias(const char * alias,UErrorCode * pErrorCode)331 isAlias(const char *alias, UErrorCode *pErrorCode) {
332 if(alias==NULL) {
333 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
334 return FALSE;
335 }
336 return (UBool)(*alias!=0);
337 }
338
getTagNumber(const char * tagname)339 static uint32_t getTagNumber(const char *tagname) {
340 if (gMainTable.tagList) {
341 uint32_t tagNum;
342 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
343 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
344 return tagNum;
345 }
346 }
347 }
348
349 return UINT32_MAX;
350 }
351
/* character types relevant for ucnv_compareNames() */
enum {
    IGNORE,
    ZERO,
    NONZERO,
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/* character types for ASCII 00..7F */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Type of an ASCII char: IGNORE, ZERO, NONZERO, or the lowercase letter code.
 * Bytes that are negative as int8_t (80..FF) are IGNORE.
 * The argument is fully parenthesized so that an expression argument is
 * cast as a whole; note that it is evaluated twice.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)IGNORE)

/* character types for EBCDIC 80..FF */
static const uint8_t ebcdicTypes[128] = {
    0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/*
 * Type of an EBCDIC char. Only bytes that are negative as int8_t (80..FF)
 * are looked up; all others are IGNORE. The argument is evaluated twice.
 */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c) & 0x7f] : (uint8_t)IGNORE)

/* Select the classifier that matches the platform charset family. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
395
396 /* @see ucnv_compareNames */
397 U_CFUNC char * U_EXPORT2
ucnv_io_stripASCIIForCompare(char * dst,const char * name)398 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
399 char *dstItr = dst;
400 uint8_t type, nextType;
401 char c1;
402 UBool afterDigit = FALSE;
403
404 while ((c1 = *name++) != 0) {
405 type = GET_ASCII_TYPE(c1);
406 switch (type) {
407 case IGNORE:
408 afterDigit = FALSE;
409 continue; /* ignore all but letters and digits */
410 case ZERO:
411 if (!afterDigit) {
412 nextType = GET_ASCII_TYPE(*name);
413 if (nextType == ZERO || nextType == NONZERO) {
414 continue; /* ignore leading zero before another digit */
415 }
416 }
417 break;
418 case NONZERO:
419 afterDigit = TRUE;
420 break;
421 default:
422 c1 = (char)type; /* lowercased letter */
423 afterDigit = FALSE;
424 break;
425 }
426 *dstItr++ = c1;
427 }
428 *dstItr = 0;
429 return dst;
430 }
431
432 U_CFUNC char * U_EXPORT2
ucnv_io_stripEBCDICForCompare(char * dst,const char * name)433 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
434 char *dstItr = dst;
435 uint8_t type, nextType;
436 char c1;
437 UBool afterDigit = FALSE;
438
439 while ((c1 = *name++) != 0) {
440 type = GET_EBCDIC_TYPE(c1);
441 switch (type) {
442 case IGNORE:
443 afterDigit = FALSE;
444 continue; /* ignore all but letters and digits */
445 case ZERO:
446 if (!afterDigit) {
447 nextType = GET_EBCDIC_TYPE(*name);
448 if (nextType == ZERO || nextType == NONZERO) {
449 continue; /* ignore leading zero before another digit */
450 }
451 }
452 break;
453 case NONZERO:
454 afterDigit = TRUE;
455 break;
456 default:
457 c1 = (char)type; /* lowercased letter */
458 afterDigit = FALSE;
459 break;
460 }
461 *dstItr++ = c1;
462 }
463 *dstItr = 0;
464 return dst;
465 }
466
467 /**
468 * Do a fuzzy compare of two converter/alias names.
 * The comparison is case-insensitive, ignores leading zeroes if they are
 * followed by further digits, and ignores all but letters and digits.
471 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
472 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
473 * at http://www.unicode.org/reports/tr22/
474 *
475 * This is a symmetrical (commutative) operation; order of arguments
476 * is insignificant. This is an important property for sorting the
477 * list (when the list is preprocessed into binary form) and for
478 * performing binary searches on it at run time.
479 *
480 * @param name1 a converter name or alias, zero-terminated
481 * @param name2 a converter name or alias, zero-terminated
482 * @return 0 if the names match, or a negative value if the name1
483 * lexically precedes name2, or a positive value if the name1
484 * lexically follows name2.
485 *
486 * @see ucnv_io_stripForCompare
487 */
488 U_CAPI int U_EXPORT2
ucnv_compareNames(const char * name1,const char * name2)489 ucnv_compareNames(const char *name1, const char *name2) {
490 int rc;
491 uint8_t type, nextType;
492 char c1, c2;
493 UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
494
495 for (;;) {
496 while ((c1 = *name1++) != 0) {
497 type = GET_CHAR_TYPE(c1);
498 switch (type) {
499 case IGNORE:
500 afterDigit1 = FALSE;
501 continue; /* ignore all but letters and digits */
502 case ZERO:
503 if (!afterDigit1) {
504 nextType = GET_CHAR_TYPE(*name1);
505 if (nextType == ZERO || nextType == NONZERO) {
506 continue; /* ignore leading zero before another digit */
507 }
508 }
509 break;
510 case NONZERO:
511 afterDigit1 = TRUE;
512 break;
513 default:
514 c1 = (char)type; /* lowercased letter */
515 afterDigit1 = FALSE;
516 break;
517 }
518 break; /* deliver c1 */
519 }
520 while ((c2 = *name2++) != 0) {
521 type = GET_CHAR_TYPE(c2);
522 switch (type) {
523 case IGNORE:
524 afterDigit2 = FALSE;
525 continue; /* ignore all but letters and digits */
526 case ZERO:
527 if (!afterDigit2) {
528 nextType = GET_CHAR_TYPE(*name2);
529 if (nextType == ZERO || nextType == NONZERO) {
530 continue; /* ignore leading zero before another digit */
531 }
532 }
533 break;
534 case NONZERO:
535 afterDigit2 = TRUE;
536 break;
537 default:
538 c2 = (char)type; /* lowercased letter */
539 afterDigit2 = FALSE;
540 break;
541 }
542 break; /* deliver c2 */
543 }
544
545 /* If we reach the ends of both strings then they match */
546 if ((c1|c2)==0) {
547 return 0;
548 }
549
550 /* Case-insensitive comparison */
551 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
552 if (rc != 0) {
553 return rc;
554 }
555 }
556 }
557
558 /*
559 * search for an alias
560 * return the converter number index for gConverterList
561 */
562 static U_INLINE uint32_t
findConverter(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)563 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
564 uint32_t mid, start, limit;
565 uint32_t lastMid;
566 int result;
567 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
568 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
569
570 if (!isUnnormalized) {
571 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
572 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
573 return UINT32_MAX;
574 }
575
576 /* Lower case and remove ignoreable characters. */
577 ucnv_io_stripForCompare(strippedName, alias);
578 alias = strippedName;
579 }
580
581 /* do a binary search for the alias */
582 start = 0;
583 limit = gMainTable.untaggedConvArraySize;
584 mid = limit;
585 lastMid = UINT32_MAX;
586
587 for (;;) {
588 mid = (uint32_t)((start + limit) / 2);
589 if (lastMid == mid) { /* Have we moved? */
590 break; /* We haven't moved, and it wasn't found. */
591 }
592 lastMid = mid;
593 if (isUnnormalized) {
594 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
595 }
596 else {
597 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
598 }
599
600 if (result < 0) {
601 limit = mid;
602 } else if (result > 0) {
603 start = mid;
604 } else {
605 /* Since the gencnval tool folds duplicates into one entry,
606 * this alias in gAliasList is unique, but different standards
607 * may map an alias to different converters.
608 */
609 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
610 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
611 }
612 /* State whether the canonical converter name contains an option.
613 This information is contained in this list in order to maintain backward & forward compatibility. */
614 if (containsOption) {
615 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
616 *containsOption = (UBool)((containsCnvOptionInfo
617 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
618 || !containsCnvOptionInfo);
619 }
620 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
621 }
622 }
623
624 return UINT32_MAX;
625 }
626
627 /*
628 * Is this alias in this list?
629 * alias and listOffset should be non-NULL.
630 */
631 static U_INLINE UBool
isAliasInList(const char * alias,uint32_t listOffset)632 isAliasInList(const char *alias, uint32_t listOffset) {
633 if (listOffset) {
634 uint32_t currAlias;
635 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
636 /* +1 to skip listCount */
637 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
638 for (currAlias = 0; currAlias < listCount; currAlias++) {
639 if (currList[currAlias]
640 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
641 {
642 return TRUE;
643 }
644 }
645 }
646 return FALSE;
647 }
648
649 /*
650 * Search for an standard name of an alias (what is the default name
651 * that this standard uses?)
652 * return the listOffset for gTaggedAliasLists. If it's 0,
653 * the it couldn't be found, but the parameters are valid.
654 */
655 static uint32_t
findTaggedAliasListsOffset(const char * alias,const char * standard,UErrorCode * pErrorCode)656 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
657 uint32_t idx;
658 uint32_t listOffset;
659 uint32_t convNum;
660 UErrorCode myErr = U_ZERO_ERROR;
661 uint32_t tagNum = getTagNumber(standard);
662
663 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
664 convNum = findConverter(alias, NULL, &myErr);
665 if (myErr != U_ZERO_ERROR) {
666 *pErrorCode = myErr;
667 }
668
669 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
670 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
671 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
672 return listOffset;
673 }
674 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
675 /* Uh Oh! They used an ambiguous alias.
676 We have to search the whole swiss cheese starting
677 at the highest standard affinity.
678 This may take a while.
679 */
680 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
681 listOffset = gMainTable.taggedAliasArray[idx];
682 if (listOffset && isAliasInList(alias, listOffset)) {
683 uint32_t currTagNum = idx/gMainTable.converterListSize;
684 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
685 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
686 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
687 return tempListOffset;
688 }
689 /* else keep on looking */
690 /* We could speed this up by starting on the next row
691 because an alias is unique per row, right now.
692 This would change if alias versioning appears. */
693 }
694 }
695 /* The standard doesn't know about the alias */
696 }
697 /* else no default name */
698 return 0;
699 }
700 /* else converter or tag not found */
701
702 return UINT32_MAX;
703 }
704
705 /* Return the canonical name */
706 static uint32_t
findTaggedConverterNum(const char * alias,const char * standard,UErrorCode * pErrorCode)707 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
708 uint32_t idx;
709 uint32_t listOffset;
710 uint32_t convNum;
711 UErrorCode myErr = U_ZERO_ERROR;
712 uint32_t tagNum = getTagNumber(standard);
713
714 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
715 convNum = findConverter(alias, NULL, &myErr);
716 if (myErr != U_ZERO_ERROR) {
717 *pErrorCode = myErr;
718 }
719
720 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
721 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
722 if (listOffset && isAliasInList(alias, listOffset)) {
723 return convNum;
724 }
725 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
726 /* Uh Oh! They used an ambiguous alias.
727 We have to search one slice of the swiss cheese.
728 We search only in the requested tag, not the whole thing.
729 This may take a while.
730 */
731 uint32_t convStart = (tagNum)*gMainTable.converterListSize;
732 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
733 for (idx = convStart; idx < convLimit; idx++) {
734 listOffset = gMainTable.taggedAliasArray[idx];
735 if (listOffset && isAliasInList(alias, listOffset)) {
736 return idx-convStart;
737 }
738 }
739 /* The standard doesn't know about the alias */
740 }
741 /* else no canonical name */
742 }
743 /* else converter or tag not found */
744
745 return UINT32_MAX;
746 }
747
748
749
750 U_CFUNC const char *
ucnv_io_getConverterName(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)751 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
752 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
753 uint32_t convNum = findConverter(alias, containsOption, pErrorCode);
754 if (convNum < gMainTable.converterListSize) {
755 return GET_STRING(gMainTable.converterList[convNum]);
756 }
757 /* else converter not found */
758 }
759 return NULL;
760 }
761
762 static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration * enumerator,UErrorCode * pErrorCode)763 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
764 int32_t value = 0;
765 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
766 uint32_t listOffset = myContext->listOffset;
767
768 if (listOffset) {
769 value = gMainTable.taggedAliasLists[listOffset];
770 }
771 return value;
772 }
773
774 static const char* U_CALLCONV
ucnv_io_nextStandardAliases(UEnumeration * enumerator,int32_t * resultLength,UErrorCode * pErrorCode)775 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
776 int32_t* resultLength,
777 UErrorCode *pErrorCode)
778 {
779 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
780 uint32_t listOffset = myContext->listOffset;
781
782 if (listOffset) {
783 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
784 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
785
786 if (myContext->listIdx < listCount) {
787 const char *myStr = GET_STRING(currList[myContext->listIdx++]);
788 if (resultLength) {
789 *resultLength = (int32_t)uprv_strlen(myStr);
790 }
791 return myStr;
792 }
793 }
794 /* Either we accessed a zero length list, or we enumerated too far. */
795 if (resultLength) {
796 *resultLength = 0;
797 }
798 return NULL;
799 }
800
801 static void U_CALLCONV
ucnv_io_resetStandardAliases(UEnumeration * enumerator,UErrorCode * pErrorCode)802 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
803 ((UAliasContext *)(enumerator->context))->listIdx = 0;
804 }
805
806 static void U_CALLCONV
ucnv_io_closeUEnumeration(UEnumeration * enumerator)807 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
808 uprv_free(enumerator->context);
809 uprv_free(enumerator);
810 }
811
812 /* Enumerate the aliases for the specified converter and standard tag */
813 static const UEnumeration gEnumAliases = {
814 NULL,
815 NULL,
816 ucnv_io_closeUEnumeration,
817 ucnv_io_countStandardAliases,
818 uenum_unextDefault,
819 ucnv_io_nextStandardAliases,
820 ucnv_io_resetStandardAliases
821 };
822
823 U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char * convName,const char * standard,UErrorCode * pErrorCode)824 ucnv_openStandardNames(const char *convName,
825 const char *standard,
826 UErrorCode *pErrorCode)
827 {
828 UEnumeration *myEnum = NULL;
829 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
830 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
831
832 /* When listOffset == 0, we want to acknowledge that the
833 converter name and standard are okay, but there
834 is nothing to enumerate. */
835 if (listOffset < gMainTable.taggedAliasListsSize) {
836 UAliasContext *myContext;
837
838 myEnum = uprv_malloc(sizeof(UEnumeration));
839 if (myEnum == NULL) {
840 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
841 return NULL;
842 }
843 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
844 myContext = uprv_malloc(sizeof(UAliasContext));
845 if (myContext == NULL) {
846 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
847 uprv_free(myEnum);
848 return NULL;
849 }
850 myContext->listOffset = listOffset;
851 myContext->listIdx = 0;
852 myEnum->context = myContext;
853 }
854 /* else converter or tag not found */
855 }
856 return myEnum;
857 }
858
859 static uint16_t
ucnv_io_countAliases(const char * alias,UErrorCode * pErrorCode)860 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
861 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
862 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
863 if (convNum < gMainTable.converterListSize) {
864 /* tagListNum - 1 is the ALL tag */
865 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
866
867 if (listOffset) {
868 return gMainTable.taggedAliasLists[listOffset];
869 }
870 /* else this shouldn't happen. internal program error */
871 }
872 /* else converter not found */
873 }
874 return 0;
875 }
876
877 static uint16_t
ucnv_io_getAliases(const char * alias,uint16_t start,const char ** aliases,UErrorCode * pErrorCode)878 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
879 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
880 uint32_t currAlias;
881 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
882 if (convNum < gMainTable.converterListSize) {
883 /* tagListNum - 1 is the ALL tag */
884 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
885
886 if (listOffset) {
887 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
888 /* +1 to skip listCount */
889 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
890
891 for (currAlias = start; currAlias < listCount; currAlias++) {
892 aliases[currAlias] = GET_STRING(currList[currAlias]);
893 }
894 }
895 /* else this shouldn't happen. internal program error */
896 }
897 /* else converter not found */
898 }
899 return 0;
900 }
901
902 static const char *
ucnv_io_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)903 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
904 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
905 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
906 if (convNum < gMainTable.converterListSize) {
907 /* tagListNum - 1 is the ALL tag */
908 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
909
910 if (listOffset) {
911 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
912 /* +1 to skip listCount */
913 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
914
915 if (n < listCount) {
916 return GET_STRING(currList[n]);
917 }
918 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
919 }
920 /* else this shouldn't happen. internal program error */
921 }
922 /* else converter not found */
923 }
924 return NULL;
925 }
926
927 static uint16_t
ucnv_io_countStandards(UErrorCode * pErrorCode)928 ucnv_io_countStandards(UErrorCode *pErrorCode) {
929 if (haveAliasData(pErrorCode)) {
930 /* Don't include the empty list */
931 return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
932 }
933
934 return 0;
935 }
936
937 U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n,UErrorCode * pErrorCode)938 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
939 if (haveAliasData(pErrorCode)) {
940 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
941 return GET_STRING(gMainTable.tagList[n]);
942 }
943 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
944 }
945
946 return NULL;
947 }
948
949 U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char * alias,const char * standard,UErrorCode * pErrorCode)950 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
951 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
952 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
953
954 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
955 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
956
957 /* Get the preferred name from this list */
958 if (currList[0]) {
959 return GET_STRING(currList[0]);
960 }
961 /* else someone screwed up the alias table. */
962 /* *pErrorCode = U_INVALID_FORMAT_ERROR */
963 }
964 }
965
966 return NULL;
967 }
968
969 U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char * alias,UErrorCode * pErrorCode)970 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
971 {
972 return ucnv_io_countAliases(alias, pErrorCode);
973 }
974
975
976 U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)977 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
978 {
979 return ucnv_io_getAlias(alias, n, pErrorCode);
980 }
981
982 U_CAPI void U_EXPORT2
ucnv_getAliases(const char * alias,const char ** aliases,UErrorCode * pErrorCode)983 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
984 {
985 ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
986 }
987
988 U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void)989 ucnv_countStandards(void)
990 {
991 UErrorCode err = U_ZERO_ERROR;
992 return ucnv_io_countStandards(&err);
993 }
994
995 U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char * alias,const char * standard,UErrorCode * pErrorCode)996 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
997 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
998 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
999
1000 if (convNum < gMainTable.converterListSize) {
1001 return GET_STRING(gMainTable.converterList[convNum]);
1002 }
1003 }
1004
1005 return NULL;
1006 }
1007
1008 static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration * enumerator,UErrorCode * pErrorCode)1009 ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1010 return gMainTable.converterListSize;
1011 }
1012
1013 static const char* U_CALLCONV
ucnv_io_nextAllConverters(UEnumeration * enumerator,int32_t * resultLength,UErrorCode * pErrorCode)1014 ucnv_io_nextAllConverters(UEnumeration *enumerator,
1015 int32_t* resultLength,
1016 UErrorCode *pErrorCode)
1017 {
1018 uint16_t *myContext = (uint16_t *)(enumerator->context);
1019
1020 if (*myContext < gMainTable.converterListSize) {
1021 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1022 if (resultLength) {
1023 *resultLength = (int32_t)uprv_strlen(myStr);
1024 }
1025 return myStr;
1026 }
1027 /* Either we accessed a zero length list, or we enumerated too far. */
1028 if (resultLength) {
1029 *resultLength = 0;
1030 }
1031 return NULL;
1032 }
1033
1034 static void U_CALLCONV
ucnv_io_resetAllConverters(UEnumeration * enumerator,UErrorCode * pErrorCode)1035 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1036 *((uint16_t *)(enumerator->context)) = 0;
1037 }
1038
1039 static const UEnumeration gEnumAllConverters = {
1040 NULL,
1041 NULL,
1042 ucnv_io_closeUEnumeration,
1043 ucnv_io_countAllConverters,
1044 uenum_unextDefault,
1045 ucnv_io_nextAllConverters,
1046 ucnv_io_resetAllConverters
1047 };
1048
1049 U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode * pErrorCode)1050 ucnv_openAllNames(UErrorCode *pErrorCode) {
1051 UEnumeration *myEnum = NULL;
1052 if (haveAliasData(pErrorCode)) {
1053 uint16_t *myContext;
1054
1055 myEnum = uprv_malloc(sizeof(UEnumeration));
1056 if (myEnum == NULL) {
1057 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1058 return NULL;
1059 }
1060 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1061 myContext = uprv_malloc(sizeof(uint16_t));
1062 if (myContext == NULL) {
1063 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1064 uprv_free(myEnum);
1065 return NULL;
1066 }
1067 *myContext = 0;
1068 myEnum->context = myContext;
1069 }
1070 return myEnum;
1071 }
1072
1073 U_CFUNC uint16_t
ucnv_io_countKnownConverters(UErrorCode * pErrorCode)1074 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1075 if (haveAliasData(pErrorCode)) {
1076 return (uint16_t)gMainTable.converterListSize;
1077 }
1078 return 0;
1079 }
1080
1081 /* alias table swapping ----------------------------------------------------- */
1082
1083 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1084
/*
 * One row of the temporary sorting array.
 *
 * Each row holds a platform-endian charset string index together with the
 * row's original position. The array is sorted by the strings; afterwards
 * the real arrays are permuted according to the recorded positions.
 */
typedef struct TempRow {
    uint16_t strIndex;   /* string index, already in platform endianness */
    uint16_t sortIndex;  /* position of this row before sorting */
} TempRow;
1095
1096 typedef struct TempAliasTable {
1097 const char *chars;
1098 TempRow *rows;
1099 uint16_t *resort;
1100 StripForCompareFn *stripForCompare;
1101 } TempAliasTable;
1102
/*
 * Number of rows that ucnv_swapAliases() can sort using its stack arrays;
 * larger alias lists use heap memory instead.
 */
enum { STACK_ROW_CAPACITY = 500 };
1106
1107 static int32_t
io_compareRows(const void * context,const void * left,const void * right)1108 io_compareRows(const void *context, const void *left, const void *right) {
1109 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1110 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1111
1112 TempAliasTable *tempTable=(TempAliasTable *)context;
1113 const char *chars=tempTable->chars;
1114
1115 return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1116 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1117 }
1118
1119 U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1120 ucnv_swapAliases(const UDataSwapper *ds,
1121 const void *inData, int32_t length, void *outData,
1122 UErrorCode *pErrorCode) {
1123 const UDataInfo *pInfo;
1124 int32_t headerSize;
1125
1126 const uint16_t *inTable;
1127 const uint32_t *inSectionSizes;
1128 uint32_t toc[offsetsCount];
1129 uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1130 uint32_t i, count, tocLength, topOffset;
1131
1132 TempRow rows[STACK_ROW_CAPACITY];
1133 uint16_t resort[STACK_ROW_CAPACITY];
1134 TempAliasTable tempTable;
1135
1136 /* udata_swapDataHeader checks the arguments */
1137 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1139 return 0;
1140 }
1141
1142 /* check data format and format version */
1143 pInfo=(const UDataInfo *)((const char *)inData+4);
1144 if(!(
1145 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
1146 pInfo->dataFormat[1]==0x76 &&
1147 pInfo->dataFormat[2]==0x41 &&
1148 pInfo->dataFormat[3]==0x6c &&
1149 pInfo->formatVersion[0]==3
1150 )) {
1151 udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1152 pInfo->dataFormat[0], pInfo->dataFormat[1],
1153 pInfo->dataFormat[2], pInfo->dataFormat[3],
1154 pInfo->formatVersion[0]);
1155 *pErrorCode=U_UNSUPPORTED_ERROR;
1156 return 0;
1157 }
1158
1159 /* an alias table must contain at least the table of contents array */
1160 if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1161 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1162 length-headerSize);
1163 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1164 return 0;
1165 }
1166
1167 inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1168 inTable=(const uint16_t *)inSectionSizes;
1169 uprv_memset(toc, 0, sizeof(toc));
1170 toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1171 if(tocLength<minTocLength || offsetsCount<=tocLength) {
1172 udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1173 *pErrorCode=U_INVALID_FORMAT_ERROR;
1174 return 0;
1175 }
1176
1177 /* read the known part of the table of contents */
1178 for(i=converterListIndex; i<=tocLength; ++i) {
1179 toc[i]=ds->readUInt32(inSectionSizes[i]);
1180 }
1181
1182 /* compute offsets */
1183 uprv_memset(offsets, 0, sizeof(offsets));
1184 offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1185 for(i=tagListIndex; i<=tocLength; ++i) {
1186 offsets[i]=offsets[i-1]+toc[i-1];
1187 }
1188
1189 /* compute the overall size of the after-header data, in numbers of 16-bit units */
1190 topOffset=offsets[i-1]+toc[i-1];
1191
1192 if(length>=0) {
1193 uint16_t *outTable;
1194 const uint16_t *p, *p2;
1195 uint16_t *q, *q2;
1196 uint16_t oldIndex;
1197
1198 if((length-headerSize)<(2*(int32_t)topOffset)) {
1199 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1200 length-headerSize);
1201 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1202 return 0;
1203 }
1204
1205 outTable=(uint16_t *)((char *)outData+headerSize);
1206
1207 /* swap the entire table of contents */
1208 ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1209
1210 /* swap unormalized strings & normalized strings */
1211 ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1212 outTable+offsets[stringTableIndex], pErrorCode);
1213 if(U_FAILURE(*pErrorCode)) {
1214 udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1215 return 0;
1216 }
1217
1218 if(ds->inCharset==ds->outCharset) {
1219 /* no need to sort, just swap all 16-bit values together */
1220 ds->swapArray16(ds,
1221 inTable+offsets[converterListIndex],
1222 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1223 outTable+offsets[converterListIndex],
1224 pErrorCode);
1225 } else {
1226 /* allocate the temporary table for sorting */
1227 count=toc[aliasListIndex];
1228
1229 tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1230
1231 if(count<=STACK_ROW_CAPACITY) {
1232 tempTable.rows=rows;
1233 tempTable.resort=resort;
1234 } else {
1235 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1236 if(tempTable.rows==NULL) {
1237 udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1238 count);
1239 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1240 return 0;
1241 }
1242 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1243 }
1244
1245 if(ds->outCharset==U_ASCII_FAMILY) {
1246 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1247 } else /* U_EBCDIC_FAMILY */ {
1248 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1249 }
1250
1251 /*
1252 * Sort unique aliases+mapped names.
1253 *
1254 * We need to sort the list again by outCharset strings because they
1255 * sort differently for different charset families.
1256 * First we set up a temporary table with the string indexes and
1257 * sorting indexes and sort that.
1258 * Then we permutate and copy/swap the actual values.
1259 */
1260 p=inTable+offsets[aliasListIndex];
1261 q=outTable+offsets[aliasListIndex];
1262
1263 p2=inTable+offsets[untaggedConvArrayIndex];
1264 q2=outTable+offsets[untaggedConvArrayIndex];
1265
1266 for(i=0; i<count; ++i) {
1267 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1268 tempTable.rows[i].sortIndex=(uint16_t)i;
1269 }
1270
1271 uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1272 io_compareRows, &tempTable,
1273 FALSE, pErrorCode);
1274
1275 if(U_SUCCESS(*pErrorCode)) {
1276 /* copy/swap/permutate items */
1277 if(p!=q) {
1278 for(i=0; i<count; ++i) {
1279 oldIndex=tempTable.rows[i].sortIndex;
1280 ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1281 ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1282 }
1283 } else {
1284 /*
1285 * If we swap in-place, then the permutation must use another
1286 * temporary array (tempTable.resort)
1287 * before the results are copied to the outBundle.
1288 */
1289 uint16_t *r=tempTable.resort;
1290
1291 for(i=0; i<count; ++i) {
1292 oldIndex=tempTable.rows[i].sortIndex;
1293 ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1294 }
1295 uprv_memcpy(q, r, 2*count);
1296
1297 for(i=0; i<count; ++i) {
1298 oldIndex=tempTable.rows[i].sortIndex;
1299 ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1300 }
1301 uprv_memcpy(q2, r, 2*count);
1302 }
1303 }
1304
1305 if(tempTable.rows!=rows) {
1306 uprv_free(tempTable.rows);
1307 }
1308
1309 if(U_FAILURE(*pErrorCode)) {
1310 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1311 count);
1312 return 0;
1313 }
1314
1315 /* swap remaining 16-bit values */
1316 ds->swapArray16(ds,
1317 inTable+offsets[converterListIndex],
1318 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1319 outTable+offsets[converterListIndex],
1320 pErrorCode);
1321 ds->swapArray16(ds,
1322 inTable+offsets[taggedAliasArrayIndex],
1323 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1324 outTable+offsets[taggedAliasArrayIndex],
1325 pErrorCode);
1326 }
1327 }
1328
1329 return headerSize+2*(int32_t)topOffset;
1330 }
1331
1332 #endif
1333
1334 /*
1335 * Hey, Emacs, please set the following:
1336 *
1337 * Local Variables:
1338 * indent-tabs-mode: nil
1339 * End:
1340 *
1341 */
1342