1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: ucol_swp.cpp
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003sep10
16 * created by: Markus W. Scherer
17 *
18 * Swap collation binaries.
19 */
20
21 #include "unicode/udata.h" /* UDataInfo */
22 #include "utrie.h"
23 #include "utrie2.h"
24 #include "udataswp.h"
25 #include "cmemory.h"
26 #include "ucol_data.h"
27 #include "ucol_swp.h"
28
29 /* swapping ----------------------------------------------------------------- */
30
31 /*
32 * This performs data swapping for a folded trie (see utrie.c for details).
33 */
34
35 U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)36 utrie_swap(const UDataSwapper *ds,
37 const void *inData, int32_t length, void *outData,
38 UErrorCode *pErrorCode) {
39 const UTrieHeader *inTrie;
40 UTrieHeader trie;
41 int32_t size;
42 UBool dataIs32;
43
44 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
45 return 0;
46 }
47 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
48 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
49 return 0;
50 }
51
52 /* setup and swapping */
53 if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
54 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
55 return 0;
56 }
57
58 inTrie=(const UTrieHeader *)inData;
59 trie.signature=ds->readUInt32(inTrie->signature);
60 trie.options=ds->readUInt32(inTrie->options);
61 trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
62 trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
63
64 if( trie.signature!=0x54726965 ||
65 (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
66 ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
67 trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
68 (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
69 trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
70 (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
71 ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
72 ) {
73 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
74 return 0;
75 }
76
77 dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
78 size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
79
80 if(length>=0) {
81 UTrieHeader *outTrie;
82
83 if(length<size) {
84 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
85 return 0;
86 }
87
88 outTrie=(UTrieHeader *)outData;
89
90 /* swap the header */
91 ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
92
93 /* swap the index and the data */
94 if(dataIs32) {
95 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
96 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
97 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
98 } else {
99 ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
100 }
101 }
102
103 return size;
104 }
105
106 #if !UCONFIG_NO_COLLATION
107
108 U_CAPI UBool U_EXPORT2
ucol_looksLikeCollationBinary(const UDataSwapper * ds,const void * inData,int32_t length)109 ucol_looksLikeCollationBinary(const UDataSwapper *ds,
110 const void *inData, int32_t length) {
111 if(ds==NULL || inData==NULL || length<-1) {
112 return FALSE;
113 }
114
115 // First check for format version 4+ which has a standard data header.
116 UErrorCode errorCode=U_ZERO_ERROR;
117 (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode);
118 if(U_SUCCESS(errorCode)) {
119 const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
120 if(info.dataFormat[0]==0x55 && // dataFormat="UCol"
121 info.dataFormat[1]==0x43 &&
122 info.dataFormat[2]==0x6f &&
123 info.dataFormat[3]==0x6c) {
124 return TRUE;
125 }
126 }
127
128 // Else check for format version 3.
129 const UCATableHeader *inHeader=(const UCATableHeader *)inData;
130
131 /*
132 * The collation binary must contain at least the UCATableHeader,
133 * starting with its size field.
134 * sizeof(UCATableHeader)==42*4 in ICU 2.8
135 * check the length against the header size before reading the size field
136 */
137 UCATableHeader header;
138 uprv_memset(&header, 0, sizeof(header));
139 if(length<0) {
140 header.size=udata_readInt32(ds, inHeader->size);
141 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
142 return FALSE;
143 }
144
145 header.magic=ds->readUInt32(inHeader->magic);
146 if(!(
147 header.magic==UCOL_HEADER_MAGIC &&
148 inHeader->formatVersion[0]==3 /*&&
149 inHeader->formatVersion[1]>=0*/
150 )) {
151 return FALSE;
152 }
153
154 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
155 return FALSE;
156 }
157
158 return TRUE;
159 }
160
161 namespace {
162
163 /* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */
164 int32_t
swapFormatVersion3(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)165 swapFormatVersion3(const UDataSwapper *ds,
166 const void *inData, int32_t length, void *outData,
167 UErrorCode *pErrorCode) {
168 const uint8_t *inBytes;
169 uint8_t *outBytes;
170
171 const UCATableHeader *inHeader;
172 UCATableHeader *outHeader;
173 UCATableHeader header;
174
175 uint32_t count;
176
177 /* argument checking in case we were not called from ucol_swap() */
178 if(U_FAILURE(*pErrorCode)) {
179 return 0;
180 }
181 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
182 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
183 return 0;
184 }
185
186 inBytes=(const uint8_t *)inData;
187 outBytes=(uint8_t *)outData;
188
189 inHeader=(const UCATableHeader *)inData;
190 outHeader=(UCATableHeader *)outData;
191
192 /*
193 * The collation binary must contain at least the UCATableHeader,
194 * starting with its size field.
195 * sizeof(UCATableHeader)==42*4 in ICU 2.8
196 * check the length against the header size before reading the size field
197 */
198 uprv_memset(&header, 0, sizeof(header));
199 if(length<0) {
200 header.size=udata_readInt32(ds, inHeader->size);
201 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
202 udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
203 length);
204 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
205 return 0;
206 }
207
208 header.magic=ds->readUInt32(inHeader->magic);
209 if(!(
210 header.magic==UCOL_HEADER_MAGIC &&
211 inHeader->formatVersion[0]==3 /*&&
212 inHeader->formatVersion[1]>=0*/
213 )) {
214 udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
215 header.magic,
216 inHeader->formatVersion[0], inHeader->formatVersion[1]);
217 *pErrorCode=U_UNSUPPORTED_ERROR;
218 return 0;
219 }
220
221 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
222 udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
223 inHeader->isBigEndian, inHeader->charSetFamily);
224 *pErrorCode=U_INVALID_FORMAT_ERROR;
225 return 0;
226 }
227
228 if(length>=0) {
229 /* copy everything, takes care of data that needs no swapping */
230 if(inBytes!=outBytes) {
231 uprv_memcpy(outBytes, inBytes, header.size);
232 }
233
234 /* swap the necessary pieces in the order of their occurrence in the data */
235
236 /* read more of the UCATableHeader (the size field was read above) */
237 header.options= ds->readUInt32(inHeader->options);
238 header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
239 header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
240 header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
241 header.expansion= ds->readUInt32(inHeader->expansion);
242 header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
243 header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
244 header.contractionSize= ds->readUInt32(inHeader->contractionSize);
245 header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
246 header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
247 header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
248 header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
249 header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
250 header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
251
252 /* swap the 32-bit integers in the header */
253 ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
254 outHeader, pErrorCode);
255 ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
256 &(outHeader->scriptToLeadByte), pErrorCode);
257 /* set the output platform properties */
258 outHeader->isBigEndian=ds->outIsBigEndian;
259 outHeader->charSetFamily=ds->outCharset;
260
261 /* swap the options */
262 if(header.options!=0) {
263 ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
264 outBytes+header.options, pErrorCode);
265 }
266
267 /* swap the expansions */
268 if(header.mappingPosition!=0 && header.expansion!=0) {
269 if(header.contractionIndex!=0) {
270 /* expansions bounded by contractions */
271 count=header.contractionIndex-header.expansion;
272 } else {
273 /* no contractions: expansions bounded by the main trie */
274 count=header.mappingPosition-header.expansion;
275 }
276 ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
277 outBytes+header.expansion, pErrorCode);
278 }
279
280 /* swap the contractions */
281 if(header.contractionSize!=0) {
282 /* contractionIndex: UChar[] */
283 ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
284 outBytes+header.contractionIndex, pErrorCode);
285
286 /* contractionCEs: CEs[] */
287 ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
288 outBytes+header.contractionCEs, pErrorCode);
289 }
290
291 /* swap the main trie */
292 if(header.mappingPosition!=0) {
293 count=header.endExpansionCE-header.mappingPosition;
294 utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
295 outBytes+header.mappingPosition, pErrorCode);
296 }
297
298 /* swap the max expansion table */
299 if(header.endExpansionCECount!=0) {
300 ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
301 outBytes+header.endExpansionCE, pErrorCode);
302 }
303
304 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
305
306 /* swap UCA constants */
307 if(header.UCAConsts!=0) {
308 /*
309 * if UCAConsts!=0 then contractionUCACombos because we are swapping
310 * the UCA data file, and we know that the UCA contains contractions
311 */
312 ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
313 outBytes+header.UCAConsts, pErrorCode);
314 }
315
316 /* swap UCA contractions */
317 if(header.contractionUCACombosSize!=0) {
318 count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
319 ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
320 outBytes+header.contractionUCACombos, pErrorCode);
321 }
322
323 /* swap the script to lead bytes */
324 if(header.scriptToLeadByte!=0) {
325 int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
326 int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
327 ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
328 4 + (4 * indexCount) + (2 * dataCount),
329 outBytes+header.scriptToLeadByte, pErrorCode);
330 }
331
332 /* swap the lead byte to scripts */
333 if(header.leadByteToScript!=0) {
334 int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
335 int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
336 ds->swapArray16(ds, inBytes+header.leadByteToScript,
337 4 + (2 * indexCount) + (2 * dataCount),
338 outBytes+header.leadByteToScript, pErrorCode);
339 }
340 }
341
342 return header.size;
343 }
344
345 // swap formatVersion 4 or 5 ----------------------------------------------- ***
346
347 // The following are copied from CollationDataReader, trading an awkward copy of constants
348 // for an awkward relocation of the i18n collationdatareader.h file into the common library.
349 // Keep them in sync!
350
// Slots of the int32_t indexes[] array at the start of the collation data.
// The swapping code treats each *_OFFSET slot as the byte offset where a
// data part starts; the following slot marks where that part ends.
enum {
    IX_INDEXES_LENGTH=0,
    IX_OPTIONS=1,
    IX_RESERVED2=2,
    IX_RESERVED3=3,

    IX_JAMO_CE32S_START=4,
    IX_REORDER_CODES_OFFSET=5,
    IX_REORDER_TABLE_OFFSET=6,
    IX_TRIE_OFFSET=7,

    IX_RESERVED8_OFFSET=8,
    IX_CES_OFFSET=9,
    IX_RESERVED10_OFFSET=10,
    IX_CE32S_OFFSET=11,

    IX_ROOT_ELEMENTS_OFFSET=12,
    IX_CONTEXTS_OFFSET=13,
    IX_UNSAFE_BWD_OFFSET=14,
    IX_FAST_LATIN_TABLE_OFFSET=15,

    IX_SCRIPTS_OFFSET=16,
    IX_COMPRESSIBLE_BYTES_OFFSET=17,
    IX_RESERVED18_OFFSET=18,
    IX_TOTAL_SIZE=19
};
377
378 int32_t
swapFormatVersion4(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode & errorCode)379 swapFormatVersion4(const UDataSwapper *ds,
380 const void *inData, int32_t length, void *outData,
381 UErrorCode &errorCode) {
382 if(U_FAILURE(errorCode)) { return 0; }
383
384 const uint8_t *inBytes=(const uint8_t *)inData;
385 uint8_t *outBytes=(uint8_t *)outData;
386
387 const int32_t *inIndexes=(const int32_t *)inBytes;
388 int32_t indexes[IX_TOTAL_SIZE+1];
389
390 // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
391 if(0<=length && length<8) {
392 udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
393 "(%d after header) for collation data\n",
394 length);
395 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
396 return 0;
397 }
398
399 int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
400 if(0<=length && length<(indexesLength*4)) {
401 udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
402 "(%d after header) for collation data\n",
403 length);
404 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
405 return 0;
406 }
407
408 for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
409 indexes[i]=udata_readInt32(ds, inIndexes[i]);
410 }
411 for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
412 indexes[i]=-1;
413 }
414 inIndexes=NULL; // Make sure we do not accidentally use these instead of indexes[].
415
416 // Get the total length of the data.
417 int32_t size;
418 if(indexesLength>IX_TOTAL_SIZE) {
419 size=indexes[IX_TOTAL_SIZE];
420 } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
421 size=indexes[indexesLength-1];
422 } else {
423 size=indexesLength*4;
424 }
425 if(length<0) { return size; }
426
427 if(length<size) {
428 udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
429 "(%d after header) for collation data\n",
430 length);
431 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
432 return 0;
433 }
434
435 // Copy the data for inaccessible bytes and arrays of bytes.
436 if(inBytes!=outBytes) {
437 uprv_memcpy(outBytes, inBytes, size);
438 }
439
440 // Swap the int32_t indexes[].
441 ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
442
443 // The following is a modified version of CollationDataReader::read().
444 // Here we use indexes[] not inIndexes[] because
445 // the inIndexes[] may not be in this machine's endianness.
446 int32_t index; // one of the indexes[] slots
447 int32_t offset; // byte offset for the index part
448 // int32_t length; // number of bytes in the index part
449
450 index = IX_REORDER_CODES_OFFSET;
451 offset = indexes[index];
452 length = indexes[index + 1] - offset;
453 if(length > 0) {
454 ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
455 }
456
457 // Skip the IX_REORDER_TABLE_OFFSET byte array.
458
459 index = IX_TRIE_OFFSET;
460 offset = indexes[index];
461 length = indexes[index + 1] - offset;
462 if(length > 0) {
463 utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
464 }
465
466 index = IX_RESERVED8_OFFSET;
467 offset = indexes[index];
468 length = indexes[index + 1] - offset;
469 if(length > 0) {
470 udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length);
471 errorCode = U_UNSUPPORTED_ERROR;
472 return 0;
473 }
474
475 index = IX_CES_OFFSET;
476 offset = indexes[index];
477 length = indexes[index + 1] - offset;
478 if(length > 0) {
479 ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
480 }
481
482 index = IX_RESERVED10_OFFSET;
483 offset = indexes[index];
484 length = indexes[index + 1] - offset;
485 if(length > 0) {
486 udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length);
487 errorCode = U_UNSUPPORTED_ERROR;
488 return 0;
489 }
490
491 index = IX_CE32S_OFFSET;
492 offset = indexes[index];
493 length = indexes[index + 1] - offset;
494 if(length > 0) {
495 ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
496 }
497
498 index = IX_ROOT_ELEMENTS_OFFSET;
499 offset = indexes[index];
500 length = indexes[index + 1] - offset;
501 if(length > 0) {
502 ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
503 }
504
505 index = IX_CONTEXTS_OFFSET;
506 offset = indexes[index];
507 length = indexes[index + 1] - offset;
508 if(length > 0) {
509 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
510 }
511
512 index = IX_UNSAFE_BWD_OFFSET;
513 offset = indexes[index];
514 length = indexes[index + 1] - offset;
515 if(length > 0) {
516 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
517 }
518
519 index = IX_FAST_LATIN_TABLE_OFFSET;
520 offset = indexes[index];
521 length = indexes[index + 1] - offset;
522 if(length > 0) {
523 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
524 }
525
526 index = IX_SCRIPTS_OFFSET;
527 offset = indexes[index];
528 length = indexes[index + 1] - offset;
529 if(length > 0) {
530 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
531 }
532
533 // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
534
535 index = IX_RESERVED18_OFFSET;
536 offset = indexes[index];
537 length = indexes[index + 1] - offset;
538 if(length > 0) {
539 udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length);
540 errorCode = U_UNSUPPORTED_ERROR;
541 return 0;
542 }
543
544 return size;
545 }
546
547 } // namespace
548
549 /* swap ICU collation data like ucadata.icu */
550 U_CAPI int32_t U_EXPORT2
ucol_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)551 ucol_swap(const UDataSwapper *ds,
552 const void *inData, int32_t length, void *outData,
553 UErrorCode *pErrorCode) {
554 if(U_FAILURE(*pErrorCode)) { return 0; }
555
556 /* udata_swapDataHeader checks the arguments */
557 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
558 if(U_FAILURE(*pErrorCode)) {
559 // Try to swap the old format version which did not have a standard data header.
560 *pErrorCode=U_ZERO_ERROR;
561 return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
562 }
563
564 /* check data format and format version */
565 const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
566 if(!(
567 info.dataFormat[0]==0x55 && // dataFormat="UCol"
568 info.dataFormat[1]==0x43 &&
569 info.dataFormat[2]==0x6f &&
570 info.dataFormat[3]==0x6c &&
571 (3<=info.formatVersion[0] && info.formatVersion[0]<=5)
572 )) {
573 udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
574 "(format version %02x.%02x) is not recognized as collation data\n",
575 info.dataFormat[0], info.dataFormat[1],
576 info.dataFormat[2], info.dataFormat[3],
577 info.formatVersion[0], info.formatVersion[1]);
578 *pErrorCode=U_UNSUPPORTED_ERROR;
579 return 0;
580 }
581
582 inData=(const char *)inData+headerSize;
583 if(length>=0) { length-=headerSize; }
584 outData=(char *)outData+headerSize;
585 int32_t collationSize;
586 if(info.formatVersion[0]>=4) {
587 collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
588 } else {
589 collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
590 }
591 if(U_SUCCESS(*pErrorCode)) {
592 return headerSize+collationSize;
593 } else {
594 return 0;
595 }
596 }
597
598 /* swap inverse UCA collation data (invuca.icu) */
599 U_CAPI int32_t U_EXPORT2
ucol_swapInverseUCA(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)600 ucol_swapInverseUCA(const UDataSwapper *ds,
601 const void *inData, int32_t length, void *outData,
602 UErrorCode *pErrorCode) {
603 const UDataInfo *pInfo;
604 int32_t headerSize;
605
606 const uint8_t *inBytes;
607 uint8_t *outBytes;
608
609 const InverseUCATableHeader *inHeader;
610 InverseUCATableHeader *outHeader;
611 InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
612
613 /* udata_swapDataHeader checks the arguments */
614 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
615 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
616 return 0;
617 }
618
619 /* check data format and format version */
620 pInfo=(const UDataInfo *)((const char *)inData+4);
621 if(!(
622 pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */
623 pInfo->dataFormat[1]==0x6e &&
624 pInfo->dataFormat[2]==0x76 &&
625 pInfo->dataFormat[3]==0x43 &&
626 pInfo->formatVersion[0]==2 &&
627 pInfo->formatVersion[1]>=1
628 )) {
629 udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
630 pInfo->dataFormat[0], pInfo->dataFormat[1],
631 pInfo->dataFormat[2], pInfo->dataFormat[3],
632 pInfo->formatVersion[0], pInfo->formatVersion[1]);
633 *pErrorCode=U_UNSUPPORTED_ERROR;
634 return 0;
635 }
636
637 inBytes=(const uint8_t *)inData+headerSize;
638 outBytes=(uint8_t *)outData+headerSize;
639
640 inHeader=(const InverseUCATableHeader *)inBytes;
641 outHeader=(InverseUCATableHeader *)outBytes;
642
643 /*
644 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
645 * starting with its size field.
646 * sizeof(UCATableHeader)==8*4 in ICU 2.8
647 * check the length against the header size before reading the size field
648 */
649 if(length<0) {
650 header.byteSize=udata_readInt32(ds, inHeader->byteSize);
651 } else if(
652 ((length-headerSize)<(8*4) ||
653 (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
654 ) {
655 udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
656 length);
657 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
658 return 0;
659 }
660
661 if(length>=0) {
662 /* copy everything, takes care of data that needs no swapping */
663 if(inBytes!=outBytes) {
664 uprv_memcpy(outBytes, inBytes, header.byteSize);
665 }
666
667 /* swap the necessary pieces in the order of their occurrence in the data */
668
669 /* read more of the InverseUCATableHeader (the byteSize field was read above) */
670 header.tableSize= ds->readUInt32(inHeader->tableSize);
671 header.contsSize= ds->readUInt32(inHeader->contsSize);
672 header.table= ds->readUInt32(inHeader->table);
673 header.conts= ds->readUInt32(inHeader->conts);
674
675 /* swap the 32-bit integers in the header */
676 ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
677
678 /* swap the inverse table; tableSize counts uint32_t[3] rows */
679 ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
680 outBytes+header.table, pErrorCode);
681
682 /* swap the continuation table; contsSize counts UChars */
683 ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
684 outBytes+header.conts, pErrorCode);
685 }
686
687 return headerSize+header.byteSize;
688 }
689
690 #endif /* #if !UCONFIG_NO_COLLATION */
691