1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2005-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: swapimpl.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2005may05
14 * created by: Markus W. Scherer
15 *
16 * Data file swapping functions moved here from the common library
17 * because some data is hardcoded in ICU4C and needs not be swapped any more.
18 * Moving the functions here simplifies testing (for code coverage) because
19 * we need not jump through hoops (like adding snapshots of these files
20 * to testdata).
21 *
22 * The declarations for these functions remain in the internal header files
23 * in icu/source/common/
24 */
25
26 #include "unicode/utypes.h"
27 #include "unicode/putil.h"
28 #include "unicode/udata.h"
29
30 /* Explicit include statement for std_string.h is needed
31 * for compilation on certain platforms. (e.g. AIX/VACPP)
32 */
33 #include "unicode/std_string.h"
34
35 #include "cmemory.h"
36 #include "cstring.h"
37 #include "uinvchar.h"
38 #include "uassert.h"
39 #include "uarrsort.h"
40 #include "ucmndata.h"
41 #include "udataswp.h"
42
43 /* swapping implementations in common */
44
45 #include "uresdata.h"
46 #include "ucnv_io.h"
47 #include "uprops.h"
48 #include "ucase.h"
49 #include "ubidi_props.h"
50 #include "ucol_swp.h"
51 #include "ucnv_bld.h"
52 #include "unormimp.h"
53 #include "normalizer2impl.h"
54 #include "sprpimpl.h"
55 #include "propname.h"
56 #include "rbbidata.h"
57 #include "triedict.h"
58 #include "utrie2.h"
59
60 /* swapping implementations in i18n */
61
62 #if !UCONFIG_NO_NORMALIZATION
63 #include "uspoof_impl.h"
64 #endif
65
66
67 /* definitions */
68
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it always acts as a single
 * operand, e.g. in "sizeof LENGTHOF(a)" or next to postfix operators.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
70
71 /* Unicode properties data swapping ----------------------------------------- */
72
73 U_CAPI int32_t U_EXPORT2
uprops_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)74 uprops_swap(const UDataSwapper *ds,
75 const void *inData, int32_t length, void *outData,
76 UErrorCode *pErrorCode) {
77 const UDataInfo *pInfo;
78 int32_t headerSize, i;
79
80 int32_t dataIndexes[UPROPS_INDEX_COUNT];
81 const int32_t *inData32;
82
83 /* udata_swapDataHeader checks the arguments */
84 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
85 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
86 return 0;
87 }
88
89 /* check data format and format version */
90 pInfo=(const UDataInfo *)((const char *)inData+4);
91 if(!(
92 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */
93 pInfo->dataFormat[1]==0x50 &&
94 pInfo->dataFormat[2]==0x72 &&
95 pInfo->dataFormat[3]==0x6f &&
96 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
97 (pInfo->formatVersion[0]>=7 ||
98 (pInfo->formatVersion[2]==UTRIE_SHIFT &&
99 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
100 )) {
101 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
102 pInfo->dataFormat[0], pInfo->dataFormat[1],
103 pInfo->dataFormat[2], pInfo->dataFormat[3],
104 pInfo->formatVersion[0]);
105 *pErrorCode=U_UNSUPPORTED_ERROR;
106 return 0;
107 }
108
109 /* the properties file must contain at least the indexes array */
110 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
111 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
112 length-headerSize);
113 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
114 return 0;
115 }
116
117 /* read the indexes */
118 inData32=(const int32_t *)((const char *)inData+headerSize);
119 for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
120 dataIndexes[i]=udata_readInt32(ds, inData32[i]);
121 }
122
123 /*
124 * comments are copied from the data format description in genprops/store.c
125 * indexes[] constants are in uprops.h
126 */
127 int32_t dataTop;
128 if(length>=0) {
129 int32_t *outData32;
130
131 /*
132 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
133 * In earlier formatVersions, it is 0 and a lower dataIndexes entry
134 * has the top of the last item.
135 */
136 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
137
138 if((length-headerSize)<(4*dataTop)) {
139 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
140 length-headerSize);
141 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
142 return 0;
143 }
144
145 outData32=(int32_t *)((char *)outData+headerSize);
146
147 /* copy everything for inaccessible data (padding) */
148 if(inData32!=outData32) {
149 uprv_memcpy(outData32, inData32, 4*dataTop);
150 }
151
152 /* swap the indexes[16] */
153 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
154
155 /*
156 * swap the main properties UTrie
157 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
158 */
159 utrie2_swapAnyVersion(ds,
160 inData32+UPROPS_INDEX_COUNT,
161 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
162 outData32+UPROPS_INDEX_COUNT,
163 pErrorCode);
164
165 /*
166 * swap the properties and exceptions words
167 * P const uint32_t props32[i1-i0];
168 * E const uint32_t exceptions[i2-i1];
169 */
170 ds->swapArray32(ds,
171 inData32+dataIndexes[UPROPS_PROPS32_INDEX],
172 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
173 outData32+dataIndexes[UPROPS_PROPS32_INDEX],
174 pErrorCode);
175
176 /*
177 * swap the UChars
178 * U const UChar uchars[2*(i3-i2)];
179 */
180 ds->swapArray16(ds,
181 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
182 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
183 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
184 pErrorCode);
185
186 /*
187 * swap the additional UTrie
188 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
189 */
190 utrie2_swapAnyVersion(ds,
191 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
192 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
193 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
194 pErrorCode);
195
196 /*
197 * swap the properties vectors
198 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
199 */
200 ds->swapArray32(ds,
201 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
202 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
203 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
204 pErrorCode);
205
206 // swap the Script_Extensions data
207 // SCX const uint16_t scriptExtensions[2*(i7-i6)];
208 ds->swapArray16(ds,
209 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
210 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
211 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
212 pErrorCode);
213 }
214
215 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
216 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
217 }
218
219 /* Unicode case mapping data swapping --------------------------------------- */
220
221 U_CAPI int32_t U_EXPORT2
ucase_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)222 ucase_swap(const UDataSwapper *ds,
223 const void *inData, int32_t length, void *outData,
224 UErrorCode *pErrorCode) {
225 const UDataInfo *pInfo;
226 int32_t headerSize;
227
228 const uint8_t *inBytes;
229 uint8_t *outBytes;
230
231 const int32_t *inIndexes;
232 int32_t indexes[16];
233
234 int32_t i, offset, count, size;
235
236 /* udata_swapDataHeader checks the arguments */
237 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
238 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
239 return 0;
240 }
241
242 /* check data format and format version */
243 pInfo=(const UDataInfo *)((const char *)inData+4);
244 if(!(
245 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */
246 pInfo->dataFormat[1]==UCASE_FMT_1 &&
247 pInfo->dataFormat[2]==UCASE_FMT_2 &&
248 pInfo->dataFormat[3]==UCASE_FMT_3 &&
249 ((pInfo->formatVersion[0]==1 &&
250 pInfo->formatVersion[2]==UTRIE_SHIFT &&
251 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
252 pInfo->formatVersion[0]==2)
253 )) {
254 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
255 pInfo->dataFormat[0], pInfo->dataFormat[1],
256 pInfo->dataFormat[2], pInfo->dataFormat[3],
257 pInfo->formatVersion[0]);
258 *pErrorCode=U_UNSUPPORTED_ERROR;
259 return 0;
260 }
261
262 inBytes=(const uint8_t *)inData+headerSize;
263 outBytes=(uint8_t *)outData+headerSize;
264
265 inIndexes=(const int32_t *)inBytes;
266
267 if(length>=0) {
268 length-=headerSize;
269 if(length<16*4) {
270 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
271 length);
272 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
273 return 0;
274 }
275 }
276
277 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
278 for(i=0; i<16; ++i) {
279 indexes[i]=udata_readInt32(ds, inIndexes[i]);
280 }
281
282 /* get the total length of the data */
283 size=indexes[UCASE_IX_LENGTH];
284
285 if(length>=0) {
286 if(length<size) {
287 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
288 length);
289 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
290 return 0;
291 }
292
293 /* copy the data for inaccessible bytes */
294 if(inBytes!=outBytes) {
295 uprv_memcpy(outBytes, inBytes, size);
296 }
297
298 offset=0;
299
300 /* swap the int32_t indexes[] */
301 count=indexes[UCASE_IX_INDEX_TOP]*4;
302 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
303 offset+=count;
304
305 /* swap the UTrie */
306 count=indexes[UCASE_IX_TRIE_SIZE];
307 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
308 offset+=count;
309
310 /* swap the uint16_t exceptions[] and unfold[] */
311 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
312 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
313 offset+=count;
314
315 U_ASSERT(offset==size);
316 }
317
318 return headerSize+size;
319 }
320
321 /* Unicode bidi/shaping data swapping --------------------------------------- */
322
323 U_CAPI int32_t U_EXPORT2
ubidi_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)324 ubidi_swap(const UDataSwapper *ds,
325 const void *inData, int32_t length, void *outData,
326 UErrorCode *pErrorCode) {
327 const UDataInfo *pInfo;
328 int32_t headerSize;
329
330 const uint8_t *inBytes;
331 uint8_t *outBytes;
332
333 const int32_t *inIndexes;
334 int32_t indexes[16];
335
336 int32_t i, offset, count, size;
337
338 /* udata_swapDataHeader checks the arguments */
339 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
340 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
341 return 0;
342 }
343
344 /* check data format and format version */
345 pInfo=(const UDataInfo *)((const char *)inData+4);
346 if(!(
347 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
348 pInfo->dataFormat[1]==UBIDI_FMT_1 &&
349 pInfo->dataFormat[2]==UBIDI_FMT_2 &&
350 pInfo->dataFormat[3]==UBIDI_FMT_3 &&
351 ((pInfo->formatVersion[0]==1 &&
352 pInfo->formatVersion[2]==UTRIE_SHIFT &&
353 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
354 pInfo->formatVersion[0]==2)
355 )) {
356 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
357 pInfo->dataFormat[0], pInfo->dataFormat[1],
358 pInfo->dataFormat[2], pInfo->dataFormat[3],
359 pInfo->formatVersion[0]);
360 *pErrorCode=U_UNSUPPORTED_ERROR;
361 return 0;
362 }
363
364 inBytes=(const uint8_t *)inData+headerSize;
365 outBytes=(uint8_t *)outData+headerSize;
366
367 inIndexes=(const int32_t *)inBytes;
368
369 if(length>=0) {
370 length-=headerSize;
371 if(length<16*4) {
372 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
373 length);
374 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
375 return 0;
376 }
377 }
378
379 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
380 for(i=0; i<16; ++i) {
381 indexes[i]=udata_readInt32(ds, inIndexes[i]);
382 }
383
384 /* get the total length of the data */
385 size=indexes[UBIDI_IX_LENGTH];
386
387 if(length>=0) {
388 if(length<size) {
389 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
390 length);
391 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
392 return 0;
393 }
394
395 /* copy the data for inaccessible bytes */
396 if(inBytes!=outBytes) {
397 uprv_memcpy(outBytes, inBytes, size);
398 }
399
400 offset=0;
401
402 /* swap the int32_t indexes[] */
403 count=indexes[UBIDI_IX_INDEX_TOP]*4;
404 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
405 offset+=count;
406
407 /* swap the UTrie */
408 count=indexes[UBIDI_IX_TRIE_SIZE];
409 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
410 offset+=count;
411
412 /* swap the uint32_t mirrors[] */
413 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
414 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
415 offset+=count;
416
417 /* just skip the uint8_t jgArray[] */
418 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
419 offset+=count;
420
421 U_ASSERT(offset==size);
422 }
423
424 return headerSize+size;
425 }
426
427 /* Unicode normalization data swapping -------------------------------------- */
428
429 #if !UCONFIG_NO_NORMALIZATION
430
431 U_CAPI int32_t U_EXPORT2
unorm_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)432 unorm_swap(const UDataSwapper *ds,
433 const void *inData, int32_t length, void *outData,
434 UErrorCode *pErrorCode) {
435 const UDataInfo *pInfo;
436 int32_t headerSize;
437
438 const uint8_t *inBytes;
439 uint8_t *outBytes;
440
441 const int32_t *inIndexes;
442 int32_t indexes[32];
443
444 int32_t i, offset, count, size;
445
446 /* udata_swapDataHeader checks the arguments */
447 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
448 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
449 return 0;
450 }
451
452 /* check data format and format version */
453 pInfo=(const UDataInfo *)((const char *)inData+4);
454 if(!(
455 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */
456 pInfo->dataFormat[1]==0x6f &&
457 pInfo->dataFormat[2]==0x72 &&
458 pInfo->dataFormat[3]==0x6d &&
459 pInfo->formatVersion[0]==2
460 )) {
461 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
462 pInfo->dataFormat[0], pInfo->dataFormat[1],
463 pInfo->dataFormat[2], pInfo->dataFormat[3],
464 pInfo->formatVersion[0]);
465 *pErrorCode=U_UNSUPPORTED_ERROR;
466 return 0;
467 }
468
469 inBytes=(const uint8_t *)inData+headerSize;
470 outBytes=(uint8_t *)outData+headerSize;
471
472 inIndexes=(const int32_t *)inBytes;
473
474 if(length>=0) {
475 length-=headerSize;
476 if(length<32*4) {
477 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
478 length);
479 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
480 return 0;
481 }
482 }
483
484 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
485 for(i=0; i<32; ++i) {
486 indexes[i]=udata_readInt32(ds, inIndexes[i]);
487 }
488
489 /* calculate the total length of the data */
490 size=
491 32*4+ /* size of indexes[] */
492 indexes[_NORM_INDEX_TRIE_SIZE]+
493 indexes[_NORM_INDEX_UCHAR_COUNT]*2+
494 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
495 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
496 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
497 indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
498
499 if(length>=0) {
500 if(length<size) {
501 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
502 length);
503 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
504 return 0;
505 }
506
507 /* copy the data for inaccessible bytes */
508 if(inBytes!=outBytes) {
509 uprv_memcpy(outBytes, inBytes, size);
510 }
511
512 offset=0;
513
514 /* swap the indexes[] */
515 count=32*4;
516 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
517 offset+=count;
518
519 /* swap the main UTrie */
520 count=indexes[_NORM_INDEX_TRIE_SIZE];
521 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
522 offset+=count;
523
524 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
525 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
526 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
527 offset+=count;
528
529 /* swap the FCD UTrie */
530 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
531 if(count!=0) {
532 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
533 offset+=count;
534 }
535
536 /* swap the aux UTrie */
537 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
538 if(count!=0) {
539 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
540 offset+=count;
541 }
542
543 /* swap the uint16_t combiningTable[] */
544 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
545 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
546 offset+=count;
547 }
548
549 return headerSize+size;
550 }
551
552 #endif
553
554 /* Swap 'Test' data from gentest */
555 U_CAPI int32_t U_EXPORT2
test_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)556 test_swap(const UDataSwapper *ds,
557 const void *inData, int32_t length, void *outData,
558 UErrorCode *pErrorCode) {
559 const UDataInfo *pInfo;
560 int32_t headerSize;
561
562 const uint8_t *inBytes;
563 uint8_t *outBytes;
564
565 int32_t offset;
566
567 /* udata_swapDataHeader checks the arguments */
568 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
569 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
570 udata_printError(ds, "test_swap(): data header swap failed %s\n", u_errorName(*pErrorCode));
571 return 0;
572 }
573
574 /* check data format and format version */
575 pInfo=(const UDataInfo *)((const char *)inData+4);
576 if(!(
577 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */
578 pInfo->dataFormat[1]==0x65 &&
579 pInfo->dataFormat[2]==0x73 &&
580 pInfo->dataFormat[3]==0x74 &&
581 pInfo->formatVersion[0]==1
582 )) {
583 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
584 pInfo->dataFormat[0], pInfo->dataFormat[1],
585 pInfo->dataFormat[2], pInfo->dataFormat[3],
586 pInfo->formatVersion[0]);
587 *pErrorCode=U_UNSUPPORTED_ERROR;
588 return 0;
589 }
590
591 inBytes=(const uint8_t *)inData+headerSize;
592 outBytes=(uint8_t *)outData+headerSize;
593
594 int32_t size16 = 2; // 16bit plus padding
595 int32_t sizeStr = 5; // 4 char inv-str plus null
596 int32_t size = size16 + sizeStr;
597
598 if(length>=0) {
599 if(length<size) {
600 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
601 length, size);
602 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
603 return 0;
604 }
605
606 offset =0;
607 /* swap a 1 entry array */
608 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
609 offset+=size16;
610 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
611 }
612
613 return headerSize+size;
614 }
615
616 /* swap any data (except a .dat package) ------------------------------------ */
617
618 static const struct {
619 uint8_t dataFormat[4];
620 UDataSwapFn *swapFn;
621 } swapFns[]={
622 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */
623 #if !UCONFIG_NO_LEGACY_CONVERSION
624 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */
625 #endif
626 #if !UCONFIG_NO_CONVERSION
627 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */
628 #endif
629 #if !UCONFIG_NO_IDNA
630 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */
631 #endif
632 /* insert data formats here, descending by expected frequency of occurrence */
633 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */
634
635 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
636 ucase_swap }, /* dataFormat="cAsE" */
637
638 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
639 ubidi_swap }, /* dataFormat="BiDi" */
640
641 #if !UCONFIG_NO_NORMALIZATION
642 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
643 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */
644 #endif
645 #if !UCONFIG_NO_COLLATION
646 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
647 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
648 #endif
649 #if !UCONFIG_NO_BREAK_ITERATION
650 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */
651 { { 0x54, 0x72, 0x44, 0x63 }, triedict_swap }, /* dataFormat="TrDc " */
652 #endif
653 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
654 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */
655 #if !UCONFIG_NO_NORMALIZATION
656 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */
657 #endif
658 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */
659 };
660
661 U_CAPI int32_t U_EXPORT2
udata_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)662 udata_swap(const UDataSwapper *ds,
663 const void *inData, int32_t length, void *outData,
664 UErrorCode *pErrorCode) {
665 char dataFormatChars[4];
666 const UDataInfo *pInfo;
667 int32_t headerSize, i, swappedLength;
668
669 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
670 return 0;
671 }
672
673 /*
674 * Preflight the header first; checks for illegal arguments, too.
675 * Do not swap the header right away because the format-specific swapper
676 * will swap it, get the headerSize again, and also use the header
677 * information. Otherwise we would have to pass some of the information
678 * and not be able to use the UDataSwapFn signature.
679 */
680 headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
681
682 /*
683 * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
684 * then we could check here for further known magic values and structures.
685 */
686 if(U_FAILURE(*pErrorCode)) {
687 return 0; /* the data format was not recognized */
688 }
689
690 pInfo=(const UDataInfo *)((const char *)inData+4);
691
692 {
693 /* convert the data format from ASCII to Unicode to the system charset */
694 UChar u[4]={
695 pInfo->dataFormat[0], pInfo->dataFormat[1],
696 pInfo->dataFormat[2], pInfo->dataFormat[3]
697 };
698
699 if(uprv_isInvariantUString(u, 4)) {
700 u_UCharsToChars(u, dataFormatChars, 4);
701 } else {
702 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
703 }
704 }
705
706 /* dispatch to the swap function for the dataFormat */
707 for(i=0; i<LENGTHOF(swapFns); ++i) {
708 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
709 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
710
711 if(U_FAILURE(*pErrorCode)) {
712 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
713 pInfo->dataFormat[0], pInfo->dataFormat[1],
714 pInfo->dataFormat[2], pInfo->dataFormat[3],
715 dataFormatChars[0], dataFormatChars[1],
716 dataFormatChars[2], dataFormatChars[3],
717 u_errorName(*pErrorCode));
718 } else if(swappedLength<(length-15)) {
719 /* swapped less than expected */
720 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
721 swappedLength, length,
722 pInfo->dataFormat[0], pInfo->dataFormat[1],
723 pInfo->dataFormat[2], pInfo->dataFormat[3],
724 dataFormatChars[0], dataFormatChars[1],
725 dataFormatChars[2], dataFormatChars[3],
726 u_errorName(*pErrorCode));
727 }
728
729 return swappedLength;
730 }
731 }
732
733 /* the dataFormat was not recognized */
734 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
735 pInfo->dataFormat[0], pInfo->dataFormat[1],
736 pInfo->dataFormat[2], pInfo->dataFormat[3],
737 dataFormatChars[0], dataFormatChars[1],
738 dataFormatChars[2], dataFormatChars[3]);
739
740 *pErrorCode=U_UNSUPPORTED_ERROR;
741 return 0;
742 }
743