1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: swapimpl.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005may05
16 * created by: Markus W. Scherer
17 *
18 * Data file swapping functions moved here from the common library
19 * because some data is hardcoded in ICU4C and need not be swapped any more.
20 * Moving the functions here simplifies testing (for code coverage) because
21 * we need not jump through hoops (like adding snapshots of these files
22 * to testdata).
23 *
24 * The declarations for these functions remain in the internal header files
25 * in icu/source/common/
26 */
27
28 #include "unicode/utypes.h"
29 #include "unicode/putil.h"
30 #include "unicode/udata.h"
31
32 /* Explicit include statement for std_string.h is needed
33 * for compilation on certain platforms. (e.g. AIX/VACPP)
34 */
35 #include "unicode/std_string.h"
36
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "uinvchar.h"
40 #include "uassert.h"
41 #include "uarrsort.h"
42 #include "ucmndata.h"
43 #include "udataswp.h"
44 #include "ulayout_props.h"
45
46 /* swapping implementations in common */
47
48 #include "emojiprops.h"
49 #include "uresdata.h"
50 #include "ucnv_io.h"
51 #include "uprops.h"
52 #include "ucase.h"
53 #include "ubidi_props.h"
54 #include "ucol_swp.h"
55 #include "ucnv_bld.h"
56 #include "unormimp.h"
57 #include "normalizer2impl.h"
58 #include "sprpimpl.h"
59 #include "propname.h"
60 #include "rbbidata.h"
61 #include "utrie.h"
62 #include "utrie2.h"
63 #include "dictionarydata.h"
64
65 /* swapping implementations in i18n */
66
67 #if !UCONFIG_NO_NORMALIZATION
68 #include "uspoof_impl.h"
69 #endif
70
71 U_NAMESPACE_USE
72
73 /* definitions */
74
75 /* Unicode property (value) aliases data swapping --------------------------- */
76
77 static int32_t U_CALLCONV
upname_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)78 upname_swap(const UDataSwapper *ds,
79 const void *inData, int32_t length, void *outData,
80 UErrorCode *pErrorCode) {
81 /* udata_swapDataHeader checks the arguments */
82 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
83 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
84 return 0;
85 }
86
87 /* check data format and format version */
88 const UDataInfo *pInfo=
89 reinterpret_cast<const UDataInfo *>(
90 static_cast<const char *>(inData)+4);
91 if(!(
92 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */
93 pInfo->dataFormat[1]==0x6e &&
94 pInfo->dataFormat[2]==0x61 &&
95 pInfo->dataFormat[3]==0x6d &&
96 pInfo->formatVersion[0]==2
97 )) {
98 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
99 pInfo->dataFormat[0], pInfo->dataFormat[1],
100 pInfo->dataFormat[2], pInfo->dataFormat[3],
101 pInfo->formatVersion[0]);
102 *pErrorCode=U_UNSUPPORTED_ERROR;
103 return 0;
104 }
105
106 const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
107 uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
108
109 if(length>=0) {
110 length-=headerSize;
111 // formatVersion 2 initially has indexes[8], 32 bytes.
112 if(length<32) {
113 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
114 (int)length);
115 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
116 return 0;
117 }
118 }
119
120 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
121 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
122 if(length>=0) {
123 if(length<totalSize) {
124 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
125 "for pnames.icu\n",
126 (int)length, (int)totalSize);
127 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
128 return 0;
129 }
130
131 int32_t numBytesIndexesAndValueMaps=
132 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
133
134 // Swap the indexes[] and the valueMaps[].
135 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
136
137 // Copy the rest of the data.
138 if(inBytes!=outBytes) {
139 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
140 inBytes+numBytesIndexesAndValueMaps,
141 totalSize-numBytesIndexesAndValueMaps);
142 }
143
144 // We need not swap anything else:
145 //
146 // The ByteTries are already byte-serialized, and are fixed on ASCII.
147 // (On an EBCDIC machine, the input string is converted to lowercase ASCII
148 // while matching.)
149 //
150 // The name groups are mostly invariant characters, but since we only
151 // generate, and keep in subversion, ASCII versions of pnames.icu,
152 // and since only ICU4J uses the pnames.icu data file
153 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
154 // we just copy those bytes too.
155 }
156
157 return headerSize+totalSize;
158 }
159
160 /* Unicode properties data swapping ----------------------------------------- */
161
162 static int32_t U_CALLCONV
uprops_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)163 uprops_swap(const UDataSwapper *ds,
164 const void *inData, int32_t length, void *outData,
165 UErrorCode *pErrorCode) {
166 const UDataInfo *pInfo;
167 int32_t headerSize, i;
168
169 int32_t dataIndexes[UPROPS_INDEX_COUNT];
170 const int32_t *inData32;
171
172 /* udata_swapDataHeader checks the arguments */
173 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
174 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
175 return 0;
176 }
177
178 /* check data format and format version */
179 pInfo=(const UDataInfo *)((const char *)inData+4);
180 if(!(
181 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */
182 pInfo->dataFormat[1]==0x50 &&
183 pInfo->dataFormat[2]==0x72 &&
184 pInfo->dataFormat[3]==0x6f &&
185 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
186 (pInfo->formatVersion[0]>=7 ||
187 (pInfo->formatVersion[2]==UTRIE_SHIFT &&
188 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
189 )) {
190 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
191 pInfo->dataFormat[0], pInfo->dataFormat[1],
192 pInfo->dataFormat[2], pInfo->dataFormat[3],
193 pInfo->formatVersion[0]);
194 *pErrorCode=U_UNSUPPORTED_ERROR;
195 return 0;
196 }
197
198 /* the properties file must contain at least the indexes array */
199 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
200 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
201 length-headerSize);
202 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
203 return 0;
204 }
205
206 /* read the indexes */
207 inData32=(const int32_t *)((const char *)inData+headerSize);
208 for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
209 dataIndexes[i]=udata_readInt32(ds, inData32[i]);
210 }
211
212 /*
213 * comments are copied from the data format description in genprops/store.c
214 * indexes[] constants are in uprops.h
215 */
216 int32_t dataTop;
217 if(length>=0) {
218 int32_t *outData32;
219
220 /*
221 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
222 * In earlier formatVersions, it is 0 and a lower dataIndexes entry
223 * has the top of the last item.
224 */
225 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
226
227 if((length-headerSize)<(4*dataTop)) {
228 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
229 length-headerSize);
230 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
231 return 0;
232 }
233
234 outData32=(int32_t *)((char *)outData+headerSize);
235
236 /* copy everything for inaccessible data (padding) */
237 if(inData32!=outData32) {
238 uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
239 }
240
241 /* swap the indexes[16] */
242 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
243
244 /*
245 * swap the main properties UTrie
246 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
247 */
248 utrie_swapAnyVersion(ds,
249 inData32+UPROPS_INDEX_COUNT,
250 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
251 outData32+UPROPS_INDEX_COUNT,
252 pErrorCode);
253
254 /*
255 * swap the properties and exceptions words
256 * P const uint32_t props32[i1-i0];
257 * E const uint32_t exceptions[i2-i1];
258 */
259 ds->swapArray32(ds,
260 inData32+dataIndexes[UPROPS_PROPS32_INDEX],
261 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
262 outData32+dataIndexes[UPROPS_PROPS32_INDEX],
263 pErrorCode);
264
265 /*
266 * swap the UChars
267 * U const UChar uchars[2*(i3-i2)];
268 */
269 ds->swapArray16(ds,
270 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
271 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
272 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
273 pErrorCode);
274
275 /*
276 * swap the additional UTrie
277 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
278 */
279 utrie_swapAnyVersion(ds,
280 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
281 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
282 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
283 pErrorCode);
284
285 /*
286 * swap the properties vectors
287 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
288 */
289 ds->swapArray32(ds,
290 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
291 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
292 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
293 pErrorCode);
294
295 // swap the Script_Extensions data
296 // SCX const uint16_t scriptExtensions[2*(i7-i6)];
297 ds->swapArray16(ds,
298 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
299 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
300 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
301 pErrorCode);
302 }
303
304 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
305 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
306 }
307
308 /* Unicode case mapping data swapping --------------------------------------- */
309
310 static int32_t U_CALLCONV
ucase_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)311 ucase_swap(const UDataSwapper *ds,
312 const void *inData, int32_t length, void *outData,
313 UErrorCode *pErrorCode) {
314 const UDataInfo *pInfo;
315 int32_t headerSize;
316
317 const uint8_t *inBytes;
318 uint8_t *outBytes;
319
320 const int32_t *inIndexes;
321 int32_t indexes[16];
322
323 int32_t i, offset, count, size;
324
325 /* udata_swapDataHeader checks the arguments */
326 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
327 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
328 return 0;
329 }
330
331 /* check data format and format version */
332 pInfo=(const UDataInfo *)((const char *)inData+4);
333 if(!(
334 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */
335 pInfo->dataFormat[1]==UCASE_FMT_1 &&
336 pInfo->dataFormat[2]==UCASE_FMT_2 &&
337 pInfo->dataFormat[3]==UCASE_FMT_3 &&
338 ((pInfo->formatVersion[0]==1 &&
339 pInfo->formatVersion[2]==UTRIE_SHIFT &&
340 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
341 (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4))
342 )) {
343 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
344 pInfo->dataFormat[0], pInfo->dataFormat[1],
345 pInfo->dataFormat[2], pInfo->dataFormat[3],
346 pInfo->formatVersion[0]);
347 *pErrorCode=U_UNSUPPORTED_ERROR;
348 return 0;
349 }
350
351 inBytes=(const uint8_t *)inData+headerSize;
352 outBytes=(uint8_t *)outData+headerSize;
353
354 inIndexes=(const int32_t *)inBytes;
355
356 if(length>=0) {
357 length-=headerSize;
358 if(length<16*4) {
359 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
360 length);
361 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
362 return 0;
363 }
364 }
365
366 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
367 for(i=0; i<16; ++i) {
368 indexes[i]=udata_readInt32(ds, inIndexes[i]);
369 }
370
371 /* get the total length of the data */
372 size=indexes[UCASE_IX_LENGTH];
373
374 if(length>=0) {
375 if(length<size) {
376 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
377 length);
378 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
379 return 0;
380 }
381
382 /* copy the data for inaccessible bytes */
383 if(inBytes!=outBytes) {
384 uprv_memcpy(outBytes, inBytes, size);
385 }
386
387 offset=0;
388
389 /* swap the int32_t indexes[] */
390 count=indexes[UCASE_IX_INDEX_TOP]*4;
391 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
392 offset+=count;
393
394 /* swap the UTrie */
395 count=indexes[UCASE_IX_TRIE_SIZE];
396 utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
397 offset+=count;
398
399 /* swap the uint16_t exceptions[] and unfold[] */
400 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
401 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
402 offset+=count;
403
404 U_ASSERT(offset==size);
405 }
406
407 return headerSize+size;
408 }
409
410 /* Unicode bidi/shaping data swapping --------------------------------------- */
411
412 static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)413 ubidi_swap(const UDataSwapper *ds,
414 const void *inData, int32_t length, void *outData,
415 UErrorCode *pErrorCode) {
416 const UDataInfo *pInfo;
417 int32_t headerSize;
418
419 const uint8_t *inBytes;
420 uint8_t *outBytes;
421
422 const int32_t *inIndexes;
423 int32_t indexes[16];
424
425 int32_t i, offset, count, size;
426
427 /* udata_swapDataHeader checks the arguments */
428 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
429 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
430 return 0;
431 }
432
433 /* check data format and format version */
434 pInfo=(const UDataInfo *)((const char *)inData+4);
435 if(!(
436 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
437 pInfo->dataFormat[1]==UBIDI_FMT_1 &&
438 pInfo->dataFormat[2]==UBIDI_FMT_2 &&
439 pInfo->dataFormat[3]==UBIDI_FMT_3 &&
440 ((pInfo->formatVersion[0]==1 &&
441 pInfo->formatVersion[2]==UTRIE_SHIFT &&
442 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
443 pInfo->formatVersion[0]==2)
444 )) {
445 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
446 pInfo->dataFormat[0], pInfo->dataFormat[1],
447 pInfo->dataFormat[2], pInfo->dataFormat[3],
448 pInfo->formatVersion[0]);
449 *pErrorCode=U_UNSUPPORTED_ERROR;
450 return 0;
451 }
452
453 inBytes=(const uint8_t *)inData+headerSize;
454 outBytes=(uint8_t *)outData+headerSize;
455
456 inIndexes=(const int32_t *)inBytes;
457
458 if(length>=0) {
459 length-=headerSize;
460 if(length<16*4) {
461 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
462 length);
463 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
464 return 0;
465 }
466 }
467
468 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
469 for(i=0; i<16; ++i) {
470 indexes[i]=udata_readInt32(ds, inIndexes[i]);
471 }
472
473 /* get the total length of the data */
474 size=indexes[UBIDI_IX_LENGTH];
475
476 if(length>=0) {
477 if(length<size) {
478 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
479 length);
480 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
481 return 0;
482 }
483
484 /* copy the data for inaccessible bytes */
485 if(inBytes!=outBytes) {
486 uprv_memcpy(outBytes, inBytes, size);
487 }
488
489 offset=0;
490
491 /* swap the int32_t indexes[] */
492 count=indexes[UBIDI_IX_INDEX_TOP]*4;
493 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
494 offset+=count;
495
496 /* swap the UTrie */
497 count=indexes[UBIDI_IX_TRIE_SIZE];
498 utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
499 offset+=count;
500
501 /* swap the uint32_t mirrors[] */
502 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
503 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
504 offset+=count;
505
506 /* just skip the uint8_t jgArray[] and jgArray2[] */
507 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
508 offset+=count;
509 count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
510 offset+=count;
511
512 U_ASSERT(offset==size);
513 }
514
515 return headerSize+size;
516 }
517
518 /* Unicode normalization data swapping -------------------------------------- */
519
520 #if !UCONFIG_NO_NORMALIZATION
521
522 static int32_t U_CALLCONV
unorm_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)523 unorm_swap(const UDataSwapper *ds,
524 const void *inData, int32_t length, void *outData,
525 UErrorCode *pErrorCode) {
526 const UDataInfo *pInfo;
527 int32_t headerSize;
528
529 const uint8_t *inBytes;
530 uint8_t *outBytes;
531
532 const int32_t *inIndexes;
533 int32_t indexes[32];
534
535 int32_t i, offset, count, size;
536
537 /* udata_swapDataHeader checks the arguments */
538 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
539 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
540 return 0;
541 }
542
543 /* check data format and format version */
544 pInfo=(const UDataInfo *)((const char *)inData+4);
545 if(!(
546 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */
547 pInfo->dataFormat[1]==0x6f &&
548 pInfo->dataFormat[2]==0x72 &&
549 pInfo->dataFormat[3]==0x6d &&
550 pInfo->formatVersion[0]==2
551 )) {
552 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
553 pInfo->dataFormat[0], pInfo->dataFormat[1],
554 pInfo->dataFormat[2], pInfo->dataFormat[3],
555 pInfo->formatVersion[0]);
556 *pErrorCode=U_UNSUPPORTED_ERROR;
557 return 0;
558 }
559
560 inBytes=(const uint8_t *)inData+headerSize;
561 outBytes=(uint8_t *)outData+headerSize;
562
563 inIndexes=(const int32_t *)inBytes;
564
565 if(length>=0) {
566 length-=headerSize;
567 if(length<32*4) {
568 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
569 length);
570 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
571 return 0;
572 }
573 }
574
575 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
576 for(i=0; i<32; ++i) {
577 indexes[i]=udata_readInt32(ds, inIndexes[i]);
578 }
579
580 /* calculate the total length of the data */
581 size=
582 32*4+ /* size of indexes[] */
583 indexes[_NORM_INDEX_TRIE_SIZE]+
584 indexes[_NORM_INDEX_UCHAR_COUNT]*2+
585 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
586 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
587 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
588 indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
589
590 if(length>=0) {
591 if(length<size) {
592 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
593 length);
594 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
595 return 0;
596 }
597
598 /* copy the data for inaccessible bytes */
599 if(inBytes!=outBytes) {
600 uprv_memcpy(outBytes, inBytes, size);
601 }
602
603 offset=0;
604
605 /* swap the indexes[] */
606 count=32*4;
607 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
608 offset+=count;
609
610 /* swap the main UTrie */
611 count=indexes[_NORM_INDEX_TRIE_SIZE];
612 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
613 offset+=count;
614
615 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
616 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
617 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
618 offset+=count;
619
620 /* swap the FCD UTrie */
621 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
622 if(count!=0) {
623 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
624 offset+=count;
625 }
626
627 /* swap the aux UTrie */
628 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
629 if(count!=0) {
630 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
631 offset+=count;
632 }
633
634 /* swap the uint16_t combiningTable[] */
635 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
636 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
637 offset+=count;
638 }
639
640 return headerSize+size;
641 }
642
643 #endif
644
645 // Unicode text layout properties data swapping --------------------------------
646
647 static int32_t U_CALLCONV
ulayout_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)648 ulayout_swap(const UDataSwapper *ds,
649 const void *inData, int32_t length, void *outData,
650 UErrorCode *pErrorCode) {
651 // udata_swapDataHeader checks the arguments.
652 int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
653 if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) {
654 return 0;
655 }
656
657 // Check data format and format version.
658 const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
659 if (!(
660 pInfo->dataFormat[0] == ULAYOUT_FMT_0 && // dataFormat="Layo"
661 pInfo->dataFormat[1] == ULAYOUT_FMT_1 &&
662 pInfo->dataFormat[2] == ULAYOUT_FMT_2 &&
663 pInfo->dataFormat[3] == ULAYOUT_FMT_3 &&
664 pInfo->formatVersion[0] == 1)) {
665 udata_printError(ds,
666 "ulayout_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
667 "is not recognized as text layout properties data\n",
668 pInfo->dataFormat[0], pInfo->dataFormat[1],
669 pInfo->dataFormat[2], pInfo->dataFormat[3],
670 pInfo->formatVersion[0]);
671 *pErrorCode = U_UNSUPPORTED_ERROR;
672 return 0;
673 }
674
675 const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
676 uint8_t *outBytes = (uint8_t *)outData + headerSize;
677
678 const int32_t *inIndexes = (const int32_t *)inBytes;
679
680 if (length >= 0) {
681 length -= headerSize;
682 if (length < 12 * 4) {
683 udata_printError(ds,
684 "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n",
685 length);
686 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
687 return 0;
688 }
689 }
690
691 int32_t indexesLength = udata_readInt32(ds, inIndexes[ULAYOUT_IX_INDEXES_LENGTH]);
692 if (indexesLength < 12) {
693 udata_printError(ds,
694 "ulayout_swap(): too few indexes (%d) for text layout properties data\n",
695 indexesLength);
696 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
697 return 0;
698 }
699
700 // Read the data offsets before swapping anything.
701 int32_t indexes[ULAYOUT_IX_TRIES_TOP + 1];
702 for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
703 indexes[i] = udata_readInt32(ds, inIndexes[i]);
704 }
705 int32_t size = indexes[ULAYOUT_IX_TRIES_TOP];
706
707 if (length >= 0) {
708 if (length < size) {
709 udata_printError(ds,
710 "ulayout_swap(): too few bytes (%d after header) "
711 "for all of text layout properties data\n",
712 length);
713 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
714 return 0;
715 }
716
717 // Copy the data for inaccessible bytes.
718 if (inBytes != outBytes) {
719 uprv_memcpy(outBytes, inBytes, size);
720 }
721
722 // Swap the int32_t indexes[].
723 int32_t offset = 0;
724 int32_t count = indexesLength * 4;
725 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
726 offset += count;
727
728 // Swap each trie.
729 for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
730 int32_t top = indexes[i];
731 count = top - offset;
732 U_ASSERT(count >= 0);
733 if (count >= 16) {
734 utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
735 }
736 offset = top;
737 }
738
739 U_ASSERT(offset == size);
740 }
741
742 return headerSize + size;
743 }
744
745 // Unicode emoji properties data swapping --------------------------------------
746
747 static int32_t U_CALLCONV
uemoji_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)748 uemoji_swap(const UDataSwapper *ds,
749 const void *inData, int32_t length, void *outData,
750 UErrorCode *pErrorCode) {
751 // udata_swapDataHeader checks the arguments.
752 int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
753 if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) {
754 return 0;
755 }
756
757 // Check data format and format version.
758 const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
759 if (!(
760 pInfo->dataFormat[0] == u'E' &&
761 pInfo->dataFormat[1] == u'm' &&
762 pInfo->dataFormat[2] == u'o' &&
763 pInfo->dataFormat[3] == u'j' &&
764 pInfo->formatVersion[0] == 1)) {
765 udata_printError(ds,
766 "uemoji_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
767 "is not recognized as emoji properties data\n",
768 pInfo->dataFormat[0], pInfo->dataFormat[1],
769 pInfo->dataFormat[2], pInfo->dataFormat[3],
770 pInfo->formatVersion[0]);
771 *pErrorCode = U_UNSUPPORTED_ERROR;
772 return 0;
773 }
774
775 const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
776 uint8_t *outBytes = (uint8_t *)outData + headerSize;
777
778 const int32_t *inIndexes = (const int32_t *)inBytes;
779
780 if (length >= 0) {
781 length -= headerSize;
782 // We expect to read at least EmojiProps::IX_TOTAL_SIZE.
783 if (length < 14 * 4) {
784 udata_printError(ds,
785 "uemoji_swap(): too few bytes (%d after header) for emoji properties data\n",
786 length);
787 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
788 return 0;
789 }
790 }
791
792 // First offset after indexes[].
793 int32_t cpTrieOffset = udata_readInt32(ds, inIndexes[EmojiProps::IX_CPTRIE_OFFSET]);
794 int32_t indexesLength = cpTrieOffset / 4;
795 if (indexesLength < 14) {
796 udata_printError(ds,
797 "uemoji_swap(): too few indexes (%d) for emoji properties data\n",
798 indexesLength);
799 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
800 return 0;
801 }
802
803 // Read the data offsets before swapping anything.
804 int32_t indexes[EmojiProps::IX_TOTAL_SIZE + 1];
805 indexes[0] = cpTrieOffset;
806 for (int32_t i = 1; i <= EmojiProps::IX_TOTAL_SIZE; ++i) {
807 indexes[i] = udata_readInt32(ds, inIndexes[i]);
808 }
809 int32_t size = indexes[EmojiProps::IX_TOTAL_SIZE];
810
811 if (length >= 0) {
812 if (length < size) {
813 udata_printError(ds,
814 "uemoji_swap(): too few bytes (%d after header) "
815 "for all of emoji properties data\n",
816 length);
817 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
818 return 0;
819 }
820
821 // Copy the data for inaccessible bytes.
822 if (inBytes != outBytes) {
823 uprv_memcpy(outBytes, inBytes, size);
824 }
825
826 // Swap the int32_t indexes[].
827 int32_t offset = 0;
828 int32_t top = cpTrieOffset;
829 ds->swapArray32(ds, inBytes, top - offset, outBytes, pErrorCode);
830 offset = top;
831
832 // Swap the code point trie.
833 top = indexes[EmojiProps::IX_CPTRIE_OFFSET + 1];
834 int32_t count = top - offset;
835 U_ASSERT(count >= 0);
836 if (count >= 16) {
837 utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
838 }
839 offset = top;
840
841 // Swap all of the string tries.
842 // They are all serialized as arrays of 16-bit units.
843 offset = indexes[EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET];
844 top = indexes[EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET + 1];
845 ds->swapArray16(ds, inBytes + offset, top - offset, outBytes + offset, pErrorCode);
846 offset = top;
847
848 U_ASSERT(offset == size);
849 }
850
851 return headerSize + size;
852 }
853
854 /* Swap 'Test' data from gentest */
855 static int32_t U_CALLCONV
test_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)856 test_swap(const UDataSwapper *ds,
857 const void *inData, int32_t length, void *outData,
858 UErrorCode *pErrorCode) {
859 const UDataInfo *pInfo;
860 int32_t headerSize;
861
862 const uint8_t *inBytes;
863 uint8_t *outBytes;
864
865 int32_t offset;
866
867 /* udata_swapDataHeader checks the arguments */
868 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
869 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
870 udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
871 return 0;
872 }
873
874 /* check data format and format version */
875 pInfo=(const UDataInfo *)((const char *)inData+4);
876 if(!(
877 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */
878 pInfo->dataFormat[1]==0x65 &&
879 pInfo->dataFormat[2]==0x73 &&
880 pInfo->dataFormat[3]==0x74 &&
881 pInfo->formatVersion[0]==1
882 )) {
883 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
884 pInfo->dataFormat[0], pInfo->dataFormat[1],
885 pInfo->dataFormat[2], pInfo->dataFormat[3],
886 pInfo->formatVersion[0]);
887 *pErrorCode=U_UNSUPPORTED_ERROR;
888 return 0;
889 }
890
891 inBytes=(const uint8_t *)inData+headerSize;
892 outBytes=(uint8_t *)outData+headerSize;
893
894 int32_t size16 = 2; // 16bit plus padding
895 int32_t sizeStr = 5; // 4 char inv-str plus null
896 int32_t size = size16 + sizeStr;
897
898 if(length>=0) {
899 if(length<size) {
900 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
901 length, size);
902 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
903 return 0;
904 }
905
906 offset =0;
907 /* swap a 1 entry array */
908 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
909 offset+=size16;
910 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
911 }
912
913 return headerSize+size;
914 }
915
916 /* swap any data (except a .dat package) ------------------------------------ */
917
918 static const struct {
919 uint8_t dataFormat[4];
920 UDataSwapFn *swapFn;
921 } swapFns[]={
922 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */
923 #if !UCONFIG_NO_LEGACY_CONVERSION
924 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */
925 #endif
926 #if !UCONFIG_NO_CONVERSION
927 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */
928 #endif
929 #if !UCONFIG_NO_IDNA
930 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */
931 #endif
932 /* insert data formats here, descending by expected frequency of occurrence */
933 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */
934
935 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
936 ucase_swap }, /* dataFormat="cAsE" */
937
938 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
939 ubidi_swap }, /* dataFormat="BiDi" */
940
941 #if !UCONFIG_NO_NORMALIZATION
942 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
943 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */
944 #endif
945
946 { { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 },
947 ulayout_swap }, // dataFormat="Layo"
948
949 { { u'E', u'm', u'o', u'j' }, uemoji_swap },
950
951 #if !UCONFIG_NO_COLLATION
952 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
953 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
954 #endif
955 #if !UCONFIG_NO_BREAK_ITERATION
956 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */
957 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */
958 #endif
959 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
960 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */
961 #if !UCONFIG_NO_NORMALIZATION
962 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */
963 #endif
964 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */
965 };
966
967 U_CAPI int32_t U_EXPORT2
udata_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)968 udata_swap(const UDataSwapper *ds,
969 const void *inData, int32_t length, void *outData,
970 UErrorCode *pErrorCode) {
971 char dataFormatChars[4];
972 const UDataInfo *pInfo;
973 int32_t i, swappedLength;
974
975 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
976 return 0;
977 }
978
979 /*
980 * Preflight the header first; checks for illegal arguments, too.
981 * Do not swap the header right away because the format-specific swapper
982 * will swap it, get the headerSize again, and also use the header
983 * information. Otherwise we would have to pass some of the information
984 * and not be able to use the UDataSwapFn signature.
985 */
986 udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
987
988 /*
989 * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
990 * then we could check here for further known magic values and structures.
991 */
992 if(U_FAILURE(*pErrorCode)) {
993 return 0; /* the data format was not recognized */
994 }
995
996 pInfo=(const UDataInfo *)((const char *)inData+4);
997
998 {
999 /* convert the data format from ASCII to Unicode to the system charset */
1000 UChar u[4]={
1001 pInfo->dataFormat[0], pInfo->dataFormat[1],
1002 pInfo->dataFormat[2], pInfo->dataFormat[3]
1003 };
1004
1005 if(uprv_isInvariantUString(u, 4)) {
1006 u_UCharsToChars(u, dataFormatChars, 4);
1007 } else {
1008 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
1009 }
1010 }
1011
1012 /* dispatch to the swap function for the dataFormat */
1013 for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
1014 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
1015 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
1016
1017 if(U_FAILURE(*pErrorCode)) {
1018 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
1019 pInfo->dataFormat[0], pInfo->dataFormat[1],
1020 pInfo->dataFormat[2], pInfo->dataFormat[3],
1021 dataFormatChars[0], dataFormatChars[1],
1022 dataFormatChars[2], dataFormatChars[3],
1023 u_errorName(*pErrorCode));
1024 } else if(swappedLength<(length-15)) {
1025 /* swapped less than expected */
1026 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
1027 swappedLength, length,
1028 pInfo->dataFormat[0], pInfo->dataFormat[1],
1029 pInfo->dataFormat[2], pInfo->dataFormat[3],
1030 dataFormatChars[0], dataFormatChars[1],
1031 dataFormatChars[2], dataFormatChars[3],
1032 u_errorName(*pErrorCode));
1033 }
1034
1035 return swappedLength;
1036 }
1037 }
1038
1039 /* the dataFormat was not recognized */
1040 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
1041 pInfo->dataFormat[0], pInfo->dataFormat[1],
1042 pInfo->dataFormat[2], pInfo->dataFormat[3],
1043 dataFormatChars[0], dataFormatChars[1],
1044 dataFormatChars[2], dataFormatChars[3]);
1045
1046 *pErrorCode=U_UNSUPPORTED_ERROR;
1047 return 0;
1048 }
1049