• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*
*   Copyright (C) 2003-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_swp.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003sep10
*   created by: Markus W. Scherer
*
*   Swap collation binaries.
*/
18 
19 #include "unicode/udata.h" /* UDataInfo */
20 #include "utrie.h"
21 #include "udataswp.h"
22 #include "cmemory.h"
23 #include "ucol_data.h"
24 #include "ucol_swp.h"
25 
26 /* swapping ----------------------------------------------------------------- */
27 
28 /*
29  * This performs data swapping for a folded trie (see utrie.c for details).
30  */
31 
32 U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)33 utrie_swap(const UDataSwapper *ds,
34            const void *inData, int32_t length, void *outData,
35            UErrorCode *pErrorCode) {
36     const UTrieHeader *inTrie;
37     UTrieHeader trie;
38     int32_t size;
39     UBool dataIs32;
40 
41     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
42         return 0;
43     }
44     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
45         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
46         return 0;
47     }
48 
49     /* setup and swapping */
50     if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
51         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
52         return 0;
53     }
54 
55     inTrie=(const UTrieHeader *)inData;
56     trie.signature=ds->readUInt32(inTrie->signature);
57     trie.options=ds->readUInt32(inTrie->options);
58     trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
59     trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
60 
61     if( trie.signature!=0x54726965 ||
62         (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
63         ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
64         trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
65         (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
66         trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
67         (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
68         ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
69     ) {
70         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
71         return 0;
72     }
73 
74     dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
75     size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
76 
77     if(length>=0) {
78         UTrieHeader *outTrie;
79 
80         if(length<size) {
81             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
82             return 0;
83         }
84 
85         outTrie=(UTrieHeader *)outData;
86 
87         /* swap the header */
88         ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
89 
90         /* swap the index and the data */
91         if(dataIs32) {
92             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
93             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
94                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
95         } else {
96             ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
97         }
98     }
99 
100     return size;
101 }
102 
103 #if !UCONFIG_NO_COLLATION
104 
105 /* Modified copy of the beginning of ucol_swapBinary(). */
106 U_CAPI UBool U_EXPORT2
ucol_looksLikeCollationBinary(const UDataSwapper * ds,const void * inData,int32_t length)107 ucol_looksLikeCollationBinary(const UDataSwapper *ds,
108                               const void *inData, int32_t length) {
109     const UCATableHeader *inHeader;
110     UCATableHeader header;
111 
112     if(ds==NULL || inData==NULL || length<-1) {
113         return FALSE;
114     }
115 
116     inHeader=(const UCATableHeader *)inData;
117 
118     /*
119      * The collation binary must contain at least the UCATableHeader,
120      * starting with its size field.
121      * sizeof(UCATableHeader)==42*4 in ICU 2.8
122      * check the length against the header size before reading the size field
123      */
124     uprv_memset(&header, 0, sizeof(header));
125     if(length<0) {
126         header.size=udata_readInt32(ds, inHeader->size);
127     } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
128         return FALSE;
129     }
130 
131     header.magic=ds->readUInt32(inHeader->magic);
132     if(!(
133         header.magic==UCOL_HEADER_MAGIC &&
134         inHeader->formatVersion[0]==3 /*&&
135         inHeader->formatVersion[1]>=0*/
136     )) {
137         return FALSE;
138     }
139 
140     if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
141         return FALSE;
142     }
143 
144     return TRUE;
145 }
146 
147 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
148 U_CAPI int32_t U_EXPORT2
ucol_swapBinary(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)149 ucol_swapBinary(const UDataSwapper *ds,
150                 const void *inData, int32_t length, void *outData,
151                 UErrorCode *pErrorCode) {
152     const uint8_t *inBytes;
153     uint8_t *outBytes;
154 
155     const UCATableHeader *inHeader;
156     UCATableHeader *outHeader;
157     UCATableHeader header;
158 
159     uint32_t count;
160 
161     /* argument checking in case we were not called from ucol_swap() */
162     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
163         return 0;
164     }
165     if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
166         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
167         return 0;
168     }
169 
170     inBytes=(const uint8_t *)inData;
171     outBytes=(uint8_t *)outData;
172 
173     inHeader=(const UCATableHeader *)inData;
174     outHeader=(UCATableHeader *)outData;
175 
176     /*
177      * The collation binary must contain at least the UCATableHeader,
178      * starting with its size field.
179      * sizeof(UCATableHeader)==42*4 in ICU 2.8
180      * check the length against the header size before reading the size field
181      */
182     uprv_memset(&header, 0, sizeof(header));
183     if(length<0) {
184         header.size=udata_readInt32(ds, inHeader->size);
185     } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
186         udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
187                          length);
188         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
189         return 0;
190     }
191 
192     header.magic=ds->readUInt32(inHeader->magic);
193     if(!(
194         header.magic==UCOL_HEADER_MAGIC &&
195         inHeader->formatVersion[0]==3 /*&&
196         inHeader->formatVersion[1]>=0*/
197     )) {
198         udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
199                          header.magic,
200                          inHeader->formatVersion[0], inHeader->formatVersion[1]);
201         *pErrorCode=U_UNSUPPORTED_ERROR;
202         return 0;
203     }
204 
205     if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
206         udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
207                          inHeader->isBigEndian, inHeader->charSetFamily);
208         *pErrorCode=U_INVALID_FORMAT_ERROR;
209         return 0;
210     }
211 
212     if(length>=0) {
213         /* copy everything, takes care of data that needs no swapping */
214         if(inBytes!=outBytes) {
215             uprv_memcpy(outBytes, inBytes, header.size);
216         }
217 
218         /* swap the necessary pieces in the order of their occurrence in the data */
219 
220         /* read more of the UCATableHeader (the size field was read above) */
221         header.options=                 ds->readUInt32(inHeader->options);
222         header.UCAConsts=               ds->readUInt32(inHeader->UCAConsts);
223         header.contractionUCACombos=    ds->readUInt32(inHeader->contractionUCACombos);
224         header.mappingPosition=         ds->readUInt32(inHeader->mappingPosition);
225         header.expansion=               ds->readUInt32(inHeader->expansion);
226         header.contractionIndex=        ds->readUInt32(inHeader->contractionIndex);
227         header.contractionCEs=          ds->readUInt32(inHeader->contractionCEs);
228         header.contractionSize=         ds->readUInt32(inHeader->contractionSize);
229         header.endExpansionCE=          ds->readUInt32(inHeader->endExpansionCE);
230         header.expansionCESize=         ds->readUInt32(inHeader->expansionCESize);
231         header.endExpansionCECount=     udata_readInt32(ds, inHeader->endExpansionCECount);
232         header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
233         header.scriptToLeadByte=        ds->readUInt32(inHeader->scriptToLeadByte);
234         header.leadByteToScript=        ds->readUInt32(inHeader->leadByteToScript);
235 
236         /* swap the 32-bit integers in the header */
237         ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
238                            outHeader, pErrorCode);
239         ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
240                            &(outHeader->scriptToLeadByte), pErrorCode);
241         /* set the output platform properties */
242         outHeader->isBigEndian=ds->outIsBigEndian;
243         outHeader->charSetFamily=ds->outCharset;
244 
245         /* swap the options */
246         if(header.options!=0) {
247             ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
248                                outBytes+header.options, pErrorCode);
249         }
250 
251         /* swap the expansions */
252         if(header.mappingPosition!=0 && header.expansion!=0) {
253             if(header.contractionIndex!=0) {
254                 /* expansions bounded by contractions */
255                 count=header.contractionIndex-header.expansion;
256             } else {
257                 /* no contractions: expansions bounded by the main trie */
258                 count=header.mappingPosition-header.expansion;
259             }
260             ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
261                                outBytes+header.expansion, pErrorCode);
262         }
263 
264         /* swap the contractions */
265         if(header.contractionSize!=0) {
266             /* contractionIndex: UChar[] */
267             ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
268                                outBytes+header.contractionIndex, pErrorCode);
269 
270             /* contractionCEs: CEs[] */
271             ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
272                                outBytes+header.contractionCEs, pErrorCode);
273         }
274 
275         /* swap the main trie */
276         if(header.mappingPosition!=0) {
277             count=header.endExpansionCE-header.mappingPosition;
278             utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
279                           outBytes+header.mappingPosition, pErrorCode);
280         }
281 
282         /* swap the max expansion table */
283         if(header.endExpansionCECount!=0) {
284             ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
285                                outBytes+header.endExpansionCE, pErrorCode);
286         }
287 
288         /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
289 
290         /* swap UCA constants */
291         if(header.UCAConsts!=0) {
292             /*
293              * if UCAConsts!=0 then contractionUCACombos because we are swapping
294              * the UCA data file, and we know that the UCA contains contractions
295              */
296             count=header.contractionUCACombos-header.UCAConsts;
297             ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
298                                outBytes+header.UCAConsts, pErrorCode);
299         }
300 
301         /* swap UCA contractions */
302         if(header.contractionUCACombosSize!=0) {
303             count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
304             ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
305                                outBytes+header.contractionUCACombos, pErrorCode);
306         }
307 
308         /* swap the script to lead bytes */
309         if(header.scriptToLeadByte!=0) {
310             int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
311             int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
312             ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
313                                 4 + (4 * indexCount) + (2 * dataCount),
314                                 outBytes+header.scriptToLeadByte, pErrorCode);
315         }
316 
317         /* swap the lead byte to scripts */
318         if(header.leadByteToScript!=0) {
319             int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
320             int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
321             ds->swapArray16(ds, inBytes+header.leadByteToScript,
322                                 4 + (2 * indexCount) + (2 * dataCount),
323                                 outBytes+header.leadByteToScript, pErrorCode);
324         }
325     }
326 
327     return header.size;
328 }
329 
330 /* swap ICU collation data like ucadata.icu */
331 U_CAPI int32_t U_EXPORT2
ucol_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)332 ucol_swap(const UDataSwapper *ds,
333           const void *inData, int32_t length, void *outData,
334           UErrorCode *pErrorCode) {
335 
336     const UDataInfo *pInfo;
337     int32_t headerSize, collationSize;
338 
339     /* udata_swapDataHeader checks the arguments */
340     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
341     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
342         return 0;
343     }
344 
345     /* check data format and format version */
346     pInfo=(const UDataInfo *)((const char *)inData+4);
347     if(!(
348         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UCol" */
349         pInfo->dataFormat[1]==0x43 &&
350         pInfo->dataFormat[2]==0x6f &&
351         pInfo->dataFormat[3]==0x6c &&
352         pInfo->formatVersion[0]==3 /*&&
353         pInfo->formatVersion[1]>=0*/
354     )) {
355         udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
356                          pInfo->dataFormat[0], pInfo->dataFormat[1],
357                          pInfo->dataFormat[2], pInfo->dataFormat[3],
358                          pInfo->formatVersion[0], pInfo->formatVersion[1]);
359         *pErrorCode=U_UNSUPPORTED_ERROR;
360         return 0;
361     }
362 
363     collationSize=ucol_swapBinary(ds,
364                         (const char *)inData+headerSize,
365                         length>=0 ? length-headerSize : -1,
366                         (char *)outData+headerSize,
367                         pErrorCode);
368     if(U_SUCCESS(*pErrorCode)) {
369         return headerSize+collationSize;
370     } else {
371         return 0;
372     }
373 }
374 
375 /* swap inverse UCA collation data (invuca.icu) */
376 U_CAPI int32_t U_EXPORT2
ucol_swapInverseUCA(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)377 ucol_swapInverseUCA(const UDataSwapper *ds,
378                     const void *inData, int32_t length, void *outData,
379                     UErrorCode *pErrorCode) {
380     const UDataInfo *pInfo;
381     int32_t headerSize;
382 
383     const uint8_t *inBytes;
384     uint8_t *outBytes;
385 
386     const InverseUCATableHeader *inHeader;
387     InverseUCATableHeader *outHeader;
388     InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
389 
390     /* udata_swapDataHeader checks the arguments */
391     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
392     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
393         return 0;
394     }
395 
396     /* check data format and format version */
397     pInfo=(const UDataInfo *)((const char *)inData+4);
398     if(!(
399         pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */
400         pInfo->dataFormat[1]==0x6e &&
401         pInfo->dataFormat[2]==0x76 &&
402         pInfo->dataFormat[3]==0x43 &&
403         pInfo->formatVersion[0]==2 &&
404         pInfo->formatVersion[1]>=1
405     )) {
406         udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
407                          pInfo->dataFormat[0], pInfo->dataFormat[1],
408                          pInfo->dataFormat[2], pInfo->dataFormat[3],
409                          pInfo->formatVersion[0], pInfo->formatVersion[1]);
410         *pErrorCode=U_UNSUPPORTED_ERROR;
411         return 0;
412     }
413 
414     inBytes=(const uint8_t *)inData+headerSize;
415     outBytes=(uint8_t *)outData+headerSize;
416 
417     inHeader=(const InverseUCATableHeader *)inBytes;
418     outHeader=(InverseUCATableHeader *)outBytes;
419 
420     /*
421      * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
422      * starting with its size field.
423      * sizeof(UCATableHeader)==8*4 in ICU 2.8
424      * check the length against the header size before reading the size field
425      */
426     if(length<0) {
427         header.byteSize=udata_readInt32(ds, inHeader->byteSize);
428     } else if(
429         ((length-headerSize)<(8*4) ||
430          (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
431     ) {
432         udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
433                          length);
434         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
435         return 0;
436     }
437 
438     if(length>=0) {
439         /* copy everything, takes care of data that needs no swapping */
440         if(inBytes!=outBytes) {
441             uprv_memcpy(outBytes, inBytes, header.byteSize);
442         }
443 
444         /* swap the necessary pieces in the order of their occurrence in the data */
445 
446         /* read more of the InverseUCATableHeader (the byteSize field was read above) */
447         header.tableSize=   ds->readUInt32(inHeader->tableSize);
448         header.contsSize=   ds->readUInt32(inHeader->contsSize);
449         header.table=       ds->readUInt32(inHeader->table);
450         header.conts=       ds->readUInt32(inHeader->conts);
451 
452         /* swap the 32-bit integers in the header */
453         ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
454 
455         /* swap the inverse table; tableSize counts uint32_t[3] rows */
456         ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
457                            outBytes+header.table, pErrorCode);
458 
459         /* swap the continuation table; contsSize counts UChars */
460         ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
461                            outBytes+header.conts, pErrorCode);
462     }
463 
464     return headerSize+header.byteSize;
465 }
466 
467 #endif /* #if !UCONFIG_NO_COLLATION */
468