1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // utrie_swap.cpp
5 // created: 2018aug08 Markus W. Scherer
6
7 #include "unicode/utypes.h"
8 #include "cmemory.h"
9 #include "ucptrie_impl.h"
10 #include "udataswp.h"
11 #include "utrie.h"
12 #include "utrie2_impl.h"
13
14 // These functions for swapping different generations of ICU code point tries are here
15 // so that their implementation files need not depend on swapper code,
16 // need not depend on each other, and so that other swapper code
17 // need not depend on other trie code.
18
namespace {

/**
 * Smallest data array length, counted in values, that ucptrie_swap() accepts.
 * A UCPTrie header announcing fewer values is rejected with U_INVALID_FORMAT_ERROR.
 * (The name suggests one value per ASCII code point, 0..0x7f.)
 */
constexpr int32_t ASCII_LIMIT = 0x80;

}  // namespace
24
25 U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)26 utrie_swap(const UDataSwapper *ds,
27 const void *inData, int32_t length, void *outData,
28 UErrorCode *pErrorCode) {
29 const UTrieHeader *inTrie;
30 UTrieHeader trie;
31 int32_t size;
32 UBool dataIs32;
33
34 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
35 return 0;
36 }
37 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
38 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
39 return 0;
40 }
41
42 /* setup and swapping */
43 if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
44 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
45 return 0;
46 }
47
48 inTrie=(const UTrieHeader *)inData;
49 trie.signature=ds->readUInt32(inTrie->signature);
50 trie.options=ds->readUInt32(inTrie->options);
51 trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
52 trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
53
54 if( trie.signature!=0x54726965 ||
55 (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
56 ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
57 trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
58 (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
59 trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
60 (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
61 ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
62 ) {
63 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
64 return 0;
65 }
66
67 dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
68 size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
69
70 if(length>=0) {
71 UTrieHeader *outTrie;
72
73 if(length<size) {
74 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
75 return 0;
76 }
77
78 outTrie=(UTrieHeader *)outData;
79
80 /* swap the header */
81 ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
82
83 /* swap the index and the data */
84 if(dataIs32) {
85 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
86 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
87 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
88 } else {
89 ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
90 }
91 }
92
93 return size;
94 }
95
96 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)97 utrie2_swap(const UDataSwapper *ds,
98 const void *inData, int32_t length, void *outData,
99 UErrorCode *pErrorCode) {
100 const UTrie2Header *inTrie;
101 UTrie2Header trie;
102 int32_t dataLength, size;
103 UTrie2ValueBits valueBits;
104
105 if(U_FAILURE(*pErrorCode)) {
106 return 0;
107 }
108 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
109 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
110 return 0;
111 }
112
113 /* setup and swapping */
114 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
115 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
116 return 0;
117 }
118
119 inTrie=(const UTrie2Header *)inData;
120 trie.signature=ds->readUInt32(inTrie->signature);
121 trie.options=ds->readUInt16(inTrie->options);
122 trie.indexLength=ds->readUInt16(inTrie->indexLength);
123 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
124
125 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
126 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
127
128 if( trie.signature!=UTRIE2_SIG ||
129 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
130 trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
131 dataLength<UTRIE2_DATA_START_OFFSET
132 ) {
133 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
134 return 0;
135 }
136
137 size=sizeof(UTrie2Header)+trie.indexLength*2;
138 switch(valueBits) {
139 case UTRIE2_16_VALUE_BITS:
140 size+=dataLength*2;
141 break;
142 case UTRIE2_32_VALUE_BITS:
143 size+=dataLength*4;
144 break;
145 default:
146 *pErrorCode=U_INVALID_FORMAT_ERROR;
147 return 0;
148 }
149
150 if(length>=0) {
151 UTrie2Header *outTrie;
152
153 if(length<size) {
154 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
155 return 0;
156 }
157
158 outTrie=(UTrie2Header *)outData;
159
160 /* swap the header */
161 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
162 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
163
164 /* swap the index and the data */
165 switch(valueBits) {
166 case UTRIE2_16_VALUE_BITS:
167 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
168 break;
169 case UTRIE2_32_VALUE_BITS:
170 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
171 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
172 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
173 break;
174 default:
175 *pErrorCode=U_INVALID_FORMAT_ERROR;
176 return 0;
177 }
178 }
179
180 return size;
181 }
182
183 U_CAPI int32_t U_EXPORT2
ucptrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)184 ucptrie_swap(const UDataSwapper *ds,
185 const void *inData, int32_t length, void *outData,
186 UErrorCode *pErrorCode) {
187 const UCPTrieHeader *inTrie;
188 UCPTrieHeader trie;
189 int32_t dataLength, size;
190 UCPTrieValueWidth valueWidth;
191
192 if(U_FAILURE(*pErrorCode)) {
193 return 0;
194 }
195 if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
196 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
197 return 0;
198 }
199
200 /* setup and swapping */
201 if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
202 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
203 return 0;
204 }
205
206 inTrie=(const UCPTrieHeader *)inData;
207 trie.signature=ds->readUInt32(inTrie->signature);
208 trie.options=ds->readUInt16(inTrie->options);
209 trie.indexLength=ds->readUInt16(inTrie->indexLength);
210 trie.dataLength = ds->readUInt16(inTrie->dataLength);
211
212 UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
213 valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
214 dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
215
216 int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
217 UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
218 if( trie.signature!=UCPTRIE_SIG ||
219 type > UCPTRIE_TYPE_SMALL ||
220 (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
221 valueWidth > UCPTRIE_VALUE_BITS_8 ||
222 trie.indexLength < minIndexLength ||
223 dataLength < ASCII_LIMIT
224 ) {
225 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
226 return 0;
227 }
228
229 size=sizeof(UCPTrieHeader)+trie.indexLength*2;
230 switch(valueWidth) {
231 case UCPTRIE_VALUE_BITS_16:
232 size+=dataLength*2;
233 break;
234 case UCPTRIE_VALUE_BITS_32:
235 size+=dataLength*4;
236 break;
237 case UCPTRIE_VALUE_BITS_8:
238 size+=dataLength;
239 break;
240 default:
241 *pErrorCode=U_INVALID_FORMAT_ERROR;
242 return 0;
243 }
244
245 if(length>=0) {
246 UCPTrieHeader *outTrie;
247
248 if(length<size) {
249 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
250 return 0;
251 }
252
253 outTrie=(UCPTrieHeader *)outData;
254
255 /* swap the header */
256 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
257 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
258
259 /* swap the index */
260 const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
261 uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
262 ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
263
264 /* swap the data */
265 const uint16_t *inData=inIndex+trie.indexLength;
266 uint16_t *outData=outIndex+trie.indexLength;
267 switch(valueWidth) {
268 case UCPTRIE_VALUE_BITS_16:
269 ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode);
270 break;
271 case UCPTRIE_VALUE_BITS_32:
272 ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode);
273 break;
274 case UCPTRIE_VALUE_BITS_8:
275 if(inTrie!=outTrie) {
276 uprv_memmove(outData, inData, dataLength);
277 }
278 break;
279 default:
280 *pErrorCode=U_INVALID_FORMAT_ERROR;
281 return 0;
282 }
283 }
284
285 return size;
286 }
287
288 namespace {
289
290 /**
291 * Gets the trie version from 32-bit-aligned memory containing the serialized form
292 * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
293 *
294 * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
295 * @param length the number of bytes available at data;
296 * can be more than necessary (see return value)
297 * @param anyEndianOk If false, only platform-endian serialized forms are recognized.
298 * If true, opposite-endian serialized forms are recognized as well.
299 * @return the trie version of the serialized form, or 0 if it is not
300 * recognized as a serialized trie
301 */
302 int32_t
getVersion(const void * data,int32_t length,UBool anyEndianOk)303 getVersion(const void *data, int32_t length, UBool anyEndianOk) {
304 uint32_t signature;
305 if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
306 return 0;
307 }
308 signature=*(const uint32_t *)data;
309 if(signature==UCPTRIE_SIG) {
310 return 3;
311 }
312 if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
313 return 3;
314 }
315 if(signature==UTRIE2_SIG) {
316 return 2;
317 }
318 if(anyEndianOk && signature==UTRIE2_OE_SIG) {
319 return 2;
320 }
321 if(signature==UTRIE_SIG) {
322 return 1;
323 }
324 if(anyEndianOk && signature==UTRIE_OE_SIG) {
325 return 1;
326 }
327 return 0;
328 }
329
330 } // namespace
331
332 U_CAPI int32_t U_EXPORT2
utrie_swapAnyVersion(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)333 utrie_swapAnyVersion(const UDataSwapper *ds,
334 const void *inData, int32_t length, void *outData,
335 UErrorCode *pErrorCode) {
336 if(U_FAILURE(*pErrorCode)) { return 0; }
337 switch(getVersion(inData, length, true)) {
338 case 1:
339 return utrie_swap(ds, inData, length, outData, pErrorCode);
340 case 2:
341 return utrie2_swap(ds, inData, length, outData, pErrorCode);
342 case 3:
343 return ucptrie_swap(ds, inData, length, outData, pErrorCode);
344 default:
345 *pErrorCode=U_INVALID_FORMAT_ERROR;
346 return 0;
347 }
348 }
349