1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // utrie_swap.cpp
5 // created: 2018aug08 Markus W. Scherer
6
7 #include "unicode/utypes.h"
8 #include "cmemory.h"
9 #include "ucptrie_impl.h"
10 #include "udataswp.h"
11 #include "utrie.h"
12 #include "utrie2_impl.h"
13
14 // These functions for swapping different generations of ICU code point tries are here
15 // so that their implementation files need not depend on swapper code,
16 // need not depend on each other, and so that other swapper code
17 // need not depend on other trie code.
18
namespace {

// Minimum data array length that ucptrie_swap() accepts: a UCPTrie whose data length
// is below this limit is rejected with U_INVALID_FORMAT_ERROR.
// 0x80 is the number of ASCII code points (U+0000..U+007F).
constexpr int32_t ASCII_LIMIT = 0x80;

}  // namespace
24
25 U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)26 utrie_swap(const UDataSwapper *ds,
27 const void *inData, int32_t length, void *outData,
28 UErrorCode *pErrorCode) {
29 const UTrieHeader *inTrie;
30 UTrieHeader trie;
31 int32_t size;
32 UBool dataIs32;
33
34 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
35 return 0;
36 }
37 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
38 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
39 return 0;
40 }
41
42 /* setup and swapping */
43 if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
44 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
45 return 0;
46 }
47
48 inTrie=(const UTrieHeader *)inData;
49 trie.signature=ds->readUInt32(inTrie->signature);
50 trie.options=ds->readUInt32(inTrie->options);
51 trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
52 trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
53
54 if( trie.signature!=0x54726965 ||
55 (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
56 ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
57 trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
58 (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
59 trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
60 (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
61 ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
62 ) {
63 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
64 return 0;
65 }
66
67 dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
68 size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
69
70 if(length>=0) {
71 UTrieHeader *outTrie;
72
73 if(length<size) {
74 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
75 return 0;
76 }
77
78 outTrie=(UTrieHeader *)outData;
79
80 /* swap the header */
81 ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
82
83 /* swap the index and the data */
84 if(dataIs32) {
85 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
86 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
87 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
88 } else {
89 ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
90 }
91 }
92
93 return size;
94 }
95
96 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)97 utrie2_swap(const UDataSwapper *ds,
98 const void *inData, int32_t length, void *outData,
99 UErrorCode *pErrorCode) {
100 const UTrie2Header *inTrie;
101 UTrie2Header trie;
102 int32_t dataLength, size;
103 UTrie2ValueBits valueBits;
104
105 if(U_FAILURE(*pErrorCode)) {
106 return 0;
107 }
108 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
109 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
110 return 0;
111 }
112
113 /* setup and swapping */
114 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
115 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
116 return 0;
117 }
118
119 inTrie=(const UTrie2Header *)inData;
120 trie.signature=ds->readUInt32(inTrie->signature);
121 trie.options=ds->readUInt16(inTrie->options);
122 trie.indexLength=ds->readUInt16(inTrie->indexLength);
123 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
124
125 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
126 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
127
128 if( trie.signature!=UTRIE2_SIG ||
129 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
130 trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
131 dataLength<UTRIE2_DATA_START_OFFSET
132 ) {
133 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
134 return 0;
135 }
136
137 size=sizeof(UTrie2Header)+trie.indexLength*2;
138 switch(valueBits) {
139 case UTRIE2_16_VALUE_BITS:
140 size+=dataLength*2;
141 break;
142 case UTRIE2_32_VALUE_BITS:
143 size+=dataLength*4;
144 break;
145 default:
146 *pErrorCode=U_INVALID_FORMAT_ERROR;
147 return 0;
148 }
149
150 if(length>=0) {
151 UTrie2Header *outTrie;
152
153 if(length<size) {
154 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
155 return 0;
156 }
157
158 outTrie=(UTrie2Header *)outData;
159
160 /* swap the header */
161 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
162 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
163
164 /* swap the index and the data */
165 switch(valueBits) {
166 case UTRIE2_16_VALUE_BITS:
167 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
168 break;
169 case UTRIE2_32_VALUE_BITS:
170 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
171 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
172 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
173 break;
174 default:
175 *pErrorCode=U_INVALID_FORMAT_ERROR;
176 return 0;
177 }
178 }
179
180 return size;
181 }
182
183 U_CAPI int32_t U_EXPORT2
ucptrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)184 ucptrie_swap(const UDataSwapper *ds,
185 const void *inData, int32_t length, void *outData,
186 UErrorCode *pErrorCode) {
187 const UCPTrieHeader *inTrie;
188 UCPTrieHeader trie;
189 int32_t dataLength, size;
190 UCPTrieValueWidth valueWidth;
191
192 if(U_FAILURE(*pErrorCode)) {
193 return 0;
194 }
195 if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
196 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
197 return 0;
198 }
199
200 /* setup and swapping */
201 if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
202 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
203 return 0;
204 }
205
206 inTrie=(const UCPTrieHeader *)inData;
207 trie.signature=ds->readUInt32(inTrie->signature);
208 trie.options=ds->readUInt16(inTrie->options);
209 trie.indexLength=ds->readUInt16(inTrie->indexLength);
210 trie.dataLength = ds->readUInt16(inTrie->dataLength);
211
212 UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
213 valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
214 dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
215
216 int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
217 UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
218 if( trie.signature!=UCPTRIE_SIG ||
219 type > UCPTRIE_TYPE_SMALL ||
220 (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
221 valueWidth > UCPTRIE_VALUE_BITS_8 ||
222 trie.indexLength < minIndexLength ||
223 dataLength < ASCII_LIMIT
224 ) {
225 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
226 return 0;
227 }
228
229 size=sizeof(UCPTrieHeader)+trie.indexLength*2;
230 switch(valueWidth) {
231 case UCPTRIE_VALUE_BITS_16:
232 size+=dataLength*2;
233 break;
234 case UCPTRIE_VALUE_BITS_32:
235 size+=dataLength*4;
236 break;
237 case UCPTRIE_VALUE_BITS_8:
238 size+=dataLength;
239 break;
240 default:
241 *pErrorCode=U_INVALID_FORMAT_ERROR;
242 return 0;
243 }
244
245 if(length>=0) {
246 UCPTrieHeader *outTrie;
247
248 if(length<size) {
249 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
250 return 0;
251 }
252
253 outTrie=(UCPTrieHeader *)outData;
254
255 /* swap the header */
256 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
257 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
258
259 /* swap the index and the data */
260 switch(valueWidth) {
261 case UCPTRIE_VALUE_BITS_16:
262 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
263 break;
264 case UCPTRIE_VALUE_BITS_32:
265 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
266 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
267 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
268 break;
269 case UCPTRIE_VALUE_BITS_8:
270 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
271 if(inTrie!=outTrie) {
272 uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength);
273 }
274 break;
275 default:
276 *pErrorCode=U_INVALID_FORMAT_ERROR;
277 return 0;
278 }
279 }
280
281 return size;
282 }
283
284 namespace {
285
286 /**
287 * Gets the trie version from 32-bit-aligned memory containing the serialized form
288 * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
289 *
290 * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
291 * @param length the number of bytes available at data;
292 * can be more than necessary (see return value)
293 * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized.
294 * If TRUE, opposite-endian serialized forms are recognized as well.
295 * @return the trie version of the serialized form, or 0 if it is not
296 * recognized as a serialized trie
297 */
298 int32_t
getVersion(const void * data,int32_t length,UBool anyEndianOk)299 getVersion(const void *data, int32_t length, UBool anyEndianOk) {
300 uint32_t signature;
301 if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
302 return 0;
303 }
304 signature=*(const uint32_t *)data;
305 if(signature==UCPTRIE_SIG) {
306 return 3;
307 }
308 if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
309 return 3;
310 }
311 if(signature==UTRIE2_SIG) {
312 return 2;
313 }
314 if(anyEndianOk && signature==UTRIE2_OE_SIG) {
315 return 2;
316 }
317 if(signature==UTRIE_SIG) {
318 return 1;
319 }
320 if(anyEndianOk && signature==UTRIE_OE_SIG) {
321 return 1;
322 }
323 return 0;
324 }
325
326 } // namespace
327
328 U_CAPI int32_t U_EXPORT2
utrie_swapAnyVersion(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)329 utrie_swapAnyVersion(const UDataSwapper *ds,
330 const void *inData, int32_t length, void *outData,
331 UErrorCode *pErrorCode) {
332 if(U_FAILURE(*pErrorCode)) { return 0; }
333 switch(getVersion(inData, length, TRUE)) {
334 case 1:
335 return utrie_swap(ds, inData, length, outData, pErrorCode);
336 case 2:
337 return utrie2_swap(ds, inData, length, outData, pErrorCode);
338 case 3:
339 return ucptrie_swap(ds, inData, length, outData, pErrorCode);
340 default:
341 *pErrorCode=U_INVALID_FORMAT_ERROR;
342 return 0;
343 }
344 }
345