1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2001-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: ustr_wcs.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004sep07
16 * created by: Markus W. Scherer
17 *
18 * u_strToWCS() and u_strFromWCS() functions
19 * moved here from ustrtrns.c for better modularization.
20 */
21
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "cstring.h"
25 #include "cwchar.h"
26 #include "cmemory.h"
27 #include "ustr_imp.h"
28 #include "ustr_cnv.h"
29
30 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
31
/* Element count of each on-stack scratch buffer used by the conversion helpers in this file. */
#define _STACK_BUFFER_CAPACITY 1000
/* Factor used when sizing or enlarging the intermediate conversion buffers. */
#define _BUFFER_CAPACITY_MULTIPLIER 2
34
35 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
36 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
37 // Then we could change this to work only with wchar_t buffers.
38 static inline UBool
u_growAnyBufferFromStatic(void * context,void ** pBuffer,int32_t * pCapacity,int32_t reqCapacity,int32_t length,int32_t size)39 u_growAnyBufferFromStatic(void *context,
40 void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
41 int32_t length, int32_t size) {
42 // Use char* not void* to avoid the compiler's strict-aliasing assumptions
43 // and related warnings.
44 char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
45 if(newBuffer!=NULL) {
46 if(length>0) {
47 uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
48 }
49 *pCapacity=reqCapacity;
50 } else {
51 *pCapacity=0;
52 }
53
54 /* release the old pBuffer if it was not statically allocated */
55 if(*pBuffer!=(char *)context) {
56 uprv_free(*pBuffer);
57 }
58
59 *pBuffer=newBuffer;
60 return (UBool)(newBuffer!=NULL);
61 }
62
63 /* helper function */
64 static wchar_t*
_strToWCS(wchar_t * dest,int32_t destCapacity,int32_t * pDestLength,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)65 _strToWCS(wchar_t *dest,
66 int32_t destCapacity,
67 int32_t *pDestLength,
68 const UChar *src,
69 int32_t srcLength,
70 UErrorCode *pErrorCode){
71
72 char stackBuffer [_STACK_BUFFER_CAPACITY];
73 char* tempBuf = stackBuffer;
74 int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
75 char* tempBufLimit = stackBuffer + tempBufCapacity;
76 UConverter* conv = NULL;
77 char* saveBuf = tempBuf;
78 wchar_t* intTarget=NULL;
79 int32_t intTargetCapacity=0;
80 int count=0,retVal=0;
81
82 const UChar *pSrcLimit =NULL;
83 const UChar *pSrc = src;
84
85 conv = u_getDefaultConverter(pErrorCode);
86
87 if(U_FAILURE(*pErrorCode)){
88 return NULL;
89 }
90
91 if(srcLength == -1){
92 srcLength = u_strlen(pSrc);
93 }
94
95 pSrcLimit = pSrc + srcLength;
96
97 for(;;) {
98 /* reset the error state */
99 *pErrorCode = U_ZERO_ERROR;
100
101 /* convert to chars using default converter */
102 ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
103 count =(tempBuf - saveBuf);
104
105 /* This should rarely occur */
106 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
107 tempBuf = saveBuf;
108
109 /* we don't have enough room on the stack grow the buffer */
110 int32_t newCapacity = 2 * srcLength;
111 if(newCapacity <= tempBufCapacity) {
112 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
113 }
114 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
115 newCapacity, count, 1)) {
116 goto cleanup;
117 }
118
119 saveBuf = tempBuf;
120 tempBufLimit = tempBuf + tempBufCapacity;
121 tempBuf = tempBuf + count;
122
123 } else {
124 break;
125 }
126 }
127
128 if(U_FAILURE(*pErrorCode)){
129 goto cleanup;
130 }
131
132 /* done with conversion null terminate the char buffer */
133 if(count>=tempBufCapacity){
134 tempBuf = saveBuf;
135 /* we don't have enough room on the stack grow the buffer */
136 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
137 count+1, count, 1)) {
138 goto cleanup;
139 }
140 saveBuf = tempBuf;
141 }
142
143 saveBuf[count]=0;
144
145
146 /* allocate more space than required
147 * here we assume that every char requires
148 * no more than 2 wchar_ts
149 */
150 intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
151 intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
152
153 if(intTarget){
154
155 int32_t nulLen = 0;
156 int32_t remaining = intTargetCapacity;
157 wchar_t* pIntTarget=intTarget;
158 tempBuf = saveBuf;
159
160 /* now convert the mbs to wcs */
161 for(;;){
162
163 /* we can call the system API since we are sure that
164 * there is atleast 1 null in the input
165 */
166 retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
167
168 if(retVal==-1){
169 *pErrorCode = U_INVALID_CHAR_FOUND;
170 break;
171 }else if(retVal== remaining){/* should never occur */
172 int numWritten = (pIntTarget-intTarget);
173 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
174 &intTargetCapacity,
175 intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
176 numWritten,
177 sizeof(wchar_t));
178 pIntTarget = intTarget;
179 remaining=intTargetCapacity;
180
181 if(nulLen!=count){ /*there are embedded nulls*/
182 pIntTarget+=numWritten;
183 remaining-=numWritten;
184 }
185
186 }else{
187 int32_t nulVal;
188 /*scan for nulls */
189 /* we donot check for limit since tempBuf is null terminated */
190 while(tempBuf[nulLen++] != 0){
191 }
192 nulVal = (nulLen < srcLength) ? 1 : 0;
193 pIntTarget = pIntTarget + retVal+nulVal;
194 remaining -=(retVal+nulVal);
195
196 /* check if we have reached the source limit*/
197 if(nulLen>=(count)){
198 break;
199 }
200 }
201 }
202 count = (int32_t)(pIntTarget-intTarget);
203
204 if(0 < count && count <= destCapacity){
205 uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
206 }
207
208 if(pDestLength){
209 *pDestLength = count;
210 }
211
212 /* free the allocated memory */
213 uprv_free(intTarget);
214
215 }else{
216 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
217 }
218 cleanup:
219 /* are we still using stack buffer */
220 if(stackBuffer != saveBuf){
221 uprv_free(saveBuf);
222 }
223 u_terminateWChars(dest,destCapacity,count,pErrorCode);
224
225 u_releaseDefaultConverter(conv);
226
227 return dest;
228 }
229 #endif
230
231 U_CAPI wchar_t* U_EXPORT2
u_strToWCS(wchar_t * dest,int32_t destCapacity,int32_t * pDestLength,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)232 u_strToWCS(wchar_t *dest,
233 int32_t destCapacity,
234 int32_t *pDestLength,
235 const UChar *src,
236 int32_t srcLength,
237 UErrorCode *pErrorCode){
238
239 /* args check */
240 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
241 return NULL;
242 }
243
244 if( (src==NULL && srcLength!=0) || srcLength < -1 ||
245 (destCapacity<0) || (dest == NULL && destCapacity > 0)
246 ) {
247 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
248 return NULL;
249 }
250
251 #ifdef U_WCHAR_IS_UTF16
252 /* wchar_t is UTF-16 just do a memcpy */
253 if(srcLength == -1){
254 srcLength = u_strlen(src);
255 }
256 if(0 < srcLength && srcLength <= destCapacity){
257 u_memcpy((UChar *)dest, src, srcLength);
258 }
259 if(pDestLength){
260 *pDestLength = srcLength;
261 }
262
263 u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
264
265 return dest;
266
267 #elif defined U_WCHAR_IS_UTF32
268
269 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
270 src, srcLength, pErrorCode);
271
272 #else
273
274 return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
275
276 #endif
277
278 }
279
280 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
281 /* helper function */
282 static UChar*
_strFromWCS(UChar * dest,int32_t destCapacity,int32_t * pDestLength,const wchar_t * src,int32_t srcLength,UErrorCode * pErrorCode)283 _strFromWCS( UChar *dest,
284 int32_t destCapacity,
285 int32_t *pDestLength,
286 const wchar_t *src,
287 int32_t srcLength,
288 UErrorCode *pErrorCode)
289 {
290 int32_t retVal =0, count =0 ;
291 UConverter* conv = NULL;
292 UChar* pTarget = NULL;
293 UChar* pTargetLimit = NULL;
294 UChar* target = NULL;
295
296 UChar uStack [_STACK_BUFFER_CAPACITY];
297
298 wchar_t wStack[_STACK_BUFFER_CAPACITY];
299 wchar_t* pWStack = wStack;
300
301
302 char cStack[_STACK_BUFFER_CAPACITY];
303 int32_t cStackCap = _STACK_BUFFER_CAPACITY;
304 char* pCSrc=cStack;
305 char* pCSave=pCSrc;
306 char* pCSrcLimit=NULL;
307
308 const wchar_t* pSrc = src;
309 const wchar_t* pSrcLimit = NULL;
310
311 if(srcLength ==-1){
312 /* if the wchar_t source is null terminated we can safely
313 * assume that there are no embedded nulls, this is a fast
314 * path for null terminated strings.
315 */
316 for(;;){
317 /* convert wchars to chars */
318 retVal = uprv_wcstombs(pCSrc,src, cStackCap);
319
320 if(retVal == -1){
321 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
322 goto cleanup;
323 }else if(retVal >= (cStackCap-1)){
324 /* Should rarely occur */
325 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
326 cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
327 pCSave = pCSrc;
328 }else{
329 /* converted every thing */
330 pCSrc = pCSrc+retVal;
331 break;
332 }
333 }
334
335 }else{
336 /* here the source is not null terminated
337 * so it may have nulls embedded and we need to
338 * do some extra processing
339 */
340 int32_t remaining =cStackCap;
341
342 pSrcLimit = src + srcLength;
343
344 for(;;){
345 int32_t nulLen = 0;
346
347 /* find nulls in the string */
348 while(nulLen<srcLength && pSrc[nulLen++]!=0){
349 }
350
351 if((pSrc+nulLen) < pSrcLimit){
352 /* check if we have enough room in pCSrc */
353 if(remaining < (nulLen * MB_CUR_MAX)){
354 /* should rarely occur */
355 int32_t len = (pCSrc-pCSave);
356 pCSrc = pCSave;
357 /* we do not have enough room so grow the buffer*/
358 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
359 _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
360
361 pCSave = pCSrc;
362 pCSrc = pCSave+len;
363 remaining = cStackCap-(pCSrc - pCSave);
364 }
365
366 /* we have found a null so convert the
367 * chunk from beginning of non-null char to null
368 */
369 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
370
371 if(retVal==-1){
372 /* an error occurred bail out */
373 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
374 goto cleanup;
375 }
376
377 pCSrc += retVal+1 /* already null terminated */;
378
379 pSrc += nulLen; /* skip past the null */
380 srcLength-=nulLen; /* decrement the srcLength */
381 remaining -= (pCSrc-pCSave);
382
383
384 }else{
385 /* the source is not null terminated and we are
386 * end of source so we copy the source to a temp buffer
387 * null terminate it and convert wchar_ts to chars
388 */
389 if(nulLen >= _STACK_BUFFER_CAPACITY){
390 /* Should rarely occur */
391 /* allocate new buffer buffer */
392 pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
393 if(pWStack==NULL){
394 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
395 goto cleanup;
396 }
397 }
398 if(nulLen>0){
399 /* copy the contents to tempStack */
400 uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
401 }
402
403 /* null terminate the tempBuffer */
404 pWStack[nulLen] =0 ;
405
406 if(remaining < (nulLen * MB_CUR_MAX)){
407 /* Should rarely occur */
408 int32_t len = (pCSrc-pCSave);
409 pCSrc = pCSave;
410 /* we do not have enough room so grow the buffer*/
411 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
412 cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
413
414 pCSave = pCSrc;
415 pCSrc = pCSave+len;
416 remaining = cStackCap-(pCSrc - pCSave);
417 }
418 /* convert to chars */
419 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
420
421 pCSrc += retVal;
422 pSrc += nulLen;
423 srcLength-=nulLen; /* decrement the srcLength */
424 break;
425 }
426 }
427 }
428
429 /* OK..now we have converted from wchar_ts to chars now
430 * convert chars to UChars
431 */
432 pCSrcLimit = pCSrc;
433 pCSrc = pCSave;
434 pTarget = target= dest;
435 pTargetLimit = dest + destCapacity;
436
437 conv= u_getDefaultConverter(pErrorCode);
438
439 if(U_FAILURE(*pErrorCode)|| conv==NULL){
440 goto cleanup;
441 }
442
443 for(;;) {
444
445 *pErrorCode = U_ZERO_ERROR;
446
447 /* convert to stack buffer*/
448 ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
449
450 /* increment count to number written to stack */
451 count+= pTarget - target;
452
453 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
454 target = uStack;
455 pTarget = uStack;
456 pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
457 } else {
458 break;
459 }
460
461 }
462
463 if(pDestLength){
464 *pDestLength =count;
465 }
466
467 u_terminateUChars(dest,destCapacity,count,pErrorCode);
468
469 cleanup:
470
471 if(cStack != pCSave){
472 uprv_free(pCSave);
473 }
474
475 if(wStack != pWStack){
476 uprv_free(pWStack);
477 }
478
479 u_releaseDefaultConverter(conv);
480
481 return dest;
482 }
483 #endif
484
485 U_CAPI UChar* U_EXPORT2
u_strFromWCS(UChar * dest,int32_t destCapacity,int32_t * pDestLength,const wchar_t * src,int32_t srcLength,UErrorCode * pErrorCode)486 u_strFromWCS(UChar *dest,
487 int32_t destCapacity,
488 int32_t *pDestLength,
489 const wchar_t *src,
490 int32_t srcLength,
491 UErrorCode *pErrorCode)
492 {
493
494 /* args check */
495 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
496 return NULL;
497 }
498
499 if( (src==NULL && srcLength!=0) || srcLength < -1 ||
500 (destCapacity<0) || (dest == NULL && destCapacity > 0)
501 ) {
502 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
503 return NULL;
504 }
505
506 #ifdef U_WCHAR_IS_UTF16
507 /* wchar_t is UTF-16 just do a memcpy */
508 if(srcLength == -1){
509 srcLength = u_strlen((const UChar *)src);
510 }
511 if(0 < srcLength && srcLength <= destCapacity){
512 u_memcpy(dest, (const UChar *)src, srcLength);
513 }
514 if(pDestLength){
515 *pDestLength = srcLength;
516 }
517
518 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
519
520 return dest;
521
522 #elif defined U_WCHAR_IS_UTF32
523
524 return u_strFromUTF32(dest, destCapacity, pDestLength,
525 (UChar32*)src, srcLength, pErrorCode);
526
527 #else
528
529 return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
530
531 #endif
532
533 }
534
#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
536