1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * ucnv.c:
10 * Implements APIs for the ICU's codeset conversion library;
11 * mostly calls through internal functions;
12 * created by Bertrand A. Damiba
13 *
14 * Modification History:
15 *
16 * Date Name Description
17 * 04/04/99 helena Fixed internal header inclusion.
18 * 05/09/00 helena Added implementation to handle fallback mappings.
19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #include "unicode/ustring.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ucnv_err.h"
29 #include "unicode/uset.h"
30 #include "putilimp.h"
31 #include "cmemory.h"
32 #include "cstring.h"
33 #include "uassert.h"
34 #include "utracimp.h"
35 #include "ustr_imp.h"
36 #include "ucnv_imp.h"
37 #include "ucnv_cnv.h"
38 #include "ucnv_bld.h"
39
40 /* size of intermediate and preflighting buffers in ucnv_convert() */
41 #define CHUNK_SIZE 1024
42
/*
 * One entry of the ambiguousConverters[] table: a converter name paired
 * with a UChar value.
 * NOTE(review): judging by the field name, variant5c is presumably the
 * Unicode character this converter uses for byte 0x5c instead of U+005C -
 * confirm against the code that reads the table.
 */
typedef struct UAmbiguousConverter {
    const char *name;       /* converter name, as spelled in the table entries */
    const UChar variant5c;  /* UChar value associated with the converter */
} UAmbiguousConverter;
47
/*
 * Table of converter names, each paired with its variant5c value
 * (0xa5 or 0x20a9). The entries inside comments are not part of the table.
 */
static const UAmbiguousConverter ambiguousConverters[]={
    { "ibm-897_P100-1995",           0xa5 },
    { "ibm-942_P120-1999",           0xa5 },
    { "ibm-943_P130-1999",           0xa5 },
    { "ibm-946_P100-1995",           0xa5 },
    { "ibm-33722_P120-1999",         0xa5 },
    /*{ "ibm-54191_P100-2006",         0xa5 },*/
    /*{ "ibm-62383_P100-2007",         0xa5 },*/
    /*{ "ibm-891_P100-1995",           0x20a9 },*/
    { "ibm-944_P100-1995",           0x20a9 },
    { "ibm-949_P110-1999",           0x20a9 },
    { "ibm-1363_P110-1997",          0x20a9 },
    { "ISO_2022,locale=ko,version=0", 0x20a9 }
};
62
63 /*Calls through createConverter */
64 U_CAPI UConverter* U_EXPORT2
ucnv_open(const char * name,UErrorCode * err)65 ucnv_open (const char *name,
66 UErrorCode * err)
67 {
68 UConverter *r;
69
70 if (err == NULL || U_FAILURE (*err)) {
71 return NULL;
72 }
73
74 r = ucnv_createConverter(NULL, name, err);
75 return r;
76 }
77
78 U_CAPI UConverter* U_EXPORT2
ucnv_openPackage(const char * packageName,const char * converterName,UErrorCode * err)79 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
80 {
81 return ucnv_createConverterFromPackage(packageName, converterName, err);
82 }
83
84 /*Extracts the UChar* to a char* and calls through createConverter */
85 U_CAPI UConverter* U_EXPORT2
ucnv_openU(const UChar * name,UErrorCode * err)86 ucnv_openU (const UChar * name,
87 UErrorCode * err)
88 {
89 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
90
91 if (err == NULL || U_FAILURE(*err))
92 return NULL;
93 if (name == NULL)
94 return ucnv_open (NULL, err);
95 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
96 {
97 *err = U_ILLEGAL_ARGUMENT_ERROR;
98 return NULL;
99 }
100 return ucnv_open(u_austrcpy(asciiName, name), err);
101 }
102
103 /* Copy the string that is represented by the UConverterPlatform enum
104 * @param platformString An output buffer
105 * @param platform An enum representing a platform
106 * @return the length of the copied string.
107 */
108 static int32_t
ucnv_copyPlatformString(char * platformString,UConverterPlatform pltfrm)109 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
110 {
111 switch (pltfrm)
112 {
113 case UCNV_IBM:
114 uprv_strcpy(platformString, "ibm-");
115 return 4;
116 case UCNV_UNKNOWN:
117 break;
118 }
119
120 /* default to empty string */
121 *platformString = 0;
122 return 0;
123 }
124
125 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
126 *through createConverter*/
127 U_CAPI UConverter* U_EXPORT2
ucnv_openCCSID(int32_t codepage,UConverterPlatform platform,UErrorCode * err)128 ucnv_openCCSID (int32_t codepage,
129 UConverterPlatform platform,
130 UErrorCode * err)
131 {
132 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
133 int32_t myNameLen;
134
135 if (err == NULL || U_FAILURE (*err))
136 return NULL;
137
138 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
139 myNameLen = ucnv_copyPlatformString(myName, platform);
140 T_CString_integerToString(myName + myNameLen, codepage, 10);
141
142 return ucnv_createConverter(NULL, myName, err);
143 }
144
145 /* Creating a temporary stack-based object that can be used in one thread,
146 and created from a converter that is shared across threads.
147 */
148
149 U_CAPI UConverter* U_EXPORT2
ucnv_safeClone(const UConverter * cnv,void * stackBuffer,int32_t * pBufferSize,UErrorCode * status)150 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
151 {
152 UConverter *localConverter, *allocatedConverter;
153 int32_t bufferSizeNeeded;
154 char *stackBufferChars = (char *)stackBuffer;
155 UErrorCode cbErr;
156 UConverterToUnicodeArgs toUArgs = {
157 sizeof(UConverterToUnicodeArgs),
158 TRUE,
159 NULL,
160 NULL,
161 NULL,
162 NULL,
163 NULL,
164 NULL
165 };
166 UConverterFromUnicodeArgs fromUArgs = {
167 sizeof(UConverterFromUnicodeArgs),
168 TRUE,
169 NULL,
170 NULL,
171 NULL,
172 NULL,
173 NULL,
174 NULL
175 };
176
177 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
178
179 if (status == NULL || U_FAILURE(*status)){
180 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
181 return 0;
182 }
183
184 if (!pBufferSize || !cnv){
185 *status = U_ILLEGAL_ARGUMENT_ERROR;
186 UTRACE_EXIT_STATUS(*status);
187 return 0;
188 }
189
190 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
191 ucnv_getName(cnv, status), cnv, stackBuffer);
192
193 if (cnv->sharedData->impl->safeClone != NULL) {
194 /* call the custom safeClone function for sizing */
195 bufferSizeNeeded = 0;
196 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
197 }
198 else
199 {
200 /* inherent sizing */
201 bufferSizeNeeded = sizeof(UConverter);
202 }
203
204 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
205 *pBufferSize = bufferSizeNeeded;
206 UTRACE_EXIT_VALUE(bufferSizeNeeded);
207 return 0;
208 }
209
210
211 /* Pointers on 64-bit platforms need to be aligned
212 * on a 64-bit boundary in memory.
213 */
214 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
215 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
216 if(*pBufferSize > offsetUp) {
217 *pBufferSize -= offsetUp;
218 stackBufferChars += offsetUp;
219 } else {
220 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
221 *pBufferSize = 1;
222 }
223 }
224
225 stackBuffer = (void *)stackBufferChars;
226
227 /* Now, see if we must allocate any memory */
228 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
229 {
230 /* allocate one here...*/
231 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
232
233 if(localConverter == NULL) {
234 *status = U_MEMORY_ALLOCATION_ERROR;
235 UTRACE_EXIT_STATUS(*status);
236 return NULL;
237 }
238
239 if (U_SUCCESS(*status)) {
240 *status = U_SAFECLONE_ALLOCATED_WARNING;
241 }
242
243 /* record the fact that memory was allocated */
244 *pBufferSize = bufferSizeNeeded;
245 } else {
246 /* just use the stack buffer */
247 localConverter = (UConverter*) stackBuffer;
248 allocatedConverter = NULL;
249 }
250
251 uprv_memset(localConverter, 0, bufferSizeNeeded);
252
253 /* Copy initial state */
254 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
255 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
256
257 /* copy the substitution string */
258 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
259 localConverter->subChars = (uint8_t *)localConverter->subUChars;
260 } else {
261 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
262 if (localConverter->subChars == NULL) {
263 uprv_free(allocatedConverter);
264 UTRACE_EXIT_STATUS(*status);
265 return NULL;
266 }
267 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
268 }
269
270 /* now either call the safeclone fcn or not */
271 if (cnv->sharedData->impl->safeClone != NULL) {
272 /* call the custom safeClone function */
273 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
274 }
275
276 if(localConverter==NULL || U_FAILURE(*status)) {
277 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
278 uprv_free(allocatedConverter->subChars);
279 }
280 uprv_free(allocatedConverter);
281 UTRACE_EXIT_STATUS(*status);
282 return NULL;
283 }
284
285 /* increment refcount of shared data if needed */
286 /*
287 Checking whether it's an algorithic converter is okay
288 in multithreaded applications because the value never changes.
289 Don't check referenceCounter for any other value.
290 */
291 if (cnv->sharedData->referenceCounter != ~0) {
292 ucnv_incrementRefCount(cnv->sharedData);
293 }
294
295 if(localConverter == (UConverter*)stackBuffer) {
296 /* we're using user provided data - set to not destroy */
297 localConverter->isCopyLocal = TRUE;
298 }
299
300 /* allow callback functions to handle any memory allocation */
301 toUArgs.converter = fromUArgs.converter = localConverter;
302 cbErr = U_ZERO_ERROR;
303 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
304 cbErr = U_ZERO_ERROR;
305 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
306
307 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
308 return localConverter;
309 }
310
311
312
313 /*Decreases the reference counter in the shared immutable section of the object
314 *and frees the mutable part*/
315
316 U_CAPI void U_EXPORT2
ucnv_close(UConverter * converter)317 ucnv_close (UConverter * converter)
318 {
319 UErrorCode errorCode = U_ZERO_ERROR;
320
321 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
322
323 if (converter == NULL)
324 {
325 UTRACE_EXIT();
326 return;
327 }
328
329 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
330 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
331
332 /* In order to speed up the close, only call the callbacks when they have been changed.
333 This performance check will only work when the callbacks are set within a shared library
334 or from user code that statically links this code. */
335 /* first, notify the callback functions that the converter is closed */
336 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
337 UConverterToUnicodeArgs toUArgs = {
338 sizeof(UConverterToUnicodeArgs),
339 TRUE,
340 NULL,
341 NULL,
342 NULL,
343 NULL,
344 NULL,
345 NULL
346 };
347
348 toUArgs.converter = converter;
349 errorCode = U_ZERO_ERROR;
350 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
351 }
352 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
353 UConverterFromUnicodeArgs fromUArgs = {
354 sizeof(UConverterFromUnicodeArgs),
355 TRUE,
356 NULL,
357 NULL,
358 NULL,
359 NULL,
360 NULL,
361 NULL
362 };
363 fromUArgs.converter = converter;
364 errorCode = U_ZERO_ERROR;
365 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
366 }
367
368 if (converter->sharedData->impl->close != NULL) {
369 converter->sharedData->impl->close(converter);
370 }
371
372 if (converter->subChars != (uint8_t *)converter->subUChars) {
373 uprv_free(converter->subChars);
374 }
375
376 /*
377 Checking whether it's an algorithic converter is okay
378 in multithreaded applications because the value never changes.
379 Don't check referenceCounter for any other value.
380 */
381 if (converter->sharedData->referenceCounter != ~0) {
382 ucnv_unloadSharedDataIfReady(converter->sharedData);
383 }
384
385 if(!converter->isCopyLocal){
386 uprv_free(converter);
387 }
388
389 UTRACE_EXIT();
390 }
391
392 /*returns a single Name from the list, will return NULL if out of bounds
393 */
394 U_CAPI const char* U_EXPORT2
ucnv_getAvailableName(int32_t n)395 ucnv_getAvailableName (int32_t n)
396 {
397 if (0 <= n && n <= 0xffff) {
398 UErrorCode err = U_ZERO_ERROR;
399 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
400 if (U_SUCCESS(err)) {
401 return name;
402 }
403 }
404 return NULL;
405 }
406
407 U_CAPI int32_t U_EXPORT2
ucnv_countAvailable()408 ucnv_countAvailable ()
409 {
410 UErrorCode err = U_ZERO_ERROR;
411 return ucnv_bld_countAvailableConverters(&err);
412 }
413
414 U_CAPI void U_EXPORT2
ucnv_getSubstChars(const UConverter * converter,char * mySubChar,int8_t * len,UErrorCode * err)415 ucnv_getSubstChars (const UConverter * converter,
416 char *mySubChar,
417 int8_t * len,
418 UErrorCode * err)
419 {
420 if (U_FAILURE (*err))
421 return;
422
423 if (converter->subCharLen <= 0) {
424 /* Unicode string or empty string from ucnv_setSubstString(). */
425 *len = 0;
426 return;
427 }
428
429 if (*len < converter->subCharLen) /*not enough space in subChars */
430 {
431 *err = U_INDEX_OUTOFBOUNDS_ERROR;
432 return;
433 }
434
435 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
436 *len = converter->subCharLen; /*store # of bytes copied to buffer */
437 }
438
439 U_CAPI void U_EXPORT2
ucnv_setSubstChars(UConverter * converter,const char * mySubChar,int8_t len,UErrorCode * err)440 ucnv_setSubstChars (UConverter * converter,
441 const char *mySubChar,
442 int8_t len,
443 UErrorCode * err)
444 {
445 if (U_FAILURE (*err))
446 return;
447
448 /*Makes sure that the subChar is within the codepages char length boundaries */
449 if ((len > converter->sharedData->staticData->maxBytesPerChar)
450 || (len < converter->sharedData->staticData->minBytesPerChar))
451 {
452 *err = U_ILLEGAL_ARGUMENT_ERROR;
453 return;
454 }
455
456 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
457 converter->subCharLen = len; /*sets the new len */
458
459 /*
460 * There is currently (2001Feb) no separate API to set/get subChar1.
461 * In order to always have subChar written after it is explicitly set,
462 * we set subChar1 to 0.
463 */
464 converter->subChar1 = 0;
465
466 return;
467 }
468
469 U_CAPI void U_EXPORT2
ucnv_setSubstString(UConverter * cnv,const UChar * s,int32_t length,UErrorCode * err)470 ucnv_setSubstString(UConverter *cnv,
471 const UChar *s,
472 int32_t length,
473 UErrorCode *err) {
474 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
475 char chars[UCNV_ERROR_BUFFER_LENGTH];
476
477 UConverter *clone;
478 uint8_t *subChars;
479 int32_t cloneSize, length8;
480
481 /* Let the following functions check all arguments. */
482 cloneSize = sizeof(cloneBuffer);
483 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
484 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
485 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
486 ucnv_close(clone);
487 if (U_FAILURE(*err)) {
488 return;
489 }
490
491 if (cnv->sharedData->impl->writeSub == NULL
492 #if !UCONFIG_NO_LEGACY_CONVERSION
493 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
494 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
495 #endif
496 ) {
497 /* The converter is not stateful. Store the charset bytes as a fixed string. */
498 subChars = (uint8_t *)chars;
499 } else {
500 /*
501 * The converter has a non-default writeSub() function, indicating
502 * that it is stateful.
503 * Store the Unicode string for on-the-fly conversion for correct
504 * state handling.
505 */
506 if (length > UCNV_ERROR_BUFFER_LENGTH) {
507 /*
508 * Should not occur. The converter should output at least one byte
509 * per UChar, which means that ucnv_fromUChars() should catch all
510 * overflows.
511 */
512 *err = U_BUFFER_OVERFLOW_ERROR;
513 return;
514 }
515 subChars = (uint8_t *)s;
516 if (length < 0) {
517 length = u_strlen(s);
518 }
519 length8 = length * U_SIZEOF_UCHAR;
520 }
521
522 /*
523 * For storing the substitution string, select either the small buffer inside
524 * UConverter or allocate a subChars buffer.
525 */
526 if (length8 > UCNV_MAX_SUBCHAR_LEN) {
527 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
528 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
529 /* Allocate a new buffer for the string. */
530 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
531 if (cnv->subChars == NULL) {
532 cnv->subChars = (uint8_t *)cnv->subUChars;
533 *err = U_MEMORY_ALLOCATION_ERROR;
534 return;
535 }
536 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
537 }
538 }
539
540 /* Copy the substitution string into the UConverter or its subChars buffer. */
541 if (length8 == 0) {
542 cnv->subCharLen = 0;
543 } else {
544 uprv_memcpy(cnv->subChars, subChars, length8);
545 if (subChars == (uint8_t *)chars) {
546 cnv->subCharLen = (int8_t)length8;
547 } else /* subChars == s */ {
548 cnv->subCharLen = (int8_t)-length;
549 }
550 }
551
552 /* See comment in ucnv_setSubstChars(). */
553 cnv->subChar1 = 0;
554 }
555
556 /*resets the internal states of a converter
557 *goal : have the same behaviour than a freshly created converter
558 */
_reset(UConverter * converter,UConverterResetChoice choice,UBool callCallback)559 static void _reset(UConverter *converter, UConverterResetChoice choice,
560 UBool callCallback) {
561 if(converter == NULL) {
562 return;
563 }
564
565 if(callCallback) {
566 /* first, notify the callback functions that the converter is reset */
567 UConverterToUnicodeArgs toUArgs = {
568 sizeof(UConverterToUnicodeArgs),
569 TRUE,
570 NULL,
571 NULL,
572 NULL,
573 NULL,
574 NULL,
575 NULL
576 };
577 UConverterFromUnicodeArgs fromUArgs = {
578 sizeof(UConverterFromUnicodeArgs),
579 TRUE,
580 NULL,
581 NULL,
582 NULL,
583 NULL,
584 NULL,
585 NULL
586 };
587 UErrorCode errorCode;
588
589 toUArgs.converter = fromUArgs.converter = converter;
590 if(choice<=UCNV_RESET_TO_UNICODE) {
591 errorCode = U_ZERO_ERROR;
592 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
593 }
594 if(choice!=UCNV_RESET_TO_UNICODE) {
595 errorCode = U_ZERO_ERROR;
596 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
597 }
598 }
599
600 /* now reset the converter itself */
601 if(choice<=UCNV_RESET_TO_UNICODE) {
602 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
603 converter->mode = 0;
604 converter->toULength = 0;
605 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
606 converter->preToULength = 0;
607 }
608 if(choice!=UCNV_RESET_TO_UNICODE) {
609 converter->fromUnicodeStatus = 0;
610 converter->fromUChar32 = 0;
611 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
612 converter->preFromUFirstCP = U_SENTINEL;
613 converter->preFromULength = 0;
614 }
615
616 if (converter->sharedData->impl->reset != NULL) {
617 /* call the custom reset function */
618 converter->sharedData->impl->reset(converter, choice);
619 }
620 }
621
622 U_CAPI void U_EXPORT2
ucnv_reset(UConverter * converter)623 ucnv_reset(UConverter *converter)
624 {
625 _reset(converter, UCNV_RESET_BOTH, TRUE);
626 }
627
628 U_CAPI void U_EXPORT2
ucnv_resetToUnicode(UConverter * converter)629 ucnv_resetToUnicode(UConverter *converter)
630 {
631 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
632 }
633
634 U_CAPI void U_EXPORT2
ucnv_resetFromUnicode(UConverter * converter)635 ucnv_resetFromUnicode(UConverter *converter)
636 {
637 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
638 }
639
640 U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize(const UConverter * converter)641 ucnv_getMaxCharSize (const UConverter * converter)
642 {
643 return converter->maxBytesPerUChar;
644 }
645
646
647 U_CAPI int8_t U_EXPORT2
ucnv_getMinCharSize(const UConverter * converter)648 ucnv_getMinCharSize (const UConverter * converter)
649 {
650 return converter->sharedData->staticData->minBytesPerChar;
651 }
652
653 U_CAPI const char* U_EXPORT2
ucnv_getName(const UConverter * converter,UErrorCode * err)654 ucnv_getName (const UConverter * converter, UErrorCode * err)
655
656 {
657 if (U_FAILURE (*err))
658 return NULL;
659 if(converter->sharedData->impl->getName){
660 const char* temp= converter->sharedData->impl->getName(converter);
661 if(temp)
662 return temp;
663 }
664 return converter->sharedData->staticData->name;
665 }
666
667 U_CAPI int32_t U_EXPORT2
ucnv_getCCSID(const UConverter * converter,UErrorCode * err)668 ucnv_getCCSID(const UConverter * converter,
669 UErrorCode * err)
670 {
671 int32_t ccsid;
672 if (U_FAILURE (*err))
673 return -1;
674
675 ccsid = converter->sharedData->staticData->codepage;
676 if (ccsid == 0) {
677 /* Rare case. This is for cases like gb18030,
678 which doesn't have an IBM cannonical name, but does have an IBM alias. */
679 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
680 if (U_SUCCESS(*err) && standardName) {
681 const char *ccsidStr = uprv_strchr(standardName, '-');
682 if (ccsidStr) {
683 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
684 }
685 }
686 }
687 return ccsid;
688 }
689
690
691 U_CAPI UConverterPlatform U_EXPORT2
ucnv_getPlatform(const UConverter * converter,UErrorCode * err)692 ucnv_getPlatform (const UConverter * converter,
693 UErrorCode * err)
694 {
695 if (U_FAILURE (*err))
696 return UCNV_UNKNOWN;
697
698 return (UConverterPlatform)converter->sharedData->staticData->platform;
699 }
700
701 U_CAPI void U_EXPORT2
ucnv_getToUCallBack(const UConverter * converter,UConverterToUCallback * action,const void ** context)702 ucnv_getToUCallBack (const UConverter * converter,
703 UConverterToUCallback *action,
704 const void **context)
705 {
706 *action = converter->fromCharErrorBehaviour;
707 *context = converter->toUContext;
708 }
709
710 U_CAPI void U_EXPORT2
ucnv_getFromUCallBack(const UConverter * converter,UConverterFromUCallback * action,const void ** context)711 ucnv_getFromUCallBack (const UConverter * converter,
712 UConverterFromUCallback *action,
713 const void **context)
714 {
715 *action = converter->fromUCharErrorBehaviour;
716 *context = converter->fromUContext;
717 }
718
719 U_CAPI void U_EXPORT2
ucnv_setToUCallBack(UConverter * converter,UConverterToUCallback newAction,const void * newContext,UConverterToUCallback * oldAction,const void ** oldContext,UErrorCode * err)720 ucnv_setToUCallBack (UConverter * converter,
721 UConverterToUCallback newAction,
722 const void* newContext,
723 UConverterToUCallback *oldAction,
724 const void** oldContext,
725 UErrorCode * err)
726 {
727 if (U_FAILURE (*err))
728 return;
729 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
730 converter->fromCharErrorBehaviour = newAction;
731 if (oldContext) *oldContext = converter->toUContext;
732 converter->toUContext = newContext;
733 }
734
735 U_CAPI void U_EXPORT2
ucnv_setFromUCallBack(UConverter * converter,UConverterFromUCallback newAction,const void * newContext,UConverterFromUCallback * oldAction,const void ** oldContext,UErrorCode * err)736 ucnv_setFromUCallBack (UConverter * converter,
737 UConverterFromUCallback newAction,
738 const void* newContext,
739 UConverterFromUCallback *oldAction,
740 const void** oldContext,
741 UErrorCode * err)
742 {
743 if (U_FAILURE (*err))
744 return;
745 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
746 converter->fromUCharErrorBehaviour = newAction;
747 if (oldContext) *oldContext = converter->fromUContext;
748 converter->fromUContext = newContext;
749 }
750
/*
 * Adjusts a run of offsets after a conversion step.
 * @param offsets          the offsets to adjust
 * @param length           number of offsets
 * @param sourceIndex      source index to add to the offsets, or negative if
 *                         the conversion function does not handle offsets
 * @param errorInputLength length of the input sequence that caused an error,
 *                         if any; it is subtracted from sourceIndex
 *
 * With shift = sourceIndex - errorInputLength (or -1 for a negative
 * sourceIndex): a zero shift leaves the offsets alone, a positive shift is
 * added to every non-negative offset, and a negative shift sets every offset
 * to -1.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift;
    int32_t i;

    shift = (sourceIndex >= 0) ? (sourceIndex - errorInputLength) : -1;

    if (shift == 0) {
        /* most common case, nothing to do */
        return;
    }

    if (shift > 0) {
        /* move each real offset forward; negative offsets stay as they are */
        for (i = 0; i < length; ++i) {
            if (offsets[i] >= 0) {
                offsets[i] += shift;
            }
        }
    } else {
        /*
         * either the conversion function does not handle offsets,
         * or the error input sequence started in a previous buffer
         */
        for (i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
    }
}
795
796 /* ucnv_fromUnicode --------------------------------------------------------- */
797
798 /*
799 * Implementation note for m:n conversions
800 *
801 * While collecting source units to find the longest match for m:n conversion,
802 * some source units may need to be stored for a partial match.
803 * When a second buffer does not yield a match on all of the previously stored
804 * source units, then they must be "replayed", i.e., fed back into the converter.
805 *
806 * The code relies on the fact that replaying will not nest -
807 * converting a replay buffer will not result in a replay.
808 * This is because a replay is necessary only after the _continuation_ of a
809 * partial match failed, but a replay buffer is converted as a whole.
810 * It may result in some of its units being stored again for a partial match,
811 * but there will not be a continuation _during_ the replay which could fail.
812 *
813 * It is conceivable that a callback function could call the converter
814 * recursively in a way that causes another replay to be stored, but that
815 * would be an error in the callback function.
816 * Such violations will cause assertion failures in a debug build,
817 * and wrong output, but they will not cause a crash.
818 */
819
820 static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs * pArgs,UErrorCode * err)821 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
822 UConverterFromUnicode fromUnicode;
823 UConverter *cnv;
824 const UChar *s;
825 char *t;
826 int32_t *offsets;
827 int32_t sourceIndex;
828 int32_t errorInputLength;
829 UBool converterSawEndOfInput, calledCallback;
830
831 /* variables for m:n conversion */
832 UChar replay[UCNV_EXT_MAX_UCHARS];
833 const UChar *realSource, *realSourceLimit;
834 int32_t realSourceIndex;
835 UBool realFlush;
836
837 cnv=pArgs->converter;
838 s=pArgs->source;
839 t=pArgs->target;
840 offsets=pArgs->offsets;
841
842 /* get the converter implementation function */
843 sourceIndex=0;
844 if(offsets==NULL) {
845 fromUnicode=cnv->sharedData->impl->fromUnicode;
846 } else {
847 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
848 if(fromUnicode==NULL) {
849 /* there is no WithOffsets implementation */
850 fromUnicode=cnv->sharedData->impl->fromUnicode;
851 /* we will write -1 for each offset */
852 sourceIndex=-1;
853 }
854 }
855
856 if(cnv->preFromULength>=0) {
857 /* normal mode */
858 realSource=NULL;
859
860 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
861 realSourceLimit=NULL;
862 realFlush=FALSE;
863 realSourceIndex=0;
864 } else {
865 /*
866 * Previous m:n conversion stored source units from a partial match
867 * and failed to consume all of them.
868 * We need to "replay" them from a temporary buffer and convert them first.
869 */
870 realSource=pArgs->source;
871 realSourceLimit=pArgs->sourceLimit;
872 realFlush=pArgs->flush;
873 realSourceIndex=sourceIndex;
874
875 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
876 pArgs->source=replay;
877 pArgs->sourceLimit=replay-cnv->preFromULength;
878 pArgs->flush=FALSE;
879 sourceIndex=-1;
880
881 cnv->preFromULength=0;
882 }
883
884 /*
885 * loop for conversion and error handling
886 *
887 * loop {
888 * convert
889 * loop {
890 * update offsets
891 * handle end of input
892 * handle errors/call callback
893 * }
894 * }
895 */
896 for(;;) {
897 if(U_SUCCESS(*err)) {
898 /* convert */
899 fromUnicode(pArgs, err);
900
901 /*
902 * set a flag for whether the converter
903 * successfully processed the end of the input
904 *
905 * need not check cnv->preFromULength==0 because a replay (<0) will cause
906 * s<sourceLimit before converterSawEndOfInput is checked
907 */
908 converterSawEndOfInput=
909 (UBool)(U_SUCCESS(*err) &&
910 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
911 cnv->fromUChar32==0);
912 } else {
913 /* handle error from ucnv_convertEx() */
914 converterSawEndOfInput=FALSE;
915 }
916
917 /* no callback called yet for this iteration */
918 calledCallback=FALSE;
919
920 /* no sourceIndex adjustment for conversion, only for callback output */
921 errorInputLength=0;
922
923 /*
924 * loop for offsets and error handling
925 *
926 * iterates at most 3 times:
927 * 1. to clean up after the conversion function
928 * 2. after the callback
929 * 3. after the callback again if there was truncated input
930 */
931 for(;;) {
932 /* update offsets if we write any */
933 if(offsets!=NULL) {
934 int32_t length=(int32_t)(pArgs->target-t);
935 if(length>0) {
936 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
937
938 /*
939 * if a converter handles offsets and updates the offsets
940 * pointer at the end, then pArgs->offset should not change
941 * here;
942 * however, some converters do not handle offsets at all
943 * (sourceIndex<0) or may not update the offsets pointer
944 */
945 pArgs->offsets=offsets+=length;
946 }
947
948 if(sourceIndex>=0) {
949 sourceIndex+=(int32_t)(pArgs->source-s);
950 }
951 }
952
953 if(cnv->preFromULength<0) {
954 /*
955 * switch the source to new replay units (cannot occur while replaying)
956 * after offset handling and before end-of-input and callback handling
957 */
958 if(realSource==NULL) {
959 realSource=pArgs->source;
960 realSourceLimit=pArgs->sourceLimit;
961 realFlush=pArgs->flush;
962 realSourceIndex=sourceIndex;
963
964 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
965 pArgs->source=replay;
966 pArgs->sourceLimit=replay-cnv->preFromULength;
967 pArgs->flush=FALSE;
968 if((sourceIndex+=cnv->preFromULength)<0) {
969 sourceIndex=-1;
970 }
971
972 cnv->preFromULength=0;
973 } else {
974 /* see implementation note before _fromUnicodeWithCallback() */
975 U_ASSERT(realSource==NULL);
976 *err=U_INTERNAL_PROGRAM_ERROR;
977 }
978 }
979
980 /* update pointers */
981 s=pArgs->source;
982 t=pArgs->target;
983
984 if(U_SUCCESS(*err)) {
985 if(s<pArgs->sourceLimit) {
986 /*
987 * continue with the conversion loop while there is still input left
988 * (continue converting by breaking out of only the inner loop)
989 */
990 break;
991 } else if(realSource!=NULL) {
992 /* switch back from replaying to the real source and continue */
993 pArgs->source=realSource;
994 pArgs->sourceLimit=realSourceLimit;
995 pArgs->flush=realFlush;
996 sourceIndex=realSourceIndex;
997
998 realSource=NULL;
999 break;
1000 } else if(pArgs->flush && cnv->fromUChar32!=0) {
1001 /*
1002 * the entire input stream is consumed
1003 * and there is a partial, truncated input sequence left
1004 */
1005
1006 /* inject an error and continue with callback handling */
1007 *err=U_TRUNCATED_CHAR_FOUND;
1008 calledCallback=FALSE; /* new error condition */
1009 } else {
1010 /* input consumed */
1011 if(pArgs->flush) {
1012 /*
1013 * return to the conversion loop once more if the flush
1014 * flag is set and the conversion function has not
1015 * successfully processed the end of the input yet
1016 *
1017 * (continue converting by breaking out of only the inner loop)
1018 */
1019 if(!converterSawEndOfInput) {
1020 break;
1021 }
1022
1023 /* reset the converter without calling the callback function */
1024 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1025 }
1026
1027 /* done successfully */
1028 return;
1029 }
1030 }
1031
1032 /* U_FAILURE(*err) */
1033 {
1034 UErrorCode e;
1035
1036 if( calledCallback ||
1037 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1038 (e!=U_INVALID_CHAR_FOUND &&
1039 e!=U_ILLEGAL_CHAR_FOUND &&
1040 e!=U_TRUNCATED_CHAR_FOUND)
1041 ) {
1042 /*
1043 * the callback did not or cannot resolve the error:
1044 * set output pointers and return
1045 *
1046 * the check for buffer overflow is redundant but it is
1047 * a high-runner case and hopefully documents the intent
1048 * well
1049 *
1050 * if we were replaying, then the replay buffer must be
1051 * copied back into the UConverter
1052 * and the real arguments must be restored
1053 */
1054 if(realSource!=NULL) {
1055 int32_t length;
1056
1057 U_ASSERT(cnv->preFromULength==0);
1058
1059 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1060 if(length>0) {
1061 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
1062 cnv->preFromULength=(int8_t)-length;
1063 }
1064
1065 pArgs->source=realSource;
1066 pArgs->sourceLimit=realSourceLimit;
1067 pArgs->flush=realFlush;
1068 }
1069
1070 return;
1071 }
1072 }
1073
1074 /* callback handling */
1075 {
1076 UChar32 codePoint;
1077
1078 /* get and write the code point */
1079 codePoint=cnv->fromUChar32;
1080 errorInputLength=0;
1081 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1082 cnv->invalidUCharLength=(int8_t)errorInputLength;
1083
1084 /* set the converter state to deal with the next character */
1085 cnv->fromUChar32=0;
1086
1087 /* call the callback function */
1088 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1089 cnv->invalidUCharBuffer, errorInputLength, codePoint,
1090 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1091 err);
1092 }
1093
1094 /*
1095 * loop back to the offset handling
1096 *
1097 * this flag will indicate after offset handling
1098 * that a callback was called;
1099 * if the callback did not resolve the error, then we return
1100 */
1101 calledCallback=TRUE;
1102 }
1103 }
1104 }
1105
1106 /*
1107 * Output the fromUnicode overflow buffer.
1108 * Call this function if(cnv->charErrorBufferLength>0).
1109 * @return TRUE if overflow
1110 */
1111 static UBool
ucnv_outputOverflowFromUnicode(UConverter * cnv,char ** target,const char * targetLimit,int32_t ** pOffsets,UErrorCode * err)1112 ucnv_outputOverflowFromUnicode(UConverter *cnv,
1113 char **target, const char *targetLimit,
1114 int32_t **pOffsets,
1115 UErrorCode *err) {
1116 int32_t *offsets;
1117 char *overflow, *t;
1118 int32_t i, length;
1119
1120 t=*target;
1121 if(pOffsets!=NULL) {
1122 offsets=*pOffsets;
1123 } else {
1124 offsets=NULL;
1125 }
1126
1127 overflow=(char *)cnv->charErrorBuffer;
1128 length=cnv->charErrorBufferLength;
1129 i=0;
1130 while(i<length) {
1131 if(t==targetLimit) {
1132 /* the overflow buffer contains too much, keep the rest */
1133 int32_t j=0;
1134
1135 do {
1136 overflow[j++]=overflow[i++];
1137 } while(i<length);
1138
1139 cnv->charErrorBufferLength=(int8_t)j;
1140 *target=t;
1141 if(offsets!=NULL) {
1142 *pOffsets=offsets;
1143 }
1144 *err=U_BUFFER_OVERFLOW_ERROR;
1145 return TRUE;
1146 }
1147
1148 /* copy the overflow contents to the target */
1149 *t++=overflow[i++];
1150 if(offsets!=NULL) {
1151 *offsets++=-1; /* no source index available for old output */
1152 }
1153 }
1154
1155 /* the overflow buffer is completely copied to the target */
1156 cnv->charErrorBufferLength=0;
1157 *target=t;
1158 if(offsets!=NULL) {
1159 *pOffsets=offsets;
1160 }
1161 return FALSE;
1162 }
1163
1164 U_CAPI void U_EXPORT2
ucnv_fromUnicode(UConverter * cnv,char ** target,const char * targetLimit,const UChar ** source,const UChar * sourceLimit,int32_t * offsets,UBool flush,UErrorCode * err)1165 ucnv_fromUnicode(UConverter *cnv,
1166 char **target, const char *targetLimit,
1167 const UChar **source, const UChar *sourceLimit,
1168 int32_t *offsets,
1169 UBool flush,
1170 UErrorCode *err) {
1171 UConverterFromUnicodeArgs args;
1172 const UChar *s;
1173 char *t;
1174
1175 /* check parameters */
1176 if(err==NULL || U_FAILURE(*err)) {
1177 return;
1178 }
1179
1180 if(cnv==NULL || target==NULL || source==NULL) {
1181 *err=U_ILLEGAL_ARGUMENT_ERROR;
1182 return;
1183 }
1184
1185 s=*source;
1186 t=*target;
1187
1188 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
1189 /*
1190 Prevent code from going into an infinite loop in case we do hit this
1191 limit. The limit pointer is expected to be on a UChar * boundary.
1192 This also prevents the next argument check from failing.
1193 */
1194 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
1195 }
1196
1197 /*
1198 * All these conditions should never happen.
1199 *
1200 * 1) Make sure that the limits are >= to the address source or target
1201 *
1202 * 2) Make sure that the buffer sizes do not exceed the number range for
1203 * int32_t because some functions use the size (in units or bytes)
1204 * rather than comparing pointers, and because offsets are int32_t values.
1205 *
1206 * size_t is guaranteed to be unsigned and large enough for the job.
1207 *
1208 * Return with an error instead of adjusting the limits because we would
1209 * not be able to maintain the semantics that either the source must be
1210 * consumed or the target filled (unless an error occurs).
1211 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1212 *
1213 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1214 * to a char * pointer and provide an incomplete UChar code unit.
1215 */
1216 if (sourceLimit<s || targetLimit<t ||
1217 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1218 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
1219 (((const char *)sourceLimit-(const char *)s) & 1) != 0)
1220 {
1221 *err=U_ILLEGAL_ARGUMENT_ERROR;
1222 return;
1223 }
1224
1225 /* output the target overflow buffer */
1226 if( cnv->charErrorBufferLength>0 &&
1227 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
1228 ) {
1229 /* U_BUFFER_OVERFLOW_ERROR */
1230 return;
1231 }
1232 /* *target may have moved, therefore stop using t */
1233
1234 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
1235 /* the overflow buffer is emptied and there is no new input: we are done */
1236 return;
1237 }
1238
1239 /*
1240 * Do not simply return with a buffer overflow error if
1241 * !flush && t==targetLimit
1242 * because it is possible that the source will not generate any output.
1243 * For example, the skip callback may be called;
1244 * it does not output anything.
1245 */
1246
1247 /* prepare the converter arguments */
1248 args.converter=cnv;
1249 args.flush=flush;
1250 args.offsets=offsets;
1251 args.source=s;
1252 args.sourceLimit=sourceLimit;
1253 args.target=*target;
1254 args.targetLimit=targetLimit;
1255 args.size=sizeof(args);
1256
1257 _fromUnicodeWithCallback(&args, err);
1258
1259 *source=args.source;
1260 *target=args.target;
1261 }
1262
1263 /* ucnv_toUnicode() --------------------------------------------------------- */
1264
1265 static void
_toUnicodeWithCallback(UConverterToUnicodeArgs * pArgs,UErrorCode * err)1266 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1267 UConverterToUnicode toUnicode;
1268 UConverter *cnv;
1269 const char *s;
1270 UChar *t;
1271 int32_t *offsets;
1272 int32_t sourceIndex;
1273 int32_t errorInputLength;
1274 UBool converterSawEndOfInput, calledCallback;
1275
1276 /* variables for m:n conversion */
1277 char replay[UCNV_EXT_MAX_BYTES];
1278 const char *realSource, *realSourceLimit;
1279 int32_t realSourceIndex;
1280 UBool realFlush;
1281
1282 cnv=pArgs->converter;
1283 s=pArgs->source;
1284 t=pArgs->target;
1285 offsets=pArgs->offsets;
1286
1287 /* get the converter implementation function */
1288 sourceIndex=0;
1289 if(offsets==NULL) {
1290 toUnicode=cnv->sharedData->impl->toUnicode;
1291 } else {
1292 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1293 if(toUnicode==NULL) {
1294 /* there is no WithOffsets implementation */
1295 toUnicode=cnv->sharedData->impl->toUnicode;
1296 /* we will write -1 for each offset */
1297 sourceIndex=-1;
1298 }
1299 }
1300
1301 if(cnv->preToULength>=0) {
1302 /* normal mode */
1303 realSource=NULL;
1304
1305 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1306 realSourceLimit=NULL;
1307 realFlush=FALSE;
1308 realSourceIndex=0;
1309 } else {
1310 /*
1311 * Previous m:n conversion stored source units from a partial match
1312 * and failed to consume all of them.
1313 * We need to "replay" them from a temporary buffer and convert them first.
1314 */
1315 realSource=pArgs->source;
1316 realSourceLimit=pArgs->sourceLimit;
1317 realFlush=pArgs->flush;
1318 realSourceIndex=sourceIndex;
1319
1320 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1321 pArgs->source=replay;
1322 pArgs->sourceLimit=replay-cnv->preToULength;
1323 pArgs->flush=FALSE;
1324 sourceIndex=-1;
1325
1326 cnv->preToULength=0;
1327 }
1328
1329 /*
1330 * loop for conversion and error handling
1331 *
1332 * loop {
1333 * convert
1334 * loop {
1335 * update offsets
1336 * handle end of input
1337 * handle errors/call callback
1338 * }
1339 * }
1340 */
1341 for(;;) {
1342 if(U_SUCCESS(*err)) {
1343 /* convert */
1344 toUnicode(pArgs, err);
1345
1346 /*
1347 * set a flag for whether the converter
1348 * successfully processed the end of the input
1349 *
1350 * need not check cnv->preToULength==0 because a replay (<0) will cause
1351 * s<sourceLimit before converterSawEndOfInput is checked
1352 */
1353 converterSawEndOfInput=
1354 (UBool)(U_SUCCESS(*err) &&
1355 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1356 cnv->toULength==0);
1357 } else {
1358 /* handle error from getNextUChar() or ucnv_convertEx() */
1359 converterSawEndOfInput=FALSE;
1360 }
1361
1362 /* no callback called yet for this iteration */
1363 calledCallback=FALSE;
1364
1365 /* no sourceIndex adjustment for conversion, only for callback output */
1366 errorInputLength=0;
1367
1368 /*
1369 * loop for offsets and error handling
1370 *
1371 * iterates at most 3 times:
1372 * 1. to clean up after the conversion function
1373 * 2. after the callback
1374 * 3. after the callback again if there was truncated input
1375 */
1376 for(;;) {
1377 /* update offsets if we write any */
1378 if(offsets!=NULL) {
1379 int32_t length=(int32_t)(pArgs->target-t);
1380 if(length>0) {
1381 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1382
1383 /*
1384 * if a converter handles offsets and updates the offsets
1385 * pointer at the end, then pArgs->offset should not change
1386 * here;
1387 * however, some converters do not handle offsets at all
1388 * (sourceIndex<0) or may not update the offsets pointer
1389 */
1390 pArgs->offsets=offsets+=length;
1391 }
1392
1393 if(sourceIndex>=0) {
1394 sourceIndex+=(int32_t)(pArgs->source-s);
1395 }
1396 }
1397
1398 if(cnv->preToULength<0) {
1399 /*
1400 * switch the source to new replay units (cannot occur while replaying)
1401 * after offset handling and before end-of-input and callback handling
1402 */
1403 if(realSource==NULL) {
1404 realSource=pArgs->source;
1405 realSourceLimit=pArgs->sourceLimit;
1406 realFlush=pArgs->flush;
1407 realSourceIndex=sourceIndex;
1408
1409 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1410 pArgs->source=replay;
1411 pArgs->sourceLimit=replay-cnv->preToULength;
1412 pArgs->flush=FALSE;
1413 if((sourceIndex+=cnv->preToULength)<0) {
1414 sourceIndex=-1;
1415 }
1416
1417 cnv->preToULength=0;
1418 } else {
1419 /* see implementation note before _fromUnicodeWithCallback() */
1420 U_ASSERT(realSource==NULL);
1421 *err=U_INTERNAL_PROGRAM_ERROR;
1422 }
1423 }
1424
1425 /* update pointers */
1426 s=pArgs->source;
1427 t=pArgs->target;
1428
1429 if(U_SUCCESS(*err)) {
1430 if(s<pArgs->sourceLimit) {
1431 /*
1432 * continue with the conversion loop while there is still input left
1433 * (continue converting by breaking out of only the inner loop)
1434 */
1435 break;
1436 } else if(realSource!=NULL) {
1437 /* switch back from replaying to the real source and continue */
1438 pArgs->source=realSource;
1439 pArgs->sourceLimit=realSourceLimit;
1440 pArgs->flush=realFlush;
1441 sourceIndex=realSourceIndex;
1442
1443 realSource=NULL;
1444 break;
1445 } else if(pArgs->flush && cnv->toULength>0) {
1446 /*
1447 * the entire input stream is consumed
1448 * and there is a partial, truncated input sequence left
1449 */
1450
1451 /* inject an error and continue with callback handling */
1452 *err=U_TRUNCATED_CHAR_FOUND;
1453 calledCallback=FALSE; /* new error condition */
1454 } else {
1455 /* input consumed */
1456 if(pArgs->flush) {
1457 /*
1458 * return to the conversion loop once more if the flush
1459 * flag is set and the conversion function has not
1460 * successfully processed the end of the input yet
1461 *
1462 * (continue converting by breaking out of only the inner loop)
1463 */
1464 if(!converterSawEndOfInput) {
1465 break;
1466 }
1467
1468 /* reset the converter without calling the callback function */
1469 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1470 }
1471
1472 /* done successfully */
1473 return;
1474 }
1475 }
1476
1477 /* U_FAILURE(*err) */
1478 {
1479 UErrorCode e;
1480
1481 if( calledCallback ||
1482 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1483 (e!=U_INVALID_CHAR_FOUND &&
1484 e!=U_ILLEGAL_CHAR_FOUND &&
1485 e!=U_TRUNCATED_CHAR_FOUND &&
1486 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1487 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1488 ) {
1489 /*
1490 * the callback did not or cannot resolve the error:
1491 * set output pointers and return
1492 *
1493 * the check for buffer overflow is redundant but it is
1494 * a high-runner case and hopefully documents the intent
1495 * well
1496 *
1497 * if we were replaying, then the replay buffer must be
1498 * copied back into the UConverter
1499 * and the real arguments must be restored
1500 */
1501 if(realSource!=NULL) {
1502 int32_t length;
1503
1504 U_ASSERT(cnv->preToULength==0);
1505
1506 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1507 if(length>0) {
1508 uprv_memcpy(cnv->preToU, pArgs->source, length);
1509 cnv->preToULength=(int8_t)-length;
1510 }
1511
1512 pArgs->source=realSource;
1513 pArgs->sourceLimit=realSourceLimit;
1514 pArgs->flush=realFlush;
1515 }
1516
1517 return;
1518 }
1519 }
1520
1521 /* copy toUBytes[] to invalidCharBuffer[] */
1522 errorInputLength=cnv->invalidCharLength=cnv->toULength;
1523 if(errorInputLength>0) {
1524 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1525 }
1526
1527 /* set the converter state to deal with the next character */
1528 cnv->toULength=0;
1529
1530 /* call the callback function */
1531 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1532 cnv->invalidCharBuffer, errorInputLength,
1533 (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
1534 UCNV_UNASSIGNED : UCNV_ILLEGAL,
1535 err);
1536
1537 /*
1538 * loop back to the offset handling
1539 *
1540 * this flag will indicate after offset handling
1541 * that a callback was called;
1542 * if the callback did not resolve the error, then we return
1543 */
1544 calledCallback=TRUE;
1545 }
1546 }
1547 }
1548
1549 /*
1550 * Output the toUnicode overflow buffer.
1551 * Call this function if(cnv->UCharErrorBufferLength>0).
1552 * @return TRUE if overflow
1553 */
1554 static UBool
ucnv_outputOverflowToUnicode(UConverter * cnv,UChar ** target,const UChar * targetLimit,int32_t ** pOffsets,UErrorCode * err)1555 ucnv_outputOverflowToUnicode(UConverter *cnv,
1556 UChar **target, const UChar *targetLimit,
1557 int32_t **pOffsets,
1558 UErrorCode *err) {
1559 int32_t *offsets;
1560 UChar *overflow, *t;
1561 int32_t i, length;
1562
1563 t=*target;
1564 if(pOffsets!=NULL) {
1565 offsets=*pOffsets;
1566 } else {
1567 offsets=NULL;
1568 }
1569
1570 overflow=cnv->UCharErrorBuffer;
1571 length=cnv->UCharErrorBufferLength;
1572 i=0;
1573 while(i<length) {
1574 if(t==targetLimit) {
1575 /* the overflow buffer contains too much, keep the rest */
1576 int32_t j=0;
1577
1578 do {
1579 overflow[j++]=overflow[i++];
1580 } while(i<length);
1581
1582 cnv->UCharErrorBufferLength=(int8_t)j;
1583 *target=t;
1584 if(offsets!=NULL) {
1585 *pOffsets=offsets;
1586 }
1587 *err=U_BUFFER_OVERFLOW_ERROR;
1588 return TRUE;
1589 }
1590
1591 /* copy the overflow contents to the target */
1592 *t++=overflow[i++];
1593 if(offsets!=NULL) {
1594 *offsets++=-1; /* no source index available for old output */
1595 }
1596 }
1597
1598 /* the overflow buffer is completely copied to the target */
1599 cnv->UCharErrorBufferLength=0;
1600 *target=t;
1601 if(offsets!=NULL) {
1602 *pOffsets=offsets;
1603 }
1604 return FALSE;
1605 }
1606
1607 U_CAPI void U_EXPORT2
ucnv_toUnicode(UConverter * cnv,UChar ** target,const UChar * targetLimit,const char ** source,const char * sourceLimit,int32_t * offsets,UBool flush,UErrorCode * err)1608 ucnv_toUnicode(UConverter *cnv,
1609 UChar **target, const UChar *targetLimit,
1610 const char **source, const char *sourceLimit,
1611 int32_t *offsets,
1612 UBool flush,
1613 UErrorCode *err) {
1614 UConverterToUnicodeArgs args;
1615 const char *s;
1616 UChar *t;
1617
1618 /* check parameters */
1619 if(err==NULL || U_FAILURE(*err)) {
1620 return;
1621 }
1622
1623 if(cnv==NULL || target==NULL || source==NULL) {
1624 *err=U_ILLEGAL_ARGUMENT_ERROR;
1625 return;
1626 }
1627
1628 s=*source;
1629 t=*target;
1630
1631 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
1632 /*
1633 Prevent code from going into an infinite loop in case we do hit this
1634 limit. The limit pointer is expected to be on a UChar * boundary.
1635 This also prevents the next argument check from failing.
1636 */
1637 targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
1638 }
1639
1640 /*
1641 * All these conditions should never happen.
1642 *
1643 * 1) Make sure that the limits are >= to the address source or target
1644 *
1645 * 2) Make sure that the buffer sizes do not exceed the number range for
1646 * int32_t because some functions use the size (in units or bytes)
1647 * rather than comparing pointers, and because offsets are int32_t values.
1648 *
1649 * size_t is guaranteed to be unsigned and large enough for the job.
1650 *
1651 * Return with an error instead of adjusting the limits because we would
1652 * not be able to maintain the semantics that either the source must be
1653 * consumed or the target filled (unless an error occurs).
1654 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1655 *
1656 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1657 * to a char * pointer and provide an incomplete UChar code unit.
1658 */
1659 if (sourceLimit<s || targetLimit<t ||
1660 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1661 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
1662 (((const char *)targetLimit-(const char *)t) & 1) != 0
1663 ) {
1664 *err=U_ILLEGAL_ARGUMENT_ERROR;
1665 return;
1666 }
1667
1668 /* output the target overflow buffer */
1669 if( cnv->UCharErrorBufferLength>0 &&
1670 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
1671 ) {
1672 /* U_BUFFER_OVERFLOW_ERROR */
1673 return;
1674 }
1675 /* *target may have moved, therefore stop using t */
1676
1677 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
1678 /* the overflow buffer is emptied and there is no new input: we are done */
1679 return;
1680 }
1681
1682 /*
1683 * Do not simply return with a buffer overflow error if
1684 * !flush && t==targetLimit
1685 * because it is possible that the source will not generate any output.
1686 * For example, the skip callback may be called;
1687 * it does not output anything.
1688 */
1689
1690 /* prepare the converter arguments */
1691 args.converter=cnv;
1692 args.flush=flush;
1693 args.offsets=offsets;
1694 args.source=s;
1695 args.sourceLimit=sourceLimit;
1696 args.target=*target;
1697 args.targetLimit=targetLimit;
1698 args.size=sizeof(args);
1699
1700 _toUnicodeWithCallback(&args, err);
1701
1702 *source=args.source;
1703 *target=args.target;
1704 }
1705
1706 /* ucnv_to/fromUChars() ----------------------------------------------------- */
1707
1708 U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter * cnv,char * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)1709 ucnv_fromUChars(UConverter *cnv,
1710 char *dest, int32_t destCapacity,
1711 const UChar *src, int32_t srcLength,
1712 UErrorCode *pErrorCode) {
1713 const UChar *srcLimit;
1714 char *originalDest, *destLimit;
1715 int32_t destLength;
1716
1717 /* check arguments */
1718 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1719 return 0;
1720 }
1721
1722 if( cnv==NULL ||
1723 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1724 srcLength<-1 || (srcLength!=0 && src==NULL)
1725 ) {
1726 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1727 return 0;
1728 }
1729
1730 /* initialize */
1731 ucnv_resetFromUnicode(cnv);
1732 originalDest=dest;
1733 if(srcLength==-1) {
1734 srcLength=u_strlen(src);
1735 }
1736 if(srcLength>0) {
1737 srcLimit=src+srcLength;
1738 destLimit=dest+destCapacity;
1739
1740 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1741 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1742 destLimit=(char *)U_MAX_PTR(dest);
1743 }
1744
1745 /* perform the conversion */
1746 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1747 destLength=(int32_t)(dest-originalDest);
1748
1749 /* if an overflow occurs, then get the preflighting length */
1750 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1751 char buffer[1024];
1752
1753 destLimit=buffer+sizeof(buffer);
1754 do {
1755 dest=buffer;
1756 *pErrorCode=U_ZERO_ERROR;
1757 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1758 destLength+=(int32_t)(dest-buffer);
1759 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1760 }
1761 } else {
1762 destLength=0;
1763 }
1764
1765 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1766 }
1767
1768 U_CAPI int32_t U_EXPORT2
ucnv_toUChars(UConverter * cnv,UChar * dest,int32_t destCapacity,const char * src,int32_t srcLength,UErrorCode * pErrorCode)1769 ucnv_toUChars(UConverter *cnv,
1770 UChar *dest, int32_t destCapacity,
1771 const char *src, int32_t srcLength,
1772 UErrorCode *pErrorCode) {
1773 const char *srcLimit;
1774 UChar *originalDest, *destLimit;
1775 int32_t destLength;
1776
1777 /* check arguments */
1778 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1779 return 0;
1780 }
1781
1782 if( cnv==NULL ||
1783 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1784 srcLength<-1 || (srcLength!=0 && src==NULL))
1785 {
1786 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1787 return 0;
1788 }
1789
1790 /* initialize */
1791 ucnv_resetToUnicode(cnv);
1792 originalDest=dest;
1793 if(srcLength==-1) {
1794 srcLength=(int32_t)uprv_strlen(src);
1795 }
1796 if(srcLength>0) {
1797 srcLimit=src+srcLength;
1798 destLimit=dest+destCapacity;
1799
1800 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1801 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1802 destLimit=(UChar *)U_MAX_PTR(dest);
1803 }
1804
1805 /* perform the conversion */
1806 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1807 destLength=(int32_t)(dest-originalDest);
1808
1809 /* if an overflow occurs, then get the preflighting length */
1810 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1811 {
1812 UChar buffer[1024];
1813
1814 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1815 do {
1816 dest=buffer;
1817 *pErrorCode=U_ZERO_ERROR;
1818 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1819 destLength+=(int32_t)(dest-buffer);
1820 }
1821 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1822 }
1823 } else {
1824 destLength=0;
1825 }
1826
1827 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1828 }
1829
1830 /* ucnv_getNextUChar() ------------------------------------------------------ */
1831
1832 U_CAPI UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter * cnv,const char ** source,const char * sourceLimit,UErrorCode * err)1833 ucnv_getNextUChar(UConverter *cnv,
1834 const char **source, const char *sourceLimit,
1835 UErrorCode *err) {
1836 UConverterToUnicodeArgs args;
1837 UChar buffer[U16_MAX_LENGTH];
1838 const char *s;
1839 UChar32 c;
1840 int32_t i, length;
1841
1842 /* check parameters */
1843 if(err==NULL || U_FAILURE(*err)) {
1844 return 0xffff;
1845 }
1846
1847 if(cnv==NULL || source==NULL) {
1848 *err=U_ILLEGAL_ARGUMENT_ERROR;
1849 return 0xffff;
1850 }
1851
1852 s=*source;
1853 if(sourceLimit<s) {
1854 *err=U_ILLEGAL_ARGUMENT_ERROR;
1855 return 0xffff;
1856 }
1857
1858 /*
1859 * Make sure that the buffer sizes do not exceed the number range for
1860 * int32_t because some functions use the size (in units or bytes)
1861 * rather than comparing pointers, and because offsets are int32_t values.
1862 *
1863 * size_t is guaranteed to be unsigned and large enough for the job.
1864 *
1865 * Return with an error instead of adjusting the limits because we would
1866 * not be able to maintain the semantics that either the source must be
1867 * consumed or the target filled (unless an error occurs).
1868 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1869 */
1870 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1871 *err=U_ILLEGAL_ARGUMENT_ERROR;
1872 return 0xffff;
1873 }
1874
1875 c=U_SENTINEL;
1876
1877 /* flush the target overflow buffer */
1878 if(cnv->UCharErrorBufferLength>0) {
1879 UChar *overflow;
1880
1881 overflow=cnv->UCharErrorBuffer;
1882 i=0;
1883 length=cnv->UCharErrorBufferLength;
1884 U16_NEXT(overflow, i, length, c);
1885
1886 /* move the remaining overflow contents up to the beginning */
1887 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1888 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1889 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1890 }
1891
1892 if(!U16_IS_LEAD(c) || i<length) {
1893 return c;
1894 }
1895 /*
1896 * Continue if the overflow buffer contained only a lead surrogate,
1897 * in case the converter outputs single surrogates from complete
1898 * input sequences.
1899 */
1900 }
1901
1902 /*
1903 * flush==TRUE is implied for ucnv_getNextUChar()
1904 *
1905 * do not simply return even if s==sourceLimit because the converter may
1906 * not have seen flush==TRUE before
1907 */
1908
1909 /* prepare the converter arguments */
1910 args.converter=cnv;
1911 args.flush=TRUE;
1912 args.offsets=NULL;
1913 args.source=s;
1914 args.sourceLimit=sourceLimit;
1915 args.target=buffer;
1916 args.targetLimit=buffer+1;
1917 args.size=sizeof(args);
1918
1919 if(c<0) {
1920 /*
1921 * call the native getNextUChar() implementation if we are
1922 * at a character boundary (toULength==0)
1923 *
1924 * unlike with _toUnicode(), getNextUChar() implementations must set
1925 * U_TRUNCATED_CHAR_FOUND for truncated input,
1926 * in addition to setting toULength/toUBytes[]
1927 */
1928 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1929 c=cnv->sharedData->impl->getNextUChar(&args, err);
1930 *source=s=args.source;
1931 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1932 /* reset the converter without calling the callback function */
1933 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1934 return 0xffff; /* no output */
1935 } else if(U_SUCCESS(*err) && c>=0) {
1936 return c;
1937 /*
1938 * else fall through to use _toUnicode() because
1939 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1940 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1941 */
1942 }
1943 }
1944
1945 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1946 _toUnicodeWithCallback(&args, err);
1947
1948 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1949 *err=U_ZERO_ERROR;
1950 }
1951
1952 i=0;
1953 length=(int32_t)(args.target-buffer);
1954 } else {
1955 /* write the lead surrogate from the overflow buffer */
1956 buffer[0]=(UChar)c;
1957 args.target=buffer+1;
1958 i=0;
1959 length=1;
1960 }
1961
1962 /* buffer contents starts at i and ends before length */
1963
1964 if(U_FAILURE(*err)) {
1965 c=0xffff; /* no output */
1966 } else if(length==0) {
1967 /* no input or only state changes */
1968 *err=U_INDEX_OUTOFBOUNDS_ERROR;
1969 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1970 c=0xffff; /* no output */
1971 } else {
1972 c=buffer[0];
1973 i=1;
1974 if(!U16_IS_LEAD(c)) {
1975 /* consume c=buffer[0], done */
1976 } else {
1977 /* got a lead surrogate, see if a trail surrogate follows */
1978 UChar c2;
1979
1980 if(cnv->UCharErrorBufferLength>0) {
1981 /* got overflow output from the conversion */
1982 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1983 /* got a trail surrogate, too */
1984 c=U16_GET_SUPPLEMENTARY(c, c2);
1985
1986 /* move the remaining overflow contents up to the beginning */
1987 if((--cnv->UCharErrorBufferLength)>0) {
1988 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1989 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1990 }
1991 } else {
1992 /* c is an unpaired lead surrogate, just return it */
1993 }
1994 } else if(args.source<sourceLimit) {
1995 /* convert once more, to buffer[1] */
1996 args.targetLimit=buffer+2;
1997 _toUnicodeWithCallback(&args, err);
1998 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1999 *err=U_ZERO_ERROR;
2000 }
2001
2002 length=(int32_t)(args.target-buffer);
2003 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
2004 /* got a trail surrogate, too */
2005 c=U16_GET_SUPPLEMENTARY(c, c2);
2006 i=2;
2007 }
2008 }
2009 }
2010 }
2011
2012 /*
2013 * move leftover output from buffer[i..length[
2014 * into the beginning of the overflow buffer
2015 */
2016 if(i<length) {
2017 /* move further overflow back */
2018 int32_t delta=length-i;
2019 if((length=cnv->UCharErrorBufferLength)>0) {
2020 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
2021 length*U_SIZEOF_UCHAR);
2022 }
2023 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
2024
2025 cnv->UCharErrorBuffer[0]=buffer[i++];
2026 if(delta>1) {
2027 cnv->UCharErrorBuffer[1]=buffer[i];
2028 }
2029 }
2030
2031 *source=args.source;
2032 return c;
2033 }
2034
2035 /* ucnv_convert() and siblings ---------------------------------------------- */
2036
2037 U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter * targetCnv,UConverter * sourceCnv,char ** target,const char * targetLimit,const char ** source,const char * sourceLimit,UChar * pivotStart,UChar ** pivotSource,UChar ** pivotTarget,const UChar * pivotLimit,UBool reset,UBool flush,UErrorCode * pErrorCode)2038 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
2039 char **target, const char *targetLimit,
2040 const char **source, const char *sourceLimit,
2041 UChar *pivotStart, UChar **pivotSource,
2042 UChar **pivotTarget, const UChar *pivotLimit,
2043 UBool reset, UBool flush,
2044 UErrorCode *pErrorCode) {
2045 UChar pivotBuffer[CHUNK_SIZE];
2046 const UChar *myPivotSource;
2047 UChar *myPivotTarget;
2048 const char *s;
2049 char *t;
2050
2051 UConverterToUnicodeArgs toUArgs;
2052 UConverterFromUnicodeArgs fromUArgs;
2053 UConverterConvert convert;
2054
2055 /* error checking */
2056 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2057 return;
2058 }
2059
2060 if( targetCnv==NULL || sourceCnv==NULL ||
2061 source==NULL || *source==NULL ||
2062 target==NULL || *target==NULL || targetLimit==NULL
2063 ) {
2064 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2065 return;
2066 }
2067
2068 s=*source;
2069 t=*target;
2070 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
2071 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2072 return;
2073 }
2074
2075 /*
2076 * Make sure that the buffer sizes do not exceed the number range for
2077 * int32_t. See ucnv_toUnicode() for a more detailed comment.
2078 */
2079 if(
2080 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
2081 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
2082 ) {
2083 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2084 return;
2085 }
2086
2087 if(pivotStart==NULL) {
2088 if(!flush) {
2089 /* streaming conversion requires an explicit pivot buffer */
2090 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2091 return;
2092 }
2093
2094 /* use the stack pivot buffer */
2095 myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
2096 pivotSource=(UChar **)&myPivotSource;
2097 pivotTarget=&myPivotTarget;
2098 pivotLimit=pivotBuffer+CHUNK_SIZE;
2099 } else if( pivotStart>=pivotLimit ||
2100 pivotSource==NULL || *pivotSource==NULL ||
2101 pivotTarget==NULL || *pivotTarget==NULL ||
2102 pivotLimit==NULL
2103 ) {
2104 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2105 return;
2106 }
2107
2108 if(sourceLimit==NULL) {
2109 /* get limit of single-byte-NUL-terminated source string */
2110 sourceLimit=uprv_strchr(*source, 0);
2111 }
2112
2113 if(reset) {
2114 ucnv_resetToUnicode(sourceCnv);
2115 ucnv_resetFromUnicode(targetCnv);
2116 *pivotSource=*pivotTarget=pivotStart;
2117 } else if(targetCnv->charErrorBufferLength>0) {
2118 /* output the targetCnv overflow buffer */
2119 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
2120 /* U_BUFFER_OVERFLOW_ERROR */
2121 return;
2122 }
2123 /* *target has moved, therefore stop using t */
2124
2125 if( !flush &&
2126 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
2127 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
2128 ) {
2129 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2130 return;
2131 }
2132 }
2133
2134 /* Is direct-UTF-8 conversion available? */
2135 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2136 targetCnv->sharedData->impl->fromUTF8!=NULL
2137 ) {
2138 convert=targetCnv->sharedData->impl->fromUTF8;
2139 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2140 sourceCnv->sharedData->impl->toUTF8!=NULL
2141 ) {
2142 convert=sourceCnv->sharedData->impl->toUTF8;
2143 } else {
2144 convert=NULL;
2145 }
2146
2147 /*
2148 * If direct-UTF-8 conversion is available, then we use a smaller
2149 * pivot buffer for error handling and partial matches
2150 * so that we quickly return to direct conversion.
2151 *
2152 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2153 *
2154 * We could reduce the pivot buffer size further, at the cost of
2155 * buffer overflows from callbacks.
2156 * The pivot buffer should not be smaller than the maximum number of
2157 * fromUnicode extension table input UChars
2158 * (for m:n conversion, see
2159 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2160 * or 2 for surrogate pairs.
2161 *
2162 * Too small a buffer can cause thrashing between pivoting and direct
2163 * conversion, with function call overhead outweighing the benefits
2164 * of direct conversion.
2165 */
2166 if(convert!=NULL && (pivotLimit-pivotStart)>32) {
2167 pivotLimit=pivotStart+32;
2168 }
2169
2170 /* prepare the converter arguments */
2171 fromUArgs.converter=targetCnv;
2172 fromUArgs.flush=FALSE;
2173 fromUArgs.offsets=NULL;
2174 fromUArgs.target=*target;
2175 fromUArgs.targetLimit=targetLimit;
2176 fromUArgs.size=sizeof(fromUArgs);
2177
2178 toUArgs.converter=sourceCnv;
2179 toUArgs.flush=flush;
2180 toUArgs.offsets=NULL;
2181 toUArgs.source=s;
2182 toUArgs.sourceLimit=sourceLimit;
2183 toUArgs.targetLimit=pivotLimit;
2184 toUArgs.size=sizeof(toUArgs);
2185
2186 /*
2187 * TODO: Consider separating this function into two functions,
2188 * extracting exactly the conversion loop,
2189 * for readability and to reduce the set of visible variables.
2190 *
2191 * Otherwise stop using s and t from here on.
2192 */
2193 s=t=NULL;
2194
2195 /*
2196 * conversion loop
2197 *
2198 * The sequence of steps in the loop may appear backward,
2199 * but the principle is simple:
2200 * In the chain of
2201 * source - sourceCnv overflow - pivot - targetCnv overflow - target
2202 * empty out later buffers before refilling them from earlier ones.
2203 *
2204 * The targetCnv overflow buffer is flushed out only once before the loop.
2205 */
2206 for(;;) {
2207 /*
2208 * if(pivot not empty or error or replay or flush fromUnicode) {
2209 * fromUnicode(pivot -> target);
2210 * }
2211 *
2212 * For pivoting conversion; and for direct conversion for
2213 * error callback handling and flushing the replay buffer.
2214 */
2215 if( *pivotSource<*pivotTarget ||
2216 U_FAILURE(*pErrorCode) ||
2217 targetCnv->preFromULength<0 ||
2218 fromUArgs.flush
2219 ) {
2220 fromUArgs.source=*pivotSource;
2221 fromUArgs.sourceLimit=*pivotTarget;
2222 _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
2223 if(U_FAILURE(*pErrorCode)) {
2224 /* target overflow, or conversion error */
2225 *pivotSource=(UChar *)fromUArgs.source;
2226 break;
2227 }
2228
2229 /*
2230 * _fromUnicodeWithCallback() must have consumed the pivot contents
2231 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2232 */
2233 }
2234
2235 /* The pivot buffer is empty; reset it so we start at pivotStart. */
2236 *pivotSource=*pivotTarget=pivotStart;
2237
2238 /*
2239 * if(sourceCnv overflow buffer not empty) {
2240 * move(sourceCnv overflow buffer -> pivot);
2241 * continue;
2242 * }
2243 */
2244 /* output the sourceCnv overflow buffer */
2245 if(sourceCnv->UCharErrorBufferLength>0) {
2246 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
2247 /* U_BUFFER_OVERFLOW_ERROR */
2248 *pErrorCode=U_ZERO_ERROR;
2249 }
2250 continue;
2251 }
2252
2253 /*
2254 * check for end of input and break if done
2255 *
2256 * Checking both flush and fromUArgs.flush ensures that the converters
2257 * have been called with the flush flag set if the ucnv_convertEx()
2258 * caller set it.
2259 */
2260 if( toUArgs.source==sourceLimit &&
2261 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
2262 (!flush || fromUArgs.flush)
2263 ) {
2264 /* done successfully */
2265 break;
2266 }
2267
2268 /*
2269 * use direct conversion if available
2270 * but not if continuing a partial match
2271 * or flushing the toUnicode replay buffer
2272 */
2273 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
2274 if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2275 /* remove a warning that may be set by this function */
2276 *pErrorCode=U_ZERO_ERROR;
2277 }
2278 convert(&fromUArgs, &toUArgs, pErrorCode);
2279 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2280 break;
2281 } else if(U_FAILURE(*pErrorCode)) {
2282 if(sourceCnv->toULength>0) {
2283 /*
2284 * Fall through to calling _toUnicodeWithCallback()
2285 * for callback handling.
2286 *
2287 * The pivot buffer will be reset with
2288 * *pivotSource=*pivotTarget=pivotStart;
2289 * which indicates a toUnicode error to the caller
2290 * (*pivotSource==pivotStart shows no pivot UChars consumed).
2291 */
2292 } else {
2293 /*
2294 * Indicate a fromUnicode error to the caller
2295 * (*pivotSource>pivotStart shows some pivot UChars consumed).
2296 */
2297 *pivotSource=*pivotTarget=pivotStart+1;
2298 /*
2299 * Loop around to calling _fromUnicodeWithCallbacks()
2300 * for callback handling.
2301 */
2302 continue;
2303 }
2304 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2305 /*
2306 * No error, but the implementation requested to temporarily
2307 * fall back to pivoting.
2308 */
2309 *pErrorCode=U_ZERO_ERROR;
2310 /*
2311 * The following else branches are almost identical to the end-of-input
2312 * handling in _toUnicodeWithCallback().
2313 * Avoid calling it just for the end of input.
2314 */
2315 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
2316 /*
2317 * the entire input stream is consumed
2318 * and there is a partial, truncated input sequence left
2319 */
2320
2321 /* inject an error and continue with callback handling */
2322 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
2323 } else {
2324 /* input consumed */
2325 if(flush) {
2326 /* reset the converters without calling the callback functions */
2327 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
2328 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
2329 }
2330
2331 /* done successfully */
2332 break;
2333 }
2334 }
2335
2336 /*
2337 * toUnicode(source -> pivot);
2338 *
2339 * For pivoting conversion; and for direct conversion for
2340 * error callback handling, continuing partial matches
2341 * and flushing the replay buffer.
2342 *
2343 * The pivot buffer is empty and reset.
2344 */
2345 toUArgs.target=pivotStart; /* ==*pivotTarget */
2346 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2347 _toUnicodeWithCallback(&toUArgs, pErrorCode);
2348 *pivotTarget=toUArgs.target;
2349 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2350 /* pivot overflow: continue with the conversion loop */
2351 *pErrorCode=U_ZERO_ERROR;
2352 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
2353 /* conversion error, or there was nothing left to convert */
2354 break;
2355 }
2356 /*
2357 * else:
2358 * _toUnicodeWithCallback() wrote into the pivot buffer,
2359 * continue with fromUnicode conversion.
2360 *
2361 * Set the fromUnicode flush flag if we flush and if toUnicode has
2362 * processed the end of the input.
2363 */
2364 if( flush && toUArgs.source==sourceLimit &&
2365 sourceCnv->preToULength>=0 &&
2366 sourceCnv->UCharErrorBufferLength==0
2367 ) {
2368 fromUArgs.flush=TRUE;
2369 }
2370 }
2371
2372 /*
2373 * The conversion loop is exited when one of the following is true:
2374 * - the entire source text has been converted successfully to the target buffer
2375 * - a target buffer overflow occurred
2376 * - a conversion error occurred
2377 */
2378
2379 *source=toUArgs.source;
2380 *target=fromUArgs.target;
2381
2382 /* terminate the target buffer if possible */
2383 if(flush && U_SUCCESS(*pErrorCode)) {
2384 if(*target!=targetLimit) {
2385 **target=0;
2386 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
2387 *pErrorCode=U_ZERO_ERROR;
2388 }
2389 } else {
2390 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
2391 }
2392 }
2393 }
2394
2395 /* internal implementation of ucnv_convert() etc. with preflighting */
2396 static int32_t
ucnv_internalConvert(UConverter * outConverter,UConverter * inConverter,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2397 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2398 char *target, int32_t targetCapacity,
2399 const char *source, int32_t sourceLength,
2400 UErrorCode *pErrorCode) {
2401 UChar pivotBuffer[CHUNK_SIZE];
2402 UChar *pivot, *pivot2;
2403
2404 char *myTarget;
2405 const char *sourceLimit;
2406 const char *targetLimit;
2407 int32_t targetLength=0;
2408
2409 /* set up */
2410 if(sourceLength<0) {
2411 sourceLimit=uprv_strchr(source, 0);
2412 } else {
2413 sourceLimit=source+sourceLength;
2414 }
2415
2416 /* if there is no input data, we're done */
2417 if(source==sourceLimit) {
2418 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2419 }
2420
2421 pivot=pivot2=pivotBuffer;
2422 myTarget=target;
2423 targetLength=0;
2424
2425 if(targetCapacity>0) {
2426 /* perform real conversion */
2427 targetLimit=target+targetCapacity;
2428 ucnv_convertEx(outConverter, inConverter,
2429 &myTarget, targetLimit,
2430 &source, sourceLimit,
2431 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2432 FALSE,
2433 TRUE,
2434 pErrorCode);
2435 targetLength=(int32_t)(myTarget-target);
2436 }
2437
2438 /*
2439 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2440 * to it but continue the conversion in order to store in targetCapacity
2441 * the number of bytes that was required.
2442 */
2443 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2444 {
2445 char targetBuffer[CHUNK_SIZE];
2446
2447 targetLimit=targetBuffer+CHUNK_SIZE;
2448 do {
2449 *pErrorCode=U_ZERO_ERROR;
2450 myTarget=targetBuffer;
2451 ucnv_convertEx(outConverter, inConverter,
2452 &myTarget, targetLimit,
2453 &source, sourceLimit,
2454 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2455 FALSE,
2456 TRUE,
2457 pErrorCode);
2458 targetLength+=(int32_t)(myTarget-targetBuffer);
2459 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2460
2461 /* done with preflighting, set warnings and errors as appropriate */
2462 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2463 }
2464
2465 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2466 return targetLength;
2467 }
2468
2469 U_CAPI int32_t U_EXPORT2
ucnv_convert(const char * toConverterName,const char * fromConverterName,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2470 ucnv_convert(const char *toConverterName, const char *fromConverterName,
2471 char *target, int32_t targetCapacity,
2472 const char *source, int32_t sourceLength,
2473 UErrorCode *pErrorCode) {
2474 UConverter in, out; /* stack-allocated */
2475 UConverter *inConverter, *outConverter;
2476 int32_t targetLength;
2477
2478 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2479 return 0;
2480 }
2481
2482 if( source==NULL || sourceLength<-1 ||
2483 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2484 ) {
2485 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2486 return 0;
2487 }
2488
2489 /* if there is no input data, we're done */
2490 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2491 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2492 }
2493
2494 /* create the converters */
2495 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2496 if(U_FAILURE(*pErrorCode)) {
2497 return 0;
2498 }
2499
2500 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2501 if(U_FAILURE(*pErrorCode)) {
2502 ucnv_close(inConverter);
2503 return 0;
2504 }
2505
2506 targetLength=ucnv_internalConvert(outConverter, inConverter,
2507 target, targetCapacity,
2508 source, sourceLength,
2509 pErrorCode);
2510
2511 ucnv_close(inConverter);
2512 ucnv_close(outConverter);
2513
2514 return targetLength;
2515 }
2516
2517 /* @internal */
2518 static int32_t
ucnv_convertAlgorithmic(UBool convertToAlgorithmic,UConverterType algorithmicType,UConverter * cnv,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2519 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2520 UConverterType algorithmicType,
2521 UConverter *cnv,
2522 char *target, int32_t targetCapacity,
2523 const char *source, int32_t sourceLength,
2524 UErrorCode *pErrorCode) {
2525 UConverter algoConverterStatic; /* stack-allocated */
2526 UConverter *algoConverter, *to, *from;
2527 int32_t targetLength;
2528
2529 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2530 return 0;
2531 }
2532
2533 if( cnv==NULL || source==NULL || sourceLength<-1 ||
2534 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2535 ) {
2536 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2537 return 0;
2538 }
2539
2540 /* if there is no input data, we're done */
2541 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2542 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2543 }
2544
2545 /* create the algorithmic converter */
2546 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2547 "", 0, pErrorCode);
2548 if(U_FAILURE(*pErrorCode)) {
2549 return 0;
2550 }
2551
2552 /* reset the other converter */
2553 if(convertToAlgorithmic) {
2554 /* cnv->Unicode->algo */
2555 ucnv_resetToUnicode(cnv);
2556 to=algoConverter;
2557 from=cnv;
2558 } else {
2559 /* algo->Unicode->cnv */
2560 ucnv_resetFromUnicode(cnv);
2561 from=algoConverter;
2562 to=cnv;
2563 }
2564
2565 targetLength=ucnv_internalConvert(to, from,
2566 target, targetCapacity,
2567 source, sourceLength,
2568 pErrorCode);
2569
2570 ucnv_close(algoConverter);
2571
2572 return targetLength;
2573 }
2574
2575 U_CAPI int32_t U_EXPORT2
ucnv_toAlgorithmic(UConverterType algorithmicType,UConverter * cnv,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2576 ucnv_toAlgorithmic(UConverterType algorithmicType,
2577 UConverter *cnv,
2578 char *target, int32_t targetCapacity,
2579 const char *source, int32_t sourceLength,
2580 UErrorCode *pErrorCode) {
2581 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2582 target, targetCapacity,
2583 source, sourceLength,
2584 pErrorCode);
2585 }
2586
2587 U_CAPI int32_t U_EXPORT2
ucnv_fromAlgorithmic(UConverter * cnv,UConverterType algorithmicType,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2588 ucnv_fromAlgorithmic(UConverter *cnv,
2589 UConverterType algorithmicType,
2590 char *target, int32_t targetCapacity,
2591 const char *source, int32_t sourceLength,
2592 UErrorCode *pErrorCode) {
2593 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2594 target, targetCapacity,
2595 source, sourceLength,
2596 pErrorCode);
2597 }
2598
2599 U_CAPI UConverterType U_EXPORT2
ucnv_getType(const UConverter * converter)2600 ucnv_getType(const UConverter* converter)
2601 {
2602 int8_t type = converter->sharedData->staticData->conversionType;
2603 #if !UCONFIG_NO_LEGACY_CONVERSION
2604 if(type == UCNV_MBCS) {
2605 return ucnv_MBCSGetType(converter);
2606 }
2607 #endif
2608 return (UConverterType)type;
2609 }
2610
2611 U_CAPI void U_EXPORT2
ucnv_getStarters(const UConverter * converter,UBool starters[256],UErrorCode * err)2612 ucnv_getStarters(const UConverter* converter,
2613 UBool starters[256],
2614 UErrorCode* err)
2615 {
2616 if (err == NULL || U_FAILURE(*err)) {
2617 return;
2618 }
2619
2620 if(converter->sharedData->impl->getStarters != NULL) {
2621 converter->sharedData->impl->getStarters(converter, starters, err);
2622 } else {
2623 *err = U_ILLEGAL_ARGUMENT_ERROR;
2624 }
2625 }
2626
ucnv_getAmbiguous(const UConverter * cnv)2627 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2628 {
2629 UErrorCode errorCode;
2630 const char *name;
2631 int32_t i;
2632
2633 if(cnv==NULL) {
2634 return NULL;
2635 }
2636
2637 errorCode=U_ZERO_ERROR;
2638 name=ucnv_getName(cnv, &errorCode);
2639 if(U_FAILURE(errorCode)) {
2640 return NULL;
2641 }
2642
2643 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2644 {
2645 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2646 {
2647 return ambiguousConverters+i;
2648 }
2649 }
2650
2651 return NULL;
2652 }
2653
2654 U_CAPI void U_EXPORT2
ucnv_fixFileSeparator(const UConverter * cnv,UChar * source,int32_t sourceLength)2655 ucnv_fixFileSeparator(const UConverter *cnv,
2656 UChar* source,
2657 int32_t sourceLength) {
2658 const UAmbiguousConverter *a;
2659 int32_t i;
2660 UChar variant5c;
2661
2662 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2663 {
2664 return;
2665 }
2666
2667 variant5c=a->variant5c;
2668 for(i=0; i<sourceLength; ++i) {
2669 if(source[i]==variant5c) {
2670 source[i]=0x5c;
2671 }
2672 }
2673 }
2674
2675 U_CAPI UBool U_EXPORT2
ucnv_isAmbiguous(const UConverter * cnv)2676 ucnv_isAmbiguous(const UConverter *cnv) {
2677 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2678 }
2679
2680 U_CAPI void U_EXPORT2
ucnv_setFallback(UConverter * cnv,UBool usesFallback)2681 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2682 {
2683 cnv->useFallback = usesFallback;
2684 }
2685
2686 U_CAPI UBool U_EXPORT2
ucnv_usesFallback(const UConverter * cnv)2687 ucnv_usesFallback(const UConverter *cnv)
2688 {
2689 return cnv->useFallback;
2690 }
2691
2692 U_CAPI void U_EXPORT2
ucnv_getInvalidChars(const UConverter * converter,char * errBytes,int8_t * len,UErrorCode * err)2693 ucnv_getInvalidChars (const UConverter * converter,
2694 char *errBytes,
2695 int8_t * len,
2696 UErrorCode * err)
2697 {
2698 if (err == NULL || U_FAILURE(*err))
2699 {
2700 return;
2701 }
2702 if (len == NULL || errBytes == NULL || converter == NULL)
2703 {
2704 *err = U_ILLEGAL_ARGUMENT_ERROR;
2705 return;
2706 }
2707 if (*len < converter->invalidCharLength)
2708 {
2709 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2710 return;
2711 }
2712 if ((*len = converter->invalidCharLength) > 0)
2713 {
2714 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2715 }
2716 }
2717
2718 U_CAPI void U_EXPORT2
ucnv_getInvalidUChars(const UConverter * converter,UChar * errChars,int8_t * len,UErrorCode * err)2719 ucnv_getInvalidUChars (const UConverter * converter,
2720 UChar *errChars,
2721 int8_t * len,
2722 UErrorCode * err)
2723 {
2724 if (err == NULL || U_FAILURE(*err))
2725 {
2726 return;
2727 }
2728 if (len == NULL || errChars == NULL || converter == NULL)
2729 {
2730 *err = U_ILLEGAL_ARGUMENT_ERROR;
2731 return;
2732 }
2733 if (*len < converter->invalidUCharLength)
2734 {
2735 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2736 return;
2737 }
2738 if ((*len = converter->invalidUCharLength) > 0)
2739 {
2740 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2741 }
2742 }
2743
2744 #define SIG_MAX_LEN 5
2745
2746 U_CAPI const char* U_EXPORT2
ucnv_detectUnicodeSignature(const char * source,int32_t sourceLength,int32_t * signatureLength,UErrorCode * pErrorCode)2747 ucnv_detectUnicodeSignature( const char* source,
2748 int32_t sourceLength,
2749 int32_t* signatureLength,
2750 UErrorCode* pErrorCode) {
2751 int32_t dummy;
2752
2753 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2754 * bytes we don't misdetect something
2755 */
2756 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2757 int i = 0;
2758
2759 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2760 return NULL;
2761 }
2762
2763 if(source == NULL || sourceLength < -1){
2764 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2765 return NULL;
2766 }
2767
2768 if(signatureLength == NULL) {
2769 signatureLength = &dummy;
2770 }
2771
2772 if(sourceLength==-1){
2773 sourceLength=(int32_t)uprv_strlen(source);
2774 }
2775
2776
2777 while(i<sourceLength&& i<SIG_MAX_LEN){
2778 start[i]=source[i];
2779 i++;
2780 }
2781
2782 if(start[0] == '\xFE' && start[1] == '\xFF') {
2783 *signatureLength=2;
2784 return "UTF-16BE";
2785 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2786 if(start[2] == '\x00' && start[3] =='\x00') {
2787 *signatureLength=4;
2788 return "UTF-32LE";
2789 } else {
2790 *signatureLength=2;
2791 return "UTF-16LE";
2792 }
2793 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2794 *signatureLength=3;
2795 return "UTF-8";
2796 } else if(start[0] == '\x00' && start[1] == '\x00' &&
2797 start[2] == '\xFE' && start[3]=='\xFF') {
2798 *signatureLength=4;
2799 return "UTF-32BE";
2800 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2801 *signatureLength=3;
2802 return "SCSU";
2803 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2804 *signatureLength=3;
2805 return "BOCU-1";
2806 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2807 /*
2808 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2809 * depending on the second UTF-16 code unit.
2810 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2811 * if it occurs.
2812 *
2813 * So far we have +/v
2814 */
2815 if(start[3] == '\x38' && start[4] == '\x2D') {
2816 /* 5 bytes +/v8- */
2817 *signatureLength=5;
2818 return "UTF-7";
2819 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2820 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2821 *signatureLength=4;
2822 return "UTF-7";
2823 }
2824 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2825 *signatureLength=4;
2826 return "UTF-EBCDIC";
2827 }
2828
2829
2830 /* no known Unicode signature byte sequence recognized */
2831 *signatureLength=0;
2832 return NULL;
2833 }
2834
2835 U_CAPI int32_t U_EXPORT2
ucnv_fromUCountPending(const UConverter * cnv,UErrorCode * status)2836 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
2837 {
2838 if(status == NULL || U_FAILURE(*status)){
2839 return -1;
2840 }
2841 if(cnv == NULL){
2842 *status = U_ILLEGAL_ARGUMENT_ERROR;
2843 return -1;
2844 }
2845
2846 if(cnv->preFromULength > 0){
2847 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2848 }else if(cnv->preFromULength < 0){
2849 return -cnv->preFromULength ;
2850 }else if(cnv->fromUChar32 > 0){
2851 return 1;
2852 }else if(cnv->preFromUFirstCP >0){
2853 return U16_LENGTH(cnv->preFromUFirstCP);
2854 }
2855 return 0;
2856
2857 }
2858
2859 U_CAPI int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter * cnv,UErrorCode * status)2860 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2861
2862 if(status == NULL || U_FAILURE(*status)){
2863 return -1;
2864 }
2865 if(cnv == NULL){
2866 *status = U_ILLEGAL_ARGUMENT_ERROR;
2867 return -1;
2868 }
2869
2870 if(cnv->preToULength > 0){
2871 return cnv->preToULength ;
2872 }else if(cnv->preToULength < 0){
2873 return -cnv->preToULength;
2874 }else if(cnv->toULength > 0){
2875 return cnv->toULength;
2876 }
2877 return 0;
2878 }
2879 #endif
2880
2881 /*
2882 * Hey, Emacs, please set the following:
2883 *
2884 * Local Variables:
2885 * indent-tabs-mode: nil
2886 * End:
2887 *
2888 */
2889