• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *    Steven R. Loomis     7/8/1999      Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include "cstring.h"
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
28 #include "cmemory.h"
29 #include "nucnvtst.h"
30 
31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
35 #endif
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
38 #endif
39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
54 
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
61 #endif
62 
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
65 #endif
66 
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
73 #if 0
74    /*
75     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76     */
77 static void TestISO_2022_CN_EXT(void);
78 #endif
79 static void TestJIS(void);
80 static void TestHZ(void);
81 #endif
82 
83 static void TestSCSU(void);
84 
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
91 #if 0
92    /*
93     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94     */
95 static void TestJitterbug915(void);
96 #endif
97 static void TestISCII(void);
98 
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
105 
106 static void TestIsFixedWidth(void);
107 #endif
108 
109 static void TestInBufSizes(void);
110 
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in[],
113                      int len,
114                      const char* conv,
115                      const char* lang,
116                      char byteArr[],
117                      int byteArrLen);
118 
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121 
122 
/* Number of elements in the scratch output/offset buffers used by the
 * conversion helpers; also the default chunk size for input and output. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes used by the conversion helpers for each converter call. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Human-readable description of the running test, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Smaller of x and y. Arguments and the whole expansion are parenthesized so
 * that operator precedence inside an argument cannot change the result.
 * Each argument may still be evaluated twice: do not pass expressions with
 * side effects. */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
130 
my_ucnv_open(const char * cnv,UErrorCode * err)131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132 {
133   if(cnv && cnv[0] == '@') {
134     return ucnv_openPackage(loadTestData(err), cnv+1, err);
135   } else {
136     return ucnv_open(cnv, err);
137   }
138 }
139 
/* Writes the first `len` bytes of `a` to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148 
/* Writes the first `len` UChars of `a` to the verbose log as "{U+0x.... 0x.... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
156 
/* Writes the first `len` bytes of `a` to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165 
/* Writes the first `len` UChars of `a` to stderr as "{U+0x.... 0x.... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
174 
175 static void
TestNextUChar(UConverter * cnv,const char * source,const char * limit,const int32_t results[],const char * message)176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177 {
178      const char* s0;
179      const char* s=(char*)source;
180      const int32_t *r=results;
181      UErrorCode errorCode=U_ZERO_ERROR;
182      UChar32 c;
183 
184      while(s<limit) {
185         s0=s;
186         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188             break; /* no more significant input */
189         } else if(U_FAILURE(errorCode)) {
190             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191             break;
192         } else if(
193             /* test the expected number of input bytes only if >=0 */
194             (*r>=0 && (int32_t)(s-s0)!=*r) ||
195             c!=*(r+1)
196         ) {
197             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                 message, c, (s-s0), *(r+1), *r);
199             break;
200         }
201         r+=2;
202     }
203 }
204 
205 static void
TestNextUCharError(UConverter * cnv,const char * source,const char * limit,UErrorCode expected,const char * message)206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207 {
208      const char* s=(char*)source;
209      UErrorCode errorCode=U_ZERO_ERROR;
210      uint32_t c;
211      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212      if(errorCode != expected){
213         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214      }
215      if(c != 0xFFFD && c != 0xffff){
216         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217      }
218 
219 }
220 
TestInBufSizes(void)221 static void TestInBufSizes(void)
222 {
223   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224 #if 1
225   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230   TestNewConvertWithBufferSizes(1,1);
231   TestNewConvertWithBufferSizes(2,3);
232   TestNewConvertWithBufferSizes(3,2);
233 #endif
234 }
235 
TestOutBufSizes(void)236 static void TestOutBufSizes(void)
237 {
238 #if 1
239   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245 
246 #endif
247 }
248 
249 
/*
 * Registers the converter tests of this file under "tsconv/nucnvtst/".
 * Which tests are registered depends on the UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022 build switches. The registration order below is
 * kept exactly as it has always been.
 */
void addTestNewConvert(TestNode** root)
{
    /* --- buffer-size sweeps --- */
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
    addTest(root, TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
#endif

    /* --- general converter behavior and Unicode encodings --- */
    addTest(root, TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
    addTest(root, TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
    addTest(root, TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
    addTest(root, TestUTF7, "tsconv/nucnvtst/TestUTF7");
    addTest(root, TestIMAP, "tsconv/nucnvtst/TestIMAP");
    addTest(root, TestUTF8, "tsconv/nucnvtst/TestUTF8");

    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
    addTest(root, TestCESU8, "tsconv/nucnvtst/TestCESU8");
    addTest(root, TestUTF16, "tsconv/nucnvtst/TestUTF16");
    addTest(root, TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
    addTest(root, TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
    addTest(root, TestUTF32, "tsconv/nucnvtst/TestUTF32");
    addTest(root, TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
    addTest(root, TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
#endif

    addTest(root, TestLATIN1, "tsconv/nucnvtst/TestLATIN1");

    /* --- legacy code pages and ISO 2022 variants --- */
#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestSBCS, "tsconv/nucnvtst/TestSBCS");
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestDBCS, "tsconv/nucnvtst/TestDBCS");
    addTest(root, TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
#endif
    addTest(root, TestMBCS, "tsconv/nucnvtst/TestMBCS");

#ifdef U_ENABLE_GENERIC_ISO_2022
    addTest(root, TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
#endif

    addTest(root, TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
    addTest(root, TestJIS, "tsconv/nucnvtst/TestJIS");
    addTest(root, TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
    addTest(root, TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
    addTest(root, TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
    addTest(root, TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
    addTest(root, TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
    /*
     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
     */
    addTest(root, TestHZ, "tsconv/nucnvtst/TestHZ");
#endif

    addTest(root, TestSCSU, "tsconv/nucnvtst/TestSCSU");

    /* --- further legacy converters and regression tests --- */
#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
    addTest(root, TestGB18030, "tsconv/nucnvtst/TestGB18030");
    addTest(root, TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
    addTest(root, TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
    addTest(root, TestISCII, "tsconv/nucnvtst/TestISCII");
    addTest(root, TestJB5275, "tsconv/nucnvtst/TestJB5275");
    addTest(root, TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
#if !UCONFIG_NO_COLLATION
    addTest(root, TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
#endif

    addTest(root, TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
    addTest(root, TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
#endif

    addTest(root, TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
    addTest(root, TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
    addTest(root, TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");

    addTest(root, TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
#endif
}
337 
338 
339 /* Note that this test already makes use of statics, so it's not really
340    multithread safe.
341    This convenience function lets us make the error messages actually useful.
342 */
343 
setNuConvTestName(const char * codepage,const char * direction)344 static void setNuConvTestName(const char *codepage, const char *direction)
345 {
346     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347         codepage,
348         direction,
349         (int)gInBufferSize,
350         (int)gOutBufferSize);
351 }
352 
/* Outcome reported by the testConvert* helpers. */
typedef enum
{
  /* test was OK */
  TC_OK       = 0,

  /* Match failed - err was printed */
  TC_MISMATCH = 1,

  /* Test failed, don't print an err because it was already printed. */
  TC_FAIL     = 2
} ETestConvertResult;
359 
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)362 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
363                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364 {
365     UErrorCode status = U_ZERO_ERROR;
366     UConverter *conv = 0;
367     char    junkout[NEW_MAX_BUFFER]; /* FIX */
368     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
369     char *p;
370     const UChar *src;
371     char *end;
372     char *targ;
373     int32_t *offs;
374     int i;
375     int32_t   realBufferSize;
376     char *realBufferEnd;
377     const UChar *realSourceEnd;
378     const UChar *sourceLimit;
379     UBool checkOffsets = TRUE;
380     UBool doFlush;
381 
382     for(i=0;i<NEW_MAX_BUFFER;i++)
383         junkout[i] = (char)0xF0;
384     for(i=0;i<NEW_MAX_BUFFER;i++)
385         junokout[i] = 0xFF;
386 
387     setNuConvTestName(codepage, "FROM");
388 
389     log_verbose("\n=========  %s\n", gNuConvTestName);
390 
391     conv = my_ucnv_open(codepage, &status);
392 
393     if(U_FAILURE(status))
394     {
395         log_data_err("Couldn't open converter %s\n",codepage);
396         return TC_FAIL;
397     }
398     if(useFallback){
399         ucnv_setFallback(conv,useFallback);
400     }
401 
402     log_verbose("Converter opened..\n");
403 
404     src = source;
405     targ = junkout;
406     offs = junokout;
407 
408     realBufferSize = UPRV_LENGTHOF(junkout);
409     realBufferEnd = junkout + realBufferSize;
410     realSourceEnd = source + sourceLen;
411 
412     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413         checkOffsets = FALSE;
414 
415     do
416     {
417       end = nct_min(targ + gOutBufferSize, realBufferEnd);
418       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419 
420       doFlush = (UBool)(sourceLimit == realSourceEnd);
421 
422       if(targ == realBufferEnd) {
423         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424         return TC_FAIL;
425       }
426       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
427 
428 
429       status = U_ZERO_ERROR;
430 
431       ucnv_fromUnicode (conv,
432                         &targ,
433                         end,
434                         &src,
435                         sourceLimit,
436                         checkOffsets ? offs : NULL,
437                         doFlush, /* flush if we're at the end of the input data */
438                         &status);
439     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440 
441     if(U_FAILURE(status)) {
442       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443       return TC_FAIL;
444     }
445 
446     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447                 sourceLen, targ-junkout);
448 
449     if(getTestOption(VERBOSITY_OPTION))
450     {
451       char junk[9999];
452       char offset_str[9999];
453       char *ptr;
454 
455       junk[0] = 0;
456       offset_str[0] = 0;
457       for(ptr = junkout;ptr<targ;ptr++) {
458         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460       }
461 
462       log_verbose(junk);
463       printSeq((const uint8_t *)expect, expectLen);
464       if ( checkOffsets ) {
465         log_verbose("\nOffsets:");
466         log_verbose(offset_str);
467       }
468       log_verbose("\n");
469     }
470     ucnv_close(conv);
471 
472     if(expectLen != targ-junkout) {
473       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475       fprintf(stderr, "Got:\n");
476       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
477       fprintf(stderr, "Expected:\n");
478       printSeqErr((const unsigned char*)expect, expectLen);
479       return TC_MISMATCH;
480     }
481 
482     if (checkOffsets && (expectOffsets != 0) ) {
483       log_verbose("comparing %d offsets..\n", targ-junkout);
484       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
486         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487         log_err("\n");
488         log_err("Got  :     ");
489         for(p=junkout;p<targ;p++) {
490           log_err("%d,", junokout[p-junkout]);
491         }
492         log_err("\n");
493         log_err("Expected:  ");
494         for(i=0; i<(targ-junkout); i++) {
495           log_err("%d,", expectOffsets[i]);
496         }
497         log_err("\n");
498       }
499     }
500 
501     log_verbose("comparing..\n");
502     if(!memcmp(junkout, expect, expectLen)) {
503       log_verbose("Matches!\n");
504       return TC_OK;
505     } else {
506       log_err("String does not match u->%s\n", gNuConvTestName);
507       printUSeqErr(source, sourceLen);
508       fprintf(stderr, "Got:\n");
509       printSeqErr((const unsigned char *)junkout, expectLen);
510       fprintf(stderr, "Expected:\n");
511       printSeqErr((const unsigned char *)expect, expectLen);
512 
513       return TC_MISMATCH;
514     }
515 }
516 
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)519 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521 {
522     UErrorCode status = U_ZERO_ERROR;
523     UConverter *conv = 0;
524     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
525     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
526     const char *src;
527     const char *realSourceEnd;
528     const char *srcLimit;
529     UChar *p;
530     UChar *targ;
531     UChar *end;
532     int32_t *offs;
533     int i;
534     UBool   checkOffsets = TRUE;
535 
536     int32_t   realBufferSize;
537     UChar *realBufferEnd;
538 
539 
540     for(i=0;i<NEW_MAX_BUFFER;i++)
541         junkout[i] = 0xFFFE;
542 
543     for(i=0;i<NEW_MAX_BUFFER;i++)
544         junokout[i] = -1;
545 
546     setNuConvTestName(codepage, "TO");
547 
548     log_verbose("\n=========  %s\n", gNuConvTestName);
549 
550     conv = my_ucnv_open(codepage, &status);
551 
552     if(U_FAILURE(status))
553     {
554         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555         return TC_FAIL;
556     }
557     if(useFallback){
558         ucnv_setFallback(conv,useFallback);
559     }
560     log_verbose("Converter opened..\n");
561 
562     src = (const char *)source;
563     targ = junkout;
564     offs = junokout;
565 
566     realBufferSize = UPRV_LENGTHOF(junkout);
567     realBufferEnd = junkout + realBufferSize;
568     realSourceEnd = src + sourcelen;
569 
570     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
571         checkOffsets = FALSE;
572 
573     do
574     {
575         end = nct_min( targ + gOutBufferSize, realBufferEnd);
576         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577 
578         if(targ == realBufferEnd)
579         {
580             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581             return TC_FAIL;
582         }
583         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584 
585         /* oldTarg = targ; */
586 
587         status = U_ZERO_ERROR;
588 
589         ucnv_toUnicode (conv,
590                 &targ,
591                 end,
592                 &src,
593                 srcLimit,
594                 checkOffsets ? offs : NULL,
595                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
596                 &status);
597 
598         /*        offs += (targ-oldTarg); */
599 
600       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601 
602     if(U_FAILURE(status))
603     {
604         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605         return TC_FAIL;
606     }
607 
608     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609         sourcelen, targ-junkout);
610     if(getTestOption(VERBOSITY_OPTION))
611     {
612         char junk[9999];
613         char offset_str[9999];
614         UChar *ptr;
615 
616         junk[0] = 0;
617         offset_str[0] = 0;
618 
619         for(ptr = junkout;ptr<targ;ptr++)
620         {
621             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623         }
624 
625         log_verbose(junk);
626         printUSeq(expect, expectlen);
627         if ( checkOffsets )
628           {
629             log_verbose("\nOffsets:");
630             log_verbose(offset_str);
631           }
632         log_verbose("\n");
633     }
634     ucnv_close(conv);
635 
636     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637 
638     if (checkOffsets && (expectOffsets != 0))
639     {
640         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642             log_err("Got:      ");
643             for(p=junkout;p<targ;p++) {
644                 log_err("%d,", junokout[p-junkout]);
645             }
646             log_err("\n");
647             log_err("Expected: ");
648             for(i=0; i<(targ-junkout); i++) {
649                 log_err("%d,", expectOffsets[i]);
650             }
651             log_err("\n");
652             log_err("output:   ");
653             for(i=0; i<(targ-junkout); i++) {
654                 log_err("%X,", junkout[i]);
655             }
656             log_err("\n");
657             log_err("input:    ");
658             for(i=0; i<(src-(const char *)source); i++) {
659                 log_err("%X,", (unsigned char)source[i]);
660             }
661             log_err("\n");
662         }
663     }
664 
665     if(!memcmp(junkout, expect, expectlen*2))
666     {
667         log_verbose("Matches!\n");
668         return TC_OK;
669     }
670     else
671     {
672         log_err("String does not match. %s\n", gNuConvTestName);
673         log_verbose("String does not match. %s\n", gNuConvTestName);
674         printf("\nGot:");
675         printUSeqErr(junkout, expectlen);
676         printf("\nExpected:");
677         printUSeqErr(expect, expectlen);
678         return TC_MISMATCH;
679     }
680 }
681 
682 
TestNewConvertWithBufferSizes(int32_t outsize,int32_t insize)683 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684 {
685 /** test chars #1 */
686     /*  1 2 3  1Han 2Han 3Han .  */
687     static const UChar   sampleText[] =
688      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689     static const UChar sampleTextRoundTripUnmappable[] =
690     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691 
692 
693     static const uint8_t expectedUTF8[] =
694      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695     static const int32_t toUTF8Offs[] =
696      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697     static const int32_t fmUTF8Offs[] =
698      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
699 
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701     /* Same as UTF8, but with ^[%B preceeding */
702     static const const uint8_t expectedISO2022[] =
703      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704     static const int32_t toISO2022Offs[]     =
705      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707     static const int32_t fmISO2022Offs[] =
708      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709 #endif
710 
711     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712     static const uint8_t expectedIBM930[] =
713      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714     static const int32_t toIBM930Offs[] =
715      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716     static const int32_t fmIBM930Offs[] =
717      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
718 
719     /* 1 2 3 0 h1 h2 h3 . MBCS*/
720     static const uint8_t expectedIBM943[] =
721      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722     static const int32_t toIBM943Offs    [] =
723      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724     static const int32_t fmIBM943Offs[] =
725      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
726 
727     /* 1 2 3 0 h1 h2 h3 . DBCS*/
728     static const uint8_t expectedIBM9027[] =
729      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730     static const int32_t toIBM9027Offs    [] =
731      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
732 
733      /* 1 2 3 0 <?> <?> <?> . SBCS*/
734     static const uint8_t expectedIBM920[] =
735      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736     static const int32_t toIBM920Offs    [] =
737      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
738 
739     /* 1 2 3 0 <?> <?> <?> . SBCS*/
740     static const uint8_t expectedISO88593[] =
741      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742     static const int32_t toISO88593Offs[]     =
743      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
744 
745     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746     static const uint8_t expectedLATIN1[] =
747      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748     static const int32_t toLATIN1Offs[]     =
749      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
750 
751 
752     /*  etc */
753     static const uint8_t expectedUTF16BE[] =
754      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755     static const int32_t toUTF16BEOffs[]=
756      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757     static const int32_t fmUTF16BEOffs[] =
758      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
759 
760     static const uint8_t expectedUTF16LE[] =
761      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762     static const int32_t toUTF16LEOffs[]=
763      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764     static const int32_t fmUTF16LEOffs[] =
765      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
766 
767     static const uint8_t expectedUTF32BE[] =
768      { 0x00, 0x00, 0x00, 0x31,
769        0x00, 0x00, 0x00, 0x32,
770        0x00, 0x00, 0x00, 0x33,
771        0x00, 0x00, 0x00, 0x00,
772        0x00, 0x00, 0x4e, 0x00,
773        0x00, 0x00, 0x4e, 0x8c,
774        0x00, 0x00, 0x4e, 0x09,
775        0x00, 0x00, 0x00, 0x2e,
776        0x00, 0x02, 0x00, 0x21 };
777     static const int32_t toUTF32BEOffs[]=
778      { 0x00, 0x00, 0x00, 0x00,
779        0x01, 0x01, 0x01, 0x01,
780        0x02, 0x02, 0x02, 0x02,
781        0x03, 0x03, 0x03, 0x03,
782        0x04, 0x04, 0x04, 0x04,
783        0x05, 0x05, 0x05, 0x05,
784        0x06, 0x06, 0x06, 0x06,
785        0x07, 0x07, 0x07, 0x07,
786        0x08, 0x08, 0x08, 0x08,
787        0x08, 0x08, 0x08, 0x08 };
788     static const int32_t fmUTF32BEOffs[] =
789      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
790 
791     static const uint8_t expectedUTF32LE[] =
792      { 0x31, 0x00, 0x00, 0x00,
793        0x32, 0x00, 0x00, 0x00,
794        0x33, 0x00, 0x00, 0x00,
795        0x00, 0x00, 0x00, 0x00,
796        0x00, 0x4e, 0x00, 0x00,
797        0x8c, 0x4e, 0x00, 0x00,
798        0x09, 0x4e, 0x00, 0x00,
799        0x2e, 0x00, 0x00, 0x00,
800        0x21, 0x00, 0x02, 0x00 };
801     static const int32_t toUTF32LEOffs[]=
802      { 0x00, 0x00, 0x00, 0x00,
803        0x01, 0x01, 0x01, 0x01,
804        0x02, 0x02, 0x02, 0x02,
805        0x03, 0x03, 0x03, 0x03,
806        0x04, 0x04, 0x04, 0x04,
807        0x05, 0x05, 0x05, 0x05,
808        0x06, 0x06, 0x06, 0x06,
809        0x07, 0x07, 0x07, 0x07,
810        0x08, 0x08, 0x08, 0x08,
811        0x08, 0x08, 0x08, 0x08 };
812     static const int32_t fmUTF32LEOffs[] =
813      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
814 
815 
816 
817 
818 /** Test chars #2 **/
819 
820     /* Sahha [health],  slashed h's */
821     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823 
824     /* LMBCS */
825     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829     /*********************************** START OF CODE finally *************/
830 
831     gInBufferSize = insize;
832     gOutBufferSize = outsize;
833 
834     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
835 
836 
837     /*UTF-8*/
838     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
839         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
840 
841     log_verbose("Test surrogate behaviour for UTF8\n");
842     {
843         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
845                            0xf0, 0x90, 0x90, 0x81,
846                            0xef, 0xbf, 0xbd
847         };
848         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849         testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
850                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
851 
852 
853     }
854 
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
856     /*ISO-2022*/
857     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
858         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
859 #endif
860 
861     /*UTF16 LE*/
862     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
863         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
864     /*UTF16 BE*/
865     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
866         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
867     /*UTF32 LE*/
868     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
869         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
870     /*UTF32 BE*/
871     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
872         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
873 
874     /*LATIN_1*/
875     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
876         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
877 
878 #if !UCONFIG_NO_LEGACY_CONVERSION
879     /*EBCDIC_STATEFUL*/
880     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
881         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
882 
883     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
884         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
885 
886     /*MBCS*/
887 
888     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
889         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
890     /*DBCS*/
891     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
892         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
893     /*SBCS*/
894     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
895         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
896     /*SBCS*/
897     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
898         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
899 #endif
900 
901 
902 /****/
903 
904     /*UTF-8*/
905     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
906         sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
908     /*ISO-2022*/
909     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
910         sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
911 #endif
912 
913     /*UTF16 LE*/
914     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
915         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
916     /*UTF16 BE*/
917     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
918         sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
919     /*UTF32 LE*/
920     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
921         sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
922     /*UTF32 BE*/
923     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
924         sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
925 
926 #if !UCONFIG_NO_LEGACY_CONVERSION
927     /*EBCDIC_STATEFUL*/
928     testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
929             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
930     /*MBCS*/
931     testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
932             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
933 #endif
934 
935     /* Try it again to make sure it still works */
936     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
937         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
938 
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
941         malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
942 
943     testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
944         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
945 
946     /*LMBCS*/
947     testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
948         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
949     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
950         LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
951 #endif
952 
953     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954     {
955         /* encode directly set D and set O */
956         static const uint8_t utf7[] = {
957             /*
958                 Hi Mom -+Jjo--!
959                 A+ImIDkQ.
960                 +-
961                 +ZeVnLIqe-
962             */
963             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965             0x2b, 0x2d,
966             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
967         };
968         static const UChar unicode[] = {
969             /*
970                 Hi Mom -<WHITE SMILING FACE>-!
971                 A<NOT IDENTICAL TO><ALPHA>.
972                 +
973                 [Japanese word "nihongo"]
974             */
975             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976             0x41, 0x2262, 0x0391, 0x2e,
977             0x2b,
978             0x65e5, 0x672c, 0x8a9e
979         };
980         static const int32_t toUnicodeOffsets[] = {
981             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982             15, 17, 19, 23,
983             24,
984             27, 29, 32
985         };
986         static const int32_t fromUnicodeOffsets[] = {
987             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988             11, 12, 12, 12, 13, 13, 13, 13, 14,
989             15, 15,
990             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991         };
992 
993         /* same but escaping set O (the exclamation mark) */
994         static const uint8_t utf7Restricted[] = {
995             /*
996                 Hi Mom -+Jjo--+ACE-
997                 A+ImIDkQ.
998                 +-
999                 +ZeVnLIqe-
1000             */
1001             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003             0x2b, 0x2d,
1004             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1005         };
1006         static const int32_t toUnicodeOffsetsR[] = {
1007             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008             19, 21, 23, 27,
1009             28,
1010             31, 33, 36
1011         };
1012         static const int32_t fromUnicodeOffsetsR[] = {
1013             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014             11, 12, 12, 12, 13, 13, 13, 13, 14,
1015             15, 15,
1016             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017         };
1018 
1019         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1020 
1021         testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1022 
1023         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1024 
1025         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1026     }
1027 
1028     /*
1029      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030      * modified according to RFC 2060,
1031      * and supplemented with the one example in RFC 2060 itself.
1032      */
1033     {
1034         static const uint8_t imap[] = {
1035             /*  Hi Mom -&Jjo--!
1036                 A&ImIDkQ-.
1037                 &-
1038                 &ZeVnLIqe-
1039                 \
1040                 ~peter
1041                 /mail
1042                 /&ZeVnLIqe-
1043                 /&U,BTFw-
1044             */
1045             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047             0x26, 0x2d,
1048             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049             0x5c,
1050             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054         };
1055         static const UChar unicode[] = {
1056             /*  Hi Mom -<WHITE SMILING FACE>-!
1057                 A<NOT IDENTICAL TO><ALPHA>.
1058                 &
1059                 [Japanese word "nihongo"]
1060                 \
1061                 ~peter
1062                 /mail
1063                 /<65e5, 672c, 8a9e>
1064                 /<53f0, 5317>
1065             */
1066             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067             0x41, 0x2262, 0x0391, 0x2e,
1068             0x26,
1069             0x65e5, 0x672c, 0x8a9e,
1070             0x5c,
1071             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073             0x2f, 0x65e5, 0x672c, 0x8a9e,
1074             0x2f, 0x53f0, 0x5317
1075         };
1076         static const int32_t toUnicodeOffsets[] = {
1077             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078             15, 17, 19, 24,
1079             25,
1080             28, 30, 33,
1081             37,
1082             38, 39, 40, 41, 42, 43,
1083             44, 45, 46, 47, 48,
1084             49, 51, 53, 56,
1085             60, 62, 64
1086         };
1087         static const int32_t fromUnicodeOffsets[] = {
1088             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090             15, 15,
1091             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092             19,
1093             20, 21, 22, 23, 24, 25,
1094             26, 27, 28, 29, 30,
1095             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096             35, 36, 36, 36, 37, 37, 37, 37, 37
1097         };
1098 
1099         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1100 
1101         testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1102     }
1103 
1104     /* Test UTF-8 bad data handling*/
1105     {
1106         static const uint8_t utf8[]={
1107             0x61,
1108             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1109             0x00,
1110             0x62,
1111             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1113             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1114             0xdf, 0xbf,                     /* 7ff */
1115             0xbf,                           /* truncated tail */
1116             0xf4, 0x90, 0x80, 0x80,         /* 110000 */
1117             0x02
1118         };
1119 
1120         static const uint16_t utf8Expected[]={
1121             0x0061,
1122             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1123             0x0000,
1124             0x0062,
1125             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1127             0xdbff, 0xdfff,
1128             0x07ff,
1129             0xfffd,
1130             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1131             0x0002
1132         };
1133 
1134         static const int32_t utf8Offsets[]={
1135             0,
1136             1, 2, 3, 4,
1137             5,
1138             6,
1139             7, 8, 9, 10, 11,
1140             12, 13, 14, 15, 16,
1141             17, 17,
1142             21,
1143             23,
1144             24, 25, 26, 27,
1145             28
1146         };
1147         testConvertToU(utf8, sizeof(utf8),
1148                        utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1149 
1150     }
1151 
1152     /* Test UTF-32BE bad data handling*/
1153     {
1154         static const uint8_t utf32[]={
1155             0x00, 0x00, 0x00, 0x61,
1156             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1157             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1158             0x00, 0x00, 0x00, 0x62,
1159             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1160             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1161             0x00, 0x00, 0x01, 0x62,
1162             0x00, 0x00, 0x02, 0x62
1163         };
1164         static const uint16_t utf32Expected[]={
1165             0x0061,
1166             0xfffd,         /* 0x110000 out of range */
1167             0xDBFF,         /* 0x10FFFF in range */
1168             0xDFFF,
1169             0x0062,
1170             0xfffd,         /* 0xffffffff out of range */
1171             0xfffd,         /* 0x7fffffff out of range */
1172             0x0162,
1173             0x0262
1174         };
1175         static const int32_t utf32Offsets[]={
1176             0, 4, 8, 8, 12, 16, 20, 24, 28
1177         };
1178         static const uint8_t utf32ExpectedBack[]={
1179             0x00, 0x00, 0x00, 0x61,
1180             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1181             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1182             0x00, 0x00, 0x00, 0x62,
1183             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1184             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1185             0x00, 0x00, 0x01, 0x62,
1186             0x00, 0x00, 0x02, 0x62
1187         };
1188         static const int32_t utf32OffsetsBack[]={
1189             0,0,0,0,
1190             1,1,1,1,
1191             2,2,2,2,
1192             4,4,4,4,
1193             5,5,5,5,
1194             6,6,6,6,
1195             7,7,7,7,
1196             8,8,8,8
1197         };
1198 
1199         testConvertToU(utf32, sizeof(utf32),
1200                        utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1201         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1202             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1203     }
1204 
1205     /* Test UTF-32LE bad data handling*/
1206     {
1207         static const uint8_t utf32[]={
1208             0x61, 0x00, 0x00, 0x00,
1209             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1210             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1211             0x62, 0x00, 0x00, 0x00,
1212             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1213             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1214             0x62, 0x01, 0x00, 0x00,
1215             0x62, 0x02, 0x00, 0x00,
1216         };
1217 
1218         static const uint16_t utf32Expected[]={
1219             0x0061,
1220             0xfffd,         /* 0x110000 out of range */
1221             0xDBFF,         /* 0x10FFFF in range */
1222             0xDFFF,
1223             0x0062,
1224             0xfffd,         /* 0xffffffff out of range */
1225             0xfffd,         /* 0x7fffffff out of range */
1226             0x0162,
1227             0x0262
1228         };
1229         static const int32_t utf32Offsets[]={
1230             0, 4, 8, 8, 12, 16, 20, 24, 28
1231         };
1232         static const uint8_t utf32ExpectedBack[]={
1233             0x61, 0x00, 0x00, 0x00,
1234             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1235             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1236             0x62, 0x00, 0x00, 0x00,
1237             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1238             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1239             0x62, 0x01, 0x00, 0x00,
1240             0x62, 0x02, 0x00, 0x00
1241         };
1242         static const int32_t utf32OffsetsBack[]={
1243             0,0,0,0,
1244             1,1,1,1,
1245             2,2,2,2,
1246             4,4,4,4,
1247             5,5,5,5,
1248             6,6,6,6,
1249             7,7,7,7,
1250             8,8,8,8
1251         };
1252         testConvertToU(utf32, sizeof(utf32),
1253             utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1254         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1255             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1256     }
1257 }
1258 
TestCoverageMBCS()1259 static void TestCoverageMBCS(){
1260 #if 0
1261     UErrorCode status = U_ZERO_ERROR;
1262     const char *directory = loadTestData(&status);
1263     char* tdpath = NULL;
1264     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265     int len = strlen(directory);
1266     char* index=NULL;
1267 
1268     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1269     uprv_strcpy(saveDirectory,u_getDataDirectory());
1270     log_verbose("Retrieved data directory %s \n",saveDirectory);
1271     uprv_strcpy(tdpath,directory);
1272     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1273 
1274     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1275             *(index+1)=0;
1276     }
1277     u_setDataDirectory(tdpath);
1278     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1279 #endif
1280 
1281     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1282       which is test file for MBCS conversion with single-byte codepage data.*/
1283     {
1284 
1285         /* MBCS with single byte codepage data test1.ucm*/
1286         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1289 
1290         /*from Unicode*/
1291         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1292             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1293     }
1294 
1295     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1296       which is test file for MBCS conversion with three-byte codepage data.*/
1297     {
1298 
1299         /* MBCS with three byte codepage data test3.ucm*/
1300         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1302         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1303 
1304         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1307 
1308         /*from Unicode*/
1309         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1310             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1311 
1312         /*to Unicode*/
1313         testConvertToU(test3input, sizeof(test3input),
1314             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1315 
1316     }
1317 
1318     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1319       which is test file for MBCS conversion with four-byte codepage data.*/
1320     {
1321 
1322         /* MBCS with three byte codepage data test4.ucm*/
1323         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1325         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1326 
1327         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1330 
1331         /*from Unicode*/
1332         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1333             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1334 
1335         /*to Unicode*/
1336         testConvertToU(test4input, sizeof(test4input),
1337             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1338 
1339     }
1340 #if 0
1341     free(tdpath);
1342     /* restore the original data directory */
1343     log_verbose("Setting the data directory to %s \n", saveDirectory);
1344     u_setDataDirectory(saveDirectory);
1345     free(saveDirectory);
1346 #endif
1347 
1348 }
1349 
/**
 * Opens the converter named convName and checks that ucnv_getType()
 * reports convType for it.
 *
 * @param convName  converter name handed to my_ucnv_open()
 * @param convType  the UConverterType the converter is expected to report
 *
 * A failure to open the converter is reported with log_data_err() and the
 * type check is skipped. A type mismatch is reported with log_err(),
 * showing both the value actually returned and the expected one.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter *myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* Report what ucnv_getType() really returned, not just the expectation. */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X, expected 0x%X\n",
            convName, (unsigned int)actualType, (unsigned int)convType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }
    ucnv_close(myConverter);
}
1372 
TestConverterTypesAndStarters()1373 static void TestConverterTypesAndStarters()
1374 {
1375 #if !UCONFIG_NO_LEGACY_CONVERSION
1376     UConverter* myConverter;
1377     UErrorCode err = U_ZERO_ERROR;
1378     UBool mystarters[256];
1379 
1380 /*    const UBool expectedKSCstarters[256] = {
1381         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1407 
1408 
1409     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1410 
1411     myConverter = ucnv_open("ksc", &err);
1412     if (U_FAILURE(err)) {
1413       log_data_err("Failed to create an ibm-ksc converter\n");
1414       return;
1415     }
1416     else
1417     {
1418         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1419             log_err("ucnv_getType Failed for ibm-949\n");
1420         else
1421             log_verbose("ucnv_getType ibm-949 ok\n");
1422 
1423         if(myConverter!=NULL)
1424             ucnv_getStarters(myConverter, mystarters, &err);
1425 
1426         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427           log_err("Failed ucnv_getStarters for ksc\n");
1428           else
1429           log_verbose("ucnv_getStarters ok\n");*/
1430 
1431     }
1432     ucnv_close(myConverter);
1433 
1434     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1435     TestConverterType("ibm-878", UCNV_SBCS);
1436 #endif
1437 
1438     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1439 
1440     TestConverterType("ibm-1208", UCNV_UTF8);
1441 
1442     TestConverterType("utf-8", UCNV_UTF8);
1443     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1444     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1445     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1446     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1447 
1448 #if !UCONFIG_NO_LEGACY_CONVERSION
1449 
1450 #if defined(U_ENABLE_GENERIC_ISO_2022)
1451     TestConverterType("iso-2022", UCNV_ISO_2022);
1452 #endif
1453 
1454     TestConverterType("hz", UCNV_HZ);
1455 #endif
1456 
1457     TestConverterType("scsu", UCNV_SCSU);
1458 
1459 #if !UCONFIG_NO_LEGACY_CONVERSION
1460     TestConverterType("x-iscii-de", UCNV_ISCII);
1461 #endif
1462 
1463     TestConverterType("ascii", UCNV_US_ASCII);
1464     TestConverterType("utf-7", UCNV_UTF7);
1465     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1466     TestConverterType("bocu-1", UCNV_BOCU1);
1467 }
1468 
1469 static void
TestAmbiguousConverter(UConverter * cnv)1470 TestAmbiguousConverter(UConverter *cnv) {
1471     static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1472     UChar outUnicode[20]={ 0, 0, 0, 0 };
1473 
1474     const char *s;
1475     UChar *u;
1476     UErrorCode errorCode;
1477     UBool isAmbiguous;
1478 
1479     /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480     errorCode=U_ZERO_ERROR;
1481     s=inBytes;
1482     u=outUnicode;
1483     ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1484     if(U_FAILURE(errorCode)) {
1485         /* we do not care about general failures in this test; the input may just not be mappable */
1486         return;
1487     }
1488 
1489     if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1490         /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491         /* There are some encodings that are partially ASCII based,
1492         like the ISO-7 and GSM series of codepages, which we ignore. */
1493         return;
1494     }
1495 
1496     isAmbiguous=ucnv_isAmbiguous(cnv);
1497 
1498     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499     if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1500         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501             ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1502         return;
1503     }
1504 
1505     if(outUnicode[2]!=0x5c) {
1506         /* needs fixup, fix it */
1507         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1508         if(outUnicode[2]!=0x5c) {
1509             /* the fix failed */
1510             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1511             return;
1512         }
1513     }
1514 }
1515 
TestAmbiguous()1516 static void TestAmbiguous()
1517 {
1518     UErrorCode status = U_ZERO_ERROR;
1519     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1520     static const char target[] = {
1521         /* "\\usr\\local\\share\\data\\icutest.txt" */
1522         0x5c, 0x75, 0x73, 0x72,
1523         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525         0x5c, 0x64, 0x61, 0x74, 0x61,
1526         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1527         0
1528     };
1529     UChar asciiResult[200], sjisResult[200];
1530     int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1531     const char *name;
1532 
1533     /* enumerate all converters */
1534     status=U_ZERO_ERROR;
1535     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1536         cnv=ucnv_open(name, &status);
1537         if(U_SUCCESS(status)) {
1538             TestAmbiguousConverter(cnv);
1539             ucnv_close(cnv);
1540         } else {
1541             log_err("error: unable to open available converter \"%s\"\n", name);
1542             status=U_ZERO_ERROR;
1543         }
1544     }
1545 
1546 #if !UCONFIG_NO_LEGACY_CONVERSION
1547     sjis_cnv = ucnv_open("ibm-943", &status);
1548     if (U_FAILURE(status))
1549     {
1550         log_data_err("Failed to create a SJIS converter\n");
1551         return;
1552     }
1553     ascii_cnv = ucnv_open("LATIN-1", &status);
1554     if (U_FAILURE(status))
1555     {
1556         log_data_err("Failed to create a LATIN-1 converter\n");
1557         ucnv_close(sjis_cnv);
1558         return;
1559     }
1560     /* convert target from SJIS to Unicode */
1561     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1562     if (U_FAILURE(status))
1563     {
1564         log_err("Failed to convert the SJIS string.\n");
1565         ucnv_close(sjis_cnv);
1566         ucnv_close(ascii_cnv);
1567         return;
1568     }
1569     /* convert target from Latin-1 to Unicode */
1570     /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1571     if (U_FAILURE(status))
1572     {
1573         log_err("Failed to convert the Latin-1 string.\n");
1574         ucnv_close(sjis_cnv);
1575         ucnv_close(ascii_cnv);
1576         return;
1577     }
1578     if (!ucnv_isAmbiguous(sjis_cnv))
1579     {
1580         log_err("SJIS converter should contain ambiguous character mappings.\n");
1581         ucnv_close(sjis_cnv);
1582         ucnv_close(ascii_cnv);
1583         return;
1584     }
1585     if (u_strcmp(sjisResult, asciiResult) == 0)
1586     {
1587         log_err("File separators for SJIS don't need to be fixed.\n");
1588     }
1589     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1590     if (u_strcmp(sjisResult, asciiResult) != 0)
1591     {
1592         log_err("Fixing file separator for SJIS failed.\n");
1593     }
1594     ucnv_close(sjis_cnv);
1595     ucnv_close(ascii_cnv);
1596 #endif
1597 }
1598 
1599 static void
TestSignatureDetection()1600 TestSignatureDetection(){
1601     /* with null terminated strings */
1602     {
1603         static const char* data[] = {
1604                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1605                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1606                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1607                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1608 
1609                 "\xFE\xFF",             /* UTF-16BE */
1610                 "\xFF\xFE",             /* UTF-16LE */
1611                 "\xEF\xBB\xBF",         /* UTF-8    */
1612                 "\x0E\xFE\xFF",         /* SCSU     */
1613 
1614                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1615                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1616                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1617                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1618 
1619                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1620                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1621                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1622                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1623                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1624 
1625                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1626         };
1627         static const char* expected[] = {
1628                 "UTF-16BE",
1629                 "UTF-16LE",
1630                 "UTF-8",
1631                 "SCSU",
1632 
1633                 "UTF-16BE",
1634                 "UTF-16LE",
1635                 "UTF-8",
1636                 "SCSU",
1637 
1638                 "UTF-16BE",
1639                 "UTF-16LE",
1640                 "UTF-8",
1641                 "SCSU",
1642 
1643                 "UTF-7",
1644                 "UTF-7",
1645                 "UTF-7",
1646                 "UTF-7",
1647                 "UTF-7",
1648                 "UTF-EBCDIC"
1649         };
1650         static const int32_t expectedLength[] ={
1651             2,
1652             2,
1653             3,
1654             3,
1655 
1656             2,
1657             2,
1658             3,
1659             3,
1660 
1661             2,
1662             2,
1663             3,
1664             3,
1665 
1666             5,
1667             4,
1668             4,
1669             4,
1670             4,
1671             4
1672         };
1673         int i=0;
1674         UErrorCode err;
1675         int32_t signatureLength = -1;
1676         const char* source = NULL;
1677         const char* enc = NULL;
1678         for( ; i<UPRV_LENGTHOF(data); i++){
1679             err = U_ZERO_ERROR;
1680             source = data[i];
1681             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1682             if(U_FAILURE(err)){
1683                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1684                 continue;
1685             }
1686             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1687                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1688                 continue;
1689             }
1690             if(signatureLength != expectedLength[i]){
1691                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1692             }
1693         }
1694     }
1695     {
1696         static const char* data[] = {
1697                 "\xFE\xFF\x00",         /* UTF-16BE */
1698                 "\xFF\xFE\x00",         /* UTF-16LE */
1699                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1700                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1701                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1702                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1703                 "\xFE\xFF",             /* UTF-16BE */
1704                 "\xFF\xFE",             /* UTF-16LE */
1705                 "\xEF\xBB\xBF",         /* UTF-8    */
1706                 "\x0E\xFE\xFF",         /* SCSU     */
1707                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1708                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1709                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1710                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1711                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1712                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1713                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1714                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1715                 "\xFB\xEE\x28",         /* BOCU-1   */
1716                 "\xFF\x41\x42"          /* NULL     */
1717         };
1718         static const int len[] = {
1719             3,
1720             3,
1721             4,
1722             4,
1723             4,
1724             4,
1725             2,
1726             2,
1727             3,
1728             3,
1729             4,
1730             4,
1731             4,
1732             4,
1733             4,
1734             4,
1735             5,
1736             5,
1737             3,
1738             3
1739         };
1740 
1741         static const char* expected[] = {
1742                 "UTF-16BE",
1743                 "UTF-16LE",
1744                 "UTF-8",
1745                 "SCSU",
1746                 "UTF-32BE",
1747                 "UTF-32LE",
1748                 "UTF-16BE",
1749                 "UTF-16LE",
1750                 "UTF-8",
1751                 "SCSU",
1752                 "UTF-32BE",
1753                 "UTF-32LE",
1754                 "UTF-16BE",
1755                 "UTF-16LE",
1756                 "UTF-8",
1757                 "SCSU",
1758                 "UTF-32BE",
1759                 "UTF-32LE",
1760                 "BOCU-1",
1761                 NULL
1762         };
1763         static const int32_t expectedLength[] ={
1764             2,
1765             2,
1766             3,
1767             3,
1768             4,
1769             4,
1770             2,
1771             2,
1772             3,
1773             3,
1774             4,
1775             4,
1776             2,
1777             2,
1778             3,
1779             3,
1780             4,
1781             4,
1782             3,
1783             0
1784         };
1785         int i=0;
1786         UErrorCode err;
1787         int32_t signatureLength = -1;
1788         int32_t sourceLength=-1;
1789         const char* source = NULL;
1790         const char* enc = NULL;
1791         for( ; i<UPRV_LENGTHOF(data); i++){
1792             err = U_ZERO_ERROR;
1793             source = data[i];
1794             sourceLength = len[i];
1795             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1796             if(U_FAILURE(err)){
1797                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1798                 continue;
1799             }
1800             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1801                 if(expected[i] !=NULL){
1802                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1803                  continue;
1804                 }
1805             }
1806             if(signatureLength != expectedLength[i]){
1807                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1808             }
1809         }
1810     }
1811 }
1812 
TestUTF7()1813 static void TestUTF7() {
1814     /* test input */
1815     static const uint8_t in[]={
1816         /* H - +Jjo- - ! +- +2AHcAQ */
1817         0x48,
1818         0x2d,
1819         0x2b, 0x4a, 0x6a, 0x6f,
1820         0x2d, 0x2d,
1821         0x21,
1822         0x2b, 0x2d,
1823         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1824     };
1825 
1826     /* expected test results */
1827     static const int32_t results[]={
1828         /* number of bytes read, code point */
1829         1, 0x48,
1830         1, 0x2d,
1831         4, 0x263a, /* <WHITE SMILING FACE> */
1832         2, 0x2d,
1833         1, 0x21,
1834         2, 0x2b,
1835         7, 0x10401
1836     };
1837 
1838     const char *cnvName;
1839     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1840     UErrorCode errorCode=U_ZERO_ERROR;
1841     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1842     if(U_FAILURE(errorCode)) {
1843         log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1844         return;
1845     }
1846     TestNextUChar(cnv, source, limit, results, "UTF-7");
1847     /* Test the condition when source >= sourceLimit */
1848     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1849     cnvName = ucnv_getName(cnv, &errorCode);
1850     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1851         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1852     }
1853     ucnv_close(cnv);
1854 }
1855 
TestIMAP()1856 static void TestIMAP() {
1857     /* test input */
1858     static const uint8_t in[]={
1859         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1860         0x48,
1861         0x2d,
1862         0x26, 0x4a, 0x6a, 0x6f,
1863         0x2d, 0x2d,
1864         0x21,
1865         0x26, 0x2d,
1866         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1867     };
1868 
1869     /* expected test results */
1870     static const int32_t results[]={
1871         /* number of bytes read, code point */
1872         1, 0x48,
1873         1, 0x2d,
1874         4, 0x263a, /* <WHITE SMILING FACE> */
1875         2, 0x2d,
1876         1, 0x21,
1877         2, 0x26,
1878         7, 0x10401
1879     };
1880 
1881     const char *cnvName;
1882     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1883     UErrorCode errorCode=U_ZERO_ERROR;
1884     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1885     if(U_FAILURE(errorCode)) {
1886         log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1887         return;
1888     }
1889     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1890     /* Test the condition when source >= sourceLimit */
1891     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1892     cnvName = ucnv_getName(cnv, &errorCode);
1893     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1894         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1895     }
1896     ucnv_close(cnv);
1897 }
1898 
TestUTF8()1899 static void TestUTF8() {
1900     /* test input */
1901     static const uint8_t in[]={
1902         0x61,
1903         0xc2, 0x80,
1904         0xe0, 0xa0, 0x80,
1905         0xf0, 0x90, 0x80, 0x80,
1906         0xf4, 0x84, 0x8c, 0xa1,
1907         0xf0, 0x90, 0x90, 0x81
1908     };
1909 
1910     /* expected test results */
1911     static const int32_t results[]={
1912         /* number of bytes read, code point */
1913         1, 0x61,
1914         2, 0x80,
1915         3, 0x800,
1916         4, 0x10000,
1917         4, 0x104321,
1918         4, 0x10401
1919     };
1920 
1921     /* error test input */
1922     static const uint8_t in2[]={
1923         0x61,
1924         0xc0, 0x80,                     /* illegal non-shortest form */
1925         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1926         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1927         0xc0, 0xc0,                     /* illegal trail byte */
1928         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1929         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1930         0xfe,                           /* illegal byte altogether */
1931         0x62
1932     };
1933 
1934     /* expected error test results */
1935     static const int32_t results2[]={
1936         /* number of bytes read, code point */
1937         1, 0x61,
1938         22, 0x62
1939     };
1940 
1941     UConverterToUCallback cb;
1942     const void *p;
1943 
1944     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1945     UErrorCode errorCode=U_ZERO_ERROR;
1946     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1947     if(U_FAILURE(errorCode)) {
1948         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1949         return;
1950     }
1951     TestNextUChar(cnv, source, limit, results, "UTF-8");
1952     /* Test the condition when source >= sourceLimit */
1953     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1954 
1955     /* test error behavior with a skip callback */
1956     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1957     source=(const char *)in2;
1958     limit=(const char *)(in2+sizeof(in2));
1959     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1960 
1961     ucnv_close(cnv);
1962 }
1963 
TestCESU8()1964 static void TestCESU8() {
1965     /* test input */
1966     static const uint8_t in[]={
1967         0x61,
1968         0xc2, 0x80,
1969         0xe0, 0xa0, 0x80,
1970         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1971         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1972         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1973         0xef, 0xbf, 0xbc
1974     };
1975 
1976     /* expected test results */
1977     static const int32_t results[]={
1978         /* number of bytes read, code point */
1979         1, 0x61,
1980         2, 0x80,
1981         3, 0x800,
1982         6, 0x10000,
1983         3, 0xdc01,
1984         -1,0xd802,  /* may read 3 or 6 bytes */
1985         -1,0x10ffff,/* may read 0 or 3 bytes */
1986         3, 0xfffc
1987     };
1988 
1989     /* error test input */
1990     static const uint8_t in2[]={
1991         0x61,
1992         0xc0, 0x80,                     /* illegal non-shortest form */
1993         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1994         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1995         0xc0, 0xc0,                     /* illegal trail byte */
1996         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1997         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1998         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1999         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2000         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2001         0xfe,                           /* illegal byte altogether */
2002         0x62
2003     };
2004 
2005     /* expected error test results */
2006     static const int32_t results2[]={
2007         /* number of bytes read, code point */
2008         1, 0x61,
2009         34, 0x62
2010     };
2011 
2012     UConverterToUCallback cb;
2013     const void *p;
2014 
2015     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2016     UErrorCode errorCode=U_ZERO_ERROR;
2017     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2018     if(U_FAILURE(errorCode)) {
2019         log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2020         return;
2021     }
2022     TestNextUChar(cnv, source, limit, results, "CESU-8");
2023     /* Test the condition when source >= sourceLimit */
2024     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2025 
2026     /* test error behavior with a skip callback */
2027     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2028     source=(const char *)in2;
2029     limit=(const char *)(in2+sizeof(in2));
2030     TestNextUChar(cnv, source, limit, results2, "CESU-8");
2031 
2032     ucnv_close(cnv);
2033 }
2034 
TestUTF16()2035 static void TestUTF16() {
2036     /* test input */
2037     static const uint8_t in1[]={
2038         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2039     };
2040     static const uint8_t in2[]={
2041         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2042     };
2043     static const uint8_t in3[]={
2044         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2045     };
2046 
2047     /* expected test results */
2048     static const int32_t results1[]={
2049         /* number of bytes read, code point */
2050         4, 0x4e00,
2051         2, 0xfeff
2052     };
2053     static const int32_t results2[]={
2054         /* number of bytes read, code point */
2055         4, 0x004e,
2056         2, 0xfffe
2057     };
2058     static const int32_t results3[]={
2059         /* number of bytes read, code point */
2060         2, 0xfefe,
2061         2, 0x4e00,
2062         2, 0xfeff,
2063         4, 0x20001
2064     };
2065 
2066     const char *source, *limit;
2067 
2068     UErrorCode errorCode=U_ZERO_ERROR;
2069     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2070     if(U_FAILURE(errorCode)) {
2071         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2072         return;
2073     }
2074 
2075     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2076     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2077 
2078     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2079     ucnv_resetToUnicode(cnv);
2080     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2081 
2082     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2083     ucnv_resetToUnicode(cnv);
2084     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2085 
2086     /* Test the condition when source >= sourceLimit */
2087     ucnv_resetToUnicode(cnv);
2088     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2089 
2090     ucnv_close(cnv);
2091 }
2092 
TestUTF16BE()2093 static void TestUTF16BE() {
2094     /* test input */
2095     static const uint8_t in[]={
2096         0x00, 0x61,
2097         0x00, 0xc0,
2098         0x00, 0x31,
2099         0x00, 0xf4,
2100         0xce, 0xfe,
2101         0xd8, 0x01, 0xdc, 0x01
2102     };
2103 
2104     /* expected test results */
2105     static const int32_t results[]={
2106         /* number of bytes read, code point */
2107         2, 0x61,
2108         2, 0xc0,
2109         2, 0x31,
2110         2, 0xf4,
2111         2, 0xcefe,
2112         4, 0x10401
2113     };
2114 
2115     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2116     UErrorCode errorCode=U_ZERO_ERROR;
2117     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2118     if(U_FAILURE(errorCode)) {
2119         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2120         return;
2121     }
2122     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2123     /* Test the condition when source >= sourceLimit */
2124     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2125     /*Test for the condition where there is an invalid character*/
2126     {
2127         static const uint8_t source2[]={0x61};
2128         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2129         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2130     }
2131 #if 0
2132     /*
2133      * Test disabled because currently the UTF-16BE/LE converters are supposed
2134      * to not set errors for unpaired surrogates.
2135      * This may change with
2136      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2137      */
2138 
2139     /*Test for the condition where there is a surrogate pair*/
2140     {
2141         const uint8_t source2[]={0xd8, 0x01};
2142         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2143     }
2144 #endif
2145     ucnv_close(cnv);
2146 }
2147 
2148 static void
TestUTF16LE()2149 TestUTF16LE() {
2150     /* test input */
2151     static const uint8_t in[]={
2152         0x61, 0x00,
2153         0x31, 0x00,
2154         0x4e, 0x2e,
2155         0x4e, 0x00,
2156         0x01, 0xd8, 0x01, 0xdc
2157     };
2158 
2159     /* expected test results */
2160     static const int32_t results[]={
2161         /* number of bytes read, code point */
2162         2, 0x61,
2163         2, 0x31,
2164         2, 0x2e4e,
2165         2, 0x4e,
2166         4, 0x10401
2167     };
2168 
2169     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2170     UErrorCode errorCode=U_ZERO_ERROR;
2171     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2172     if(U_FAILURE(errorCode)) {
2173         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2174         return;
2175     }
2176     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2177     /* Test the condition when source >= sourceLimit */
2178     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2179     /*Test for the condition where there is an invalid character*/
2180     {
2181         static const uint8_t source2[]={0x61};
2182         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2183         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2184     }
2185 #if 0
2186     /*
2187      * Test disabled because currently the UTF-16BE/LE converters are supposed
2188      * to not set errors for unpaired surrogates.
2189      * This may change with
2190      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2191      */
2192 
2193     /*Test for the condition where there is a surrogate character*/
2194     {
2195         static const uint8_t source2[]={0x01, 0xd8};
2196         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2197     }
2198 #endif
2199 
2200     ucnv_close(cnv);
2201 }
2202 
TestUTF32()2203 static void TestUTF32() {
2204     /* test input */
2205     static const uint8_t in1[]={
2206         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2207     };
2208     static const uint8_t in2[]={
2209         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2210     };
2211     static const uint8_t in3[]={
2212         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2213     };
2214 
2215     /* expected test results */
2216     static const int32_t results1[]={
2217         /* number of bytes read, code point */
2218         8, 0x100f00,
2219         4, 0xfeff
2220     };
2221     static const int32_t results2[]={
2222         /* number of bytes read, code point */
2223         8, 0x0f1000,
2224         4, 0xfffe
2225     };
2226     static const int32_t results3[]={
2227         /* number of bytes read, code point */
2228         4, 0xfefe,
2229         4, 0x100f00,
2230         4, 0xfffd, /* unmatched surrogate */
2231         4, 0xfffd  /* unmatched surrogate */
2232     };
2233 
2234     const char *source, *limit;
2235 
2236     UErrorCode errorCode=U_ZERO_ERROR;
2237     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2238     if(U_FAILURE(errorCode)) {
2239         log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2240         return;
2241     }
2242 
2243     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2244     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2245 
2246     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2247     ucnv_resetToUnicode(cnv);
2248     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2249 
2250     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2251     ucnv_resetToUnicode(cnv);
2252     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2253 
2254     /* Test the condition when source >= sourceLimit */
2255     ucnv_resetToUnicode(cnv);
2256     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2257 
2258     ucnv_close(cnv);
2259 }
2260 
2261 static void
TestUTF32BE()2262 TestUTF32BE() {
2263     /* test input */
2264     static const uint8_t in[]={
2265         0x00, 0x00, 0x00, 0x61,
2266         0x00, 0x00, 0x30, 0x61,
2267         0x00, 0x00, 0xdc, 0x00,
2268         0x00, 0x00, 0xd8, 0x00,
2269         0x00, 0x00, 0xdf, 0xff,
2270         0x00, 0x00, 0xff, 0xfe,
2271         0x00, 0x10, 0xab, 0xcd,
2272         0x00, 0x10, 0xff, 0xff
2273     };
2274 
2275     /* expected test results */
2276     static const int32_t results[]={
2277         /* number of bytes read, code point */
2278         4, 0x61,
2279         4, 0x3061,
2280         4, 0xfffd,
2281         4, 0xfffd,
2282         4, 0xfffd,
2283         4, 0xfffe,
2284         4, 0x10abcd,
2285         4, 0x10ffff
2286     };
2287 
2288     /* error test input */
2289     static const uint8_t in2[]={
2290         0x00, 0x00, 0x00, 0x61,
2291         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2292         0x00, 0x00, 0x00, 0x62,
2293         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2294         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2295         0x00, 0x00, 0x01, 0x62,
2296         0x00, 0x00, 0x02, 0x62
2297     };
2298 
2299     /* expected error test results */
2300     static const int32_t results2[]={
2301         /* number of bytes read, code point */
2302         4,  0x61,
2303         8,  0x62,
2304         12, 0x162,
2305         4,  0x262
2306     };
2307 
2308     UConverterToUCallback cb;
2309     const void *p;
2310 
2311     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2312     UErrorCode errorCode=U_ZERO_ERROR;
2313     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2314     if(U_FAILURE(errorCode)) {
2315         log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2316         return;
2317     }
2318     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2319 
2320     /* Test the condition when source >= sourceLimit */
2321     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2322 
2323     /* test error behavior with a skip callback */
2324     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2325     source=(const char *)in2;
2326     limit=(const char *)(in2+sizeof(in2));
2327     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2328 
2329     ucnv_close(cnv);
2330 }
2331 
2332 static void
TestUTF32LE()2333 TestUTF32LE() {
2334     /* test input */
2335     static const uint8_t in[]={
2336         0x61, 0x00, 0x00, 0x00,
2337         0x61, 0x30, 0x00, 0x00,
2338         0x00, 0xdc, 0x00, 0x00,
2339         0x00, 0xd8, 0x00, 0x00,
2340         0xff, 0xdf, 0x00, 0x00,
2341         0xfe, 0xff, 0x00, 0x00,
2342         0xcd, 0xab, 0x10, 0x00,
2343         0xff, 0xff, 0x10, 0x00
2344     };
2345 
2346     /* expected test results */
2347     static const int32_t results[]={
2348         /* number of bytes read, code point */
2349         4, 0x61,
2350         4, 0x3061,
2351         4, 0xfffd,
2352         4, 0xfffd,
2353         4, 0xfffd,
2354         4, 0xfffe,
2355         4, 0x10abcd,
2356         4, 0x10ffff
2357     };
2358 
2359     /* error test input */
2360     static const uint8_t in2[]={
2361         0x61, 0x00, 0x00, 0x00,
2362         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2363         0x62, 0x00, 0x00, 0x00,
2364         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2365         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2366         0x62, 0x01, 0x00, 0x00,
2367         0x62, 0x02, 0x00, 0x00,
2368     };
2369 
2370     /* expected error test results */
2371     static const int32_t results2[]={
2372         /* number of bytes read, code point */
2373         4,  0x61,
2374         8,  0x62,
2375         12, 0x162,
2376         4,  0x262,
2377     };
2378 
2379     UConverterToUCallback cb;
2380     const void *p;
2381 
2382     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2383     UErrorCode errorCode=U_ZERO_ERROR;
2384     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2385     if(U_FAILURE(errorCode)) {
2386         log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2387         return;
2388     }
2389     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2390 
2391     /* Test the condition when source >= sourceLimit */
2392     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2393 
2394     /* test error behavior with a skip callback */
2395     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2396     source=(const char *)in2;
2397     limit=(const char *)(in2+sizeof(in2));
2398     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2399 
2400     ucnv_close(cnv);
2401 }
2402 
2403 static void
TestLATIN1()2404 TestLATIN1() {
2405     /* test input */
2406     static const uint8_t in[]={
2407        0x61,
2408        0x31,
2409        0x32,
2410        0xc0,
2411        0xf0,
2412        0xf4,
2413     };
2414 
2415     /* expected test results */
2416     static const int32_t results[]={
2417         /* number of bytes read, code point */
2418         1, 0x61,
2419         1, 0x31,
2420         1, 0x32,
2421         1, 0xc0,
2422         1, 0xf0,
2423         1, 0xf4,
2424     };
2425     static const uint16_t in1[] = {
2426         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2427         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2428         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2429         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2430         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2431         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2432         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2433         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2434         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2435         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2436         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2437         0xcb, 0x82
2438     };
2439     static const uint8_t out1[] = {
2440         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2441         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2442         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2443         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2444         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2445         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2446         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2447         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2448         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2449         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2450         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2451         0xcb, 0x82
2452     };
2453     static const uint16_t in2[]={
2454         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2455         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2456         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2457         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2458         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2459         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2460         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2461         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2462         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2463         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2464         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2465         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2466         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2467         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2468         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2469         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2470         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2471         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2472         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2473         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2474         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2475         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2476         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2477         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2478         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2479         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2480         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2481         0x37, 0x20, 0x2A, 0x2F,
2482     };
2483     static const unsigned char out2[]={
2484         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2485         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2486         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2487         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2488         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2489         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2490         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2491         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2492         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2493         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2494         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2495         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2496         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2497         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2498         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2499         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2500         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2501         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2502         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2503         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2504         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2505         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2506         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2507         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2508         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2509         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2510         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2511         0x37, 0x20, 0x2A, 0x2F,
2512     };
2513     const char *source=(const char *)in;
2514     const char *limit=(const char *)in+sizeof(in);
2515 
2516     UErrorCode errorCode=U_ZERO_ERROR;
2517     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2518     if(U_FAILURE(errorCode)) {
2519         log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2520         return;
2521     }
2522     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2523     /* Test the condition when source >= sourceLimit */
2524     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2525     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2526     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2527 
2528     ucnv_close(cnv);
2529 }
2530 
2531 static void
TestSBCS()2532 TestSBCS() {
2533     /* test input */
2534     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2535     /* expected test results */
2536     static const int32_t results[]={
2537         /* number of bytes read, code point */
2538         1, 0x61,
2539         1, 0xbf,
2540         1, 0xc4,
2541         1, 0x2021,
2542         1, 0xf8ff,
2543         1, 0x00d9
2544     };
2545 
2546     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2547     UErrorCode errorCode=U_ZERO_ERROR;
2548     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2549     if(U_FAILURE(errorCode)) {
2550         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2551         return;
2552     }
2553     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2554     /* Test the condition when source >= sourceLimit */
2555     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2556     /*Test for Illegal character */ /*
2557     {
2558     static const uint8_t input1[]={ 0xA1 };
2559     const char* illegalsource=(const char*)input1;
2560     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal character");
2561     }
2562    */
2563     ucnv_close(cnv);
2564 }
2565 
2566 static void
TestDBCS()2567 TestDBCS() {
2568     /* test input */
2569     static const uint8_t in[]={
2570         0x44, 0x6a,
2571         0xc4, 0x9c,
2572         0x7a, 0x74,
2573         0x46, 0xab,
2574         0x42, 0x5b,
2575 
2576     };
2577 
2578     /* expected test results */
2579     static const int32_t results[]={
2580         /* number of bytes read, code point */
2581         2, 0x00a7,
2582         2, 0xe1d2,
2583         2, 0x6962,
2584         2, 0xf842,
2585         2, 0xffe5,
2586     };
2587 
2588     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2589     UErrorCode errorCode=U_ZERO_ERROR;
2590 
2591     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2592     if(U_FAILURE(errorCode)) {
2593         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2594         return;
2595     }
2596     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2597     /* Test the condition when source >= sourceLimit */
2598     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2599     /*Test for the condition where there is an invalid character*/
2600     {
2601         static const uint8_t source2[]={0x1a, 0x1b};
2602         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2603     }
2604     /*Test for the condition where we have a truncated char*/
2605     {
2606         static const uint8_t source1[]={0xc4};
2607         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2608         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2609     }
2610     ucnv_close(cnv);
2611 }
2612 
2613 static void
TestMBCS()2614 TestMBCS() {
2615     /* test input */
2616     static const uint8_t in[]={
2617         0x01,
2618         0xa6, 0xa3,
2619         0x00,
2620         0xa6, 0xa1,
2621         0x08,
2622         0xc2, 0x76,
2623         0xc2, 0x78,
2624 
2625     };
2626 
2627     /* expected test results */
2628     static const int32_t results[]={
2629         /* number of bytes read, code point */
2630         1, 0x0001,
2631         2, 0x250c,
2632         1, 0x0000,
2633         2, 0x2500,
2634         1, 0x0008,
2635         2, 0xd60c,
2636         2, 0xd60e,
2637     };
2638 
2639     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2640     UErrorCode errorCode=U_ZERO_ERROR;
2641 
2642     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2643     if(U_FAILURE(errorCode)) {
2644         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2645         return;
2646     }
2647     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2648     /* Test the condition when source >= sourceLimit */
2649     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2650     /*Test for the condition where there is an invalid character*/
2651     {
2652         static const uint8_t source2[]={0xa1, 0x80};
2653         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2654     }
2655     /*Test for the condition where we have a truncated char*/
2656     {
2657         static const uint8_t source1[]={0xc4};
2658         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2659         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2660     }
2661     ucnv_close(cnv);
2662 
2663 }
2664 
2665 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2666 static void
TestICCRunout()2667 TestICCRunout() {
2668 /*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2669 
2670     const char *cnvName = "ibm-1363";
2671     UErrorCode status = U_ZERO_ERROR;
2672     const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2673     /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2674     const char *source = sourceData;
2675     const char *sourceLim = sourceData+sizeof(sourceData);
2676     UChar c1, c2, c3;
2677     UConverter *cnv=ucnv_open(cnvName, &status);
2678     if(U_FAILURE(status)) {
2679         log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2680 	return;
2681     }
2682 
2683 #if 0
2684     {
2685     UChar   targetBuf[256];
2686     UChar   *target = targetBuf;
2687     UChar   *targetLim = target+256;
2688     ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2689 
2690     log_info("After convert: target@%d, source@%d, status%s\n",
2691 	     target-targetBuf, source-sourceData, u_errorName(status));
2692 
2693     if(U_FAILURE(status)) {
2694 	log_err("Failed to convert: %s\n", u_errorName(status));
2695     } else {
2696 
2697     }
2698     }
2699 #endif
2700 
2701     c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2702     log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2703 
2704     c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2705     log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2706 
2707     c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2708     log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2709 
2710     if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2711 	log_verbose("OK\n");
2712     } else {
2713 	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2714     }
2715 
2716     ucnv_close(cnv);
2717 
2718 }
2719 #endif
2720 
2721 #ifdef U_ENABLE_GENERIC_ISO_2022
2722 
2723 static void
TestISO_2022()2724 TestISO_2022() {
2725     /* test input */
2726     static const uint8_t in[]={
2727         0x1b, 0x25, 0x42,
2728         0x31,
2729         0x32,
2730         0x61,
2731         0xc2, 0x80,
2732         0xe0, 0xa0, 0x80,
2733         0xf0, 0x90, 0x80, 0x80
2734     };
2735 
2736 
2737 
2738     /* expected test results */
2739     static const int32_t results[]={
2740         /* number of bytes read, code point */
2741         4, 0x0031,  /* 4 bytes including the escape sequence */
2742         1, 0x0032,
2743         1, 0x61,
2744         2, 0x80,
2745         3, 0x800,
2746         4, 0x10000
2747     };
2748 
2749     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2750     UErrorCode errorCode=U_ZERO_ERROR;
2751     UConverter *cnv;
2752 
2753     cnv=ucnv_open("ISO_2022", &errorCode);
2754     if(U_FAILURE(errorCode)) {
2755         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2756         return;
2757     }
2758     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2759 
2760     /* Test the condition when source >= sourceLimit */
2761     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2762     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2763     /*Test for the condition where we have a truncated char*/
2764     {
2765         static const uint8_t source1[]={0xc4};
2766         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2767         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2768     }
2769     /*Test for the condition where there is an invalid character*/
2770     {
2771         static const uint8_t source2[]={0xa1, 0x01};
2772         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2773     }
2774     ucnv_close(cnv);
2775 }
2776 
2777 #endif
2778 
2779 static void
TestSmallTargetBuffer(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2780 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2781     const UChar* uSource;
2782     const UChar* uSourceLimit;
2783     const char* cSource;
2784     const char* cSourceLimit;
2785     UChar *uTargetLimit =NULL;
2786     UChar *uTarget;
2787     char *cTarget;
2788     const char *cTargetLimit;
2789     char *cBuf;
2790     UChar *uBuf; /*,*test;*/
2791     int32_t uBufSize = 120;
2792     int len=0;
2793     int i=2;
2794     UErrorCode errorCode=U_ZERO_ERROR;
2795     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2796     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2797     ucnv_reset(cnv);
2798     for(;--i>0; ){
2799         uSource = (UChar*) source;
2800         uSourceLimit=(const UChar*)sourceLimit;
2801         cTarget = cBuf;
2802         uTarget = uBuf;
2803         cSource = cBuf;
2804         cTargetLimit = cBuf;
2805         uTargetLimit = uBuf;
2806 
2807         do{
2808 
2809             cTargetLimit = cTargetLimit+ i;
2810             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2811             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2812                errorCode=U_ZERO_ERROR;
2813                 continue;
2814             }
2815 
2816             if(U_FAILURE(errorCode)){
2817                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2818                 return;
2819             }
2820 
2821         }while (uSource<uSourceLimit);
2822 
2823         cSourceLimit =cTarget;
2824         do{
2825             uTargetLimit=uTargetLimit+i;
2826             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2827             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2828                errorCode=U_ZERO_ERROR;
2829                 continue;
2830             }
2831             if(U_FAILURE(errorCode)){
2832                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2833                     return;
2834             }
2835         }while(cSource<cSourceLimit);
2836 
2837         uSource = source;
2838         /*test =uBuf;*/
2839         for(len=0;len<(int)(source - sourceLimit);len++){
2840             if(uBuf[len]!=uSource[len]){
2841                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2842             }
2843         }
2844     }
2845     free(uBuf);
2846     free(cBuf);
2847 }
2848 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2849 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2850     const UChar* uSource;
2851     const UChar* uSourceLimit;
2852     const char* cSource;
2853     UChar *uTargetLimit =NULL;
2854     UChar *uTarget;
2855     char *cTarget;
2856     const char *cTargetLimit;
2857     char *cBuf;
2858     UChar *uBuf,*test;
2859     int32_t uBufSize = 120;
2860     int numCharsInTarget=0;
2861     UErrorCode errorCode=U_ZERO_ERROR;
2862     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2863     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2864     uSource = source;
2865     uSourceLimit=sourceLimit;
2866     cTarget = cBuf;
2867     cTargetLimit = cBuf +uBufSize*5;
2868     uTarget = uBuf;
2869     uTargetLimit = uBuf+ uBufSize*5;
2870     ucnv_reset(cnv);
2871     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2872     if(U_FAILURE(errorCode)){
2873         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2874         return;
2875     }
2876     cSource = cBuf;
2877     test =uBuf;
2878     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2879     if(U_FAILURE(errorCode)){
2880         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2881         return;
2882     }
2883     uSource = source;
2884     while(uSource<uSourceLimit){
2885         if(*test!=*uSource){
2886 
2887             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2888         }
2889         uSource++;
2890         test++;
2891     }
2892     free(uBuf);
2893     free(cBuf);
2894 }
2895 
/*
 * Round-trips the UTF-16 text [source, sourceLimit) through cnv while the
 * source limit is moved forward by only one unit per call, so the converter
 * sees its input in very small pieces. The target buffers are large.
 *
 * source      - first UTF-16 code unit of the text
 * sourceLimit - one past the last code unit
 * cnv         - open converter; it is reset before use and left open
 *
 * Mismatches between the input and the round-tripped text are reported with
 * log_err(). Both work buffers are released on every exit path.
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf; /*,*test;*/
    int32_t uBufSize = 120;
    int len=0;
    int expected, produced, checked;
    int i=2;
    const UChar *temp = sourceLimit;
    UErrorCode errorCode=U_ZERO_ERROR;
    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(uBuf==NULL || cBuf==NULL){
        log_err("TestSmallSourceBuffer: unable to allocate the work buffers\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2 and is pre-decremented, so this runs once with i==1 */
    for(;--i>0;){
        uSource = (const UChar*) source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        cTargetLimit = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cTargetLimit+uBufSize*10;
        uSourceLimit=uSource;
        /* UTF-16 -> bytes, exposing one more code unit per call */
        do{

            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
               errorCode=U_ZERO_ERROR;
                continue;
            }

            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }

        }while (uSource<temp);

        /* bytes -> UTF-16, exposing one more byte per call */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
               errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        /*
         * Compare the round-tripped text with the input. The bound used to be
         * (source - sourceLimit), which is negative, so nothing was compared.
         * Only UChars that were actually written are checked: neither loop
         * flushes the converter, so the tail of the text may not have been
         * written to uBuf.
         */
        uSource = (const UChar*) source;
        expected = (int)(temp - uSource);
        produced = (int)(uTarget - uBuf);
        checked = produced<expected ? produced : expected;
        for(len=0;len<checked;len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }
cleanup:
    free(uBuf);
    free(cBuf);
}
2970 static void
TestGetNextUChar2022(UConverter * cnv,const char * source,const char * limit,const uint16_t results[],const char * message)2971 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2972                      const uint16_t results[], const char* message){
2973 /*     const char* s0; */
2974      const char* s=(char*)source;
2975      const uint16_t *r=results;
2976      UErrorCode errorCode=U_ZERO_ERROR;
2977      uint32_t c,exC;
2978      ucnv_reset(cnv);
2979      while(s<limit) {
2980 	 /* s0=s; */
2981         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2982         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2983             break; /* no more significant input */
2984         } else if(U_FAILURE(errorCode)) {
2985             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2986             break;
2987         } else {
2988             if(U16_IS_LEAD(*r)){
2989                 int i =0, len = 2;
2990                 U16_NEXT(r, i, len, exC);
2991                 r++;
2992             }else{
2993                 exC = *r;
2994             }
2995             if(c!=(uint32_t)(exC))
2996                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2997         }
2998         r++;
2999     }
3000 }
3001 
TestJitterbug930(const char * enc)3002 static int TestJitterbug930(const char* enc){
3003     UErrorCode err = U_ZERO_ERROR;
3004     UConverter*converter;
3005     char out[80];
3006     char*target = out;
3007     UChar in[4];
3008     const UChar*source = in;
3009     int32_t off[80];
3010     int32_t* offsets = off;
3011     int numOffWritten=0;
3012     UBool flush = 0;
3013     converter = my_ucnv_open(enc, &err);
3014 
3015     in[0] = 0x41;     /* 0x4E00;*/
3016     in[1] = 0x4E01;
3017     in[2] = 0x4E02;
3018     in[3] = 0x4E03;
3019 
3020     memset(off, '*', sizeof(off));
3021 
3022     ucnv_fromUnicode (converter,
3023             &target,
3024             target+2,
3025             &source,
3026             source+3,
3027             offsets,
3028             flush,
3029             &err);
3030 
3031         /* writes three bytes into the output buffer: 41 1B 24
3032         * but offsets contains 0 1 1
3033     */
3034     while(*offsets< off[10]){
3035         numOffWritten++;
3036         offsets++;
3037     }
3038     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3039     if(numOffWritten!= (int)(target-out)){
3040         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3041     }
3042 
3043     err = U_ZERO_ERROR;
3044 
3045     memset(off,'*' , sizeof(off));
3046 
3047     flush = 1;
3048     offsets=off;
3049     ucnv_fromUnicode (converter,
3050             &target,
3051             target+4,
3052             &source,
3053             source,
3054             offsets,
3055             flush,
3056             &err);
3057     numOffWritten=0;
3058     while(*offsets< off[10]){
3059         numOffWritten++;
3060         if(*offsets!= -1){
3061             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3062         }
3063         offsets++;
3064     }
3065 
3066     /* writes 42 43 7A into output buffer,
3067      * offsets contains -1 -1 -1
3068      */
3069     ucnv_close(converter);
3070     return 0;
3071 }
3072 
3073 static void
TestHZ()3074 TestHZ() {
3075     /* test input */
3076     static const uint16_t in[]={
3077             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3078             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3079             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3080             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3081             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3082             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3083             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3084             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3085             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3086             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3087             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3088             0x005A, 0x005B, 0x005C, 0x000A
3089       };
3090     const UChar* uSource;
3091     const UChar* uSourceLimit;
3092     const char* cSource;
3093     const char* cSourceLimit;
3094     UChar *uTargetLimit =NULL;
3095     UChar *uTarget;
3096     char *cTarget;
3097     const char *cTargetLimit;
3098     char *cBuf = NULL;
3099     UChar *uBuf = NULL;
3100     UChar *test;
3101     int32_t uBufSize = 120;
3102     UErrorCode errorCode=U_ZERO_ERROR;
3103     UConverter *cnv = NULL;
3104     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3105     int32_t* myOff= offsets;
3106     cnv=ucnv_open("HZ", &errorCode);
3107     if(U_FAILURE(errorCode)) {
3108         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3109         goto cleanup;
3110     }
3111 
3112     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3113     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3114     uSource = (const UChar*)in;
3115     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3116     cTarget = cBuf;
3117     cTargetLimit = cBuf +uBufSize*5;
3118     uTarget = uBuf;
3119     uTargetLimit = uBuf+ uBufSize*5;
3120     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3121     if(U_FAILURE(errorCode)){
3122         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3123         goto cleanup;
3124     }
3125     cSource = cBuf;
3126     cSourceLimit =cTarget;
3127     test =uBuf;
3128     myOff=offsets;
3129     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3130     if(U_FAILURE(errorCode)){
3131         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3132         goto cleanup;
3133     }
3134     uSource = (const UChar*)in;
3135     while(uSource<uSourceLimit){
3136         if(*test!=*uSource){
3137 
3138             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3139         }
3140         uSource++;
3141         test++;
3142     }
3143     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3144     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3145     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3146     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3147     TestJitterbug930("csISO2022JP");
3148 
3149 cleanup:
3150     ucnv_close(cnv);
3151     free(offsets);
3152     free(uBuf);
3153     free(cBuf);
3154 }
3155 
3156 static void
TestISCII()3157 TestISCII(){
3158         /* test input */
3159     static const uint16_t in[]={
3160         /* test full range of Devanagari */
3161         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3162         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3163         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3164         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3165         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3166         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3167         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3168         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3169         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3170         0x096D,0x096E,0x096F,
3171         /* test Soft halant*/
3172         0x0915,0x094d, 0x200D,
3173         /* test explicit halant */
3174         0x0915,0x094d, 0x200c,
3175         /* test double danda */
3176         0x965,
3177         /* test ASCII */
3178         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3179         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3180         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3181         /* tests from Lotus */
3182         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3183         0x0930,0x094D,0x200D,
3184         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3185         0x0915,0x0921,0x002B,0x095F,
3186         /* tamil range */
3187         0x0B86, 0xB87, 0xB88,
3188         /* telugu range */
3189         0x0C05, 0x0C02, 0x0C03,0x0c31,
3190         /* kannada range */
3191         0x0C85, 0xC82, 0x0C83,
3192         /* test Abbr sign and Anudatta */
3193         0x0970, 0x952,
3194        /* 0x0958,
3195         0x0959,
3196         0x095A,
3197         0x095B,
3198         0x095C,
3199         0x095D,
3200         0x095E,
3201         0x095F,*/
3202         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3203         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3204         0x090C ,
3205         0x0962,
3206         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3207         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3208         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3209         0x093D /* Avagraha  0xEA, 0xE9*/,
3210         0x0958,
3211         0x0959,
3212         0x095A,
3213         0x095B,
3214         0x095C,
3215         0x095D,
3216         0x095E,
3217         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3218       };
3219     static const unsigned char byteArr[]={
3220 
3221         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3222         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3223         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3224         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3225         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3226         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3227         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3228         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3229         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3230         0xf8,0xf9,0xfa,
3231         /* test soft halant */
3232         0xb3, 0xE8, 0xE9,
3233         /* test explicit halant */
3234         0xb3, 0xE8, 0xE8,
3235         /* test double danda */
3236         0xea, 0xea,
3237         /* test ASCII */
3238         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3239         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3240         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3241         /* test ATR code */
3242 
3243         /* tests from Lotus */
3244         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3245         0xEF,0x42,0xCF,0xE8,0xD9,
3246         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3247         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3248         /* tamil range */
3249         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3250         /* telugu range */
3251         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3252         /* kannada range */
3253         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3254         /* anudatta and abbreviation sign */
3255         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3256 
3257 
3258         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3259 
3260         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3261 
3262         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3263 
3264         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3265 
3266         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3267 
3268         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3269 
3270         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3271 
3272         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3273 
3274         0xB3, 0xE9, /* Ka + NUKTA */
3275 
3276         0xB4, 0xE9, /* Kha + NUKTA */
3277 
3278         0xB5, 0xE9, /* Ga + NUKTA */
3279 
3280         0xBA, 0xE9,
3281 
3282         0xBF, 0xE9,
3283 
3284         0xC0, 0xE9,
3285 
3286         0xC9, 0xE9,
3287         /* INV halant RA    */
3288         0xD9, 0xE8, 0xCF,
3289         0x00, 0x00A0,
3290         /* just consume unhandled codepoints */
3291         0xEF, 0x30,
3292 
3293     };
3294     testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3295     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3296 
3297 }
3298 
3299 static void
TestISO_2022_JP()3300 TestISO_2022_JP() {
3301     /* test input */
3302     static const uint16_t in[]={
3303         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3304         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3305         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3306         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3307         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3308         0x201D, 0x3014, 0x000D, 0x000A,
3309         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3310         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3311         };
3312     const UChar* uSource;
3313     const UChar* uSourceLimit;
3314     const char* cSource;
3315     const char* cSourceLimit;
3316     UChar *uTargetLimit =NULL;
3317     UChar *uTarget;
3318     char *cTarget;
3319     const char *cTargetLimit;
3320     char *cBuf = NULL;
3321     UChar *uBuf = NULL;
3322     UChar *test;
3323     int32_t uBufSize = 120;
3324     UErrorCode errorCode=U_ZERO_ERROR;
3325     UConverter *cnv = NULL;
3326     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3327     int32_t* myOff= offsets;
3328     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3329     if(U_FAILURE(errorCode)) {
3330         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3331         goto cleanup;
3332     }
3333 
3334     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3335     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3336     uSource = (const UChar*)in;
3337     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3338     cTarget = cBuf;
3339     cTargetLimit = cBuf +uBufSize*5;
3340     uTarget = uBuf;
3341     uTargetLimit = uBuf+ uBufSize*5;
3342     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3343     if(U_FAILURE(errorCode)){
3344         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3345         goto cleanup;
3346     }
3347     cSource = cBuf;
3348     cSourceLimit =cTarget;
3349     test =uBuf;
3350     myOff=offsets;
3351     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3352     if(U_FAILURE(errorCode)){
3353         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3354         goto cleanup;
3355     }
3356 
3357     uSource = (const UChar*)in;
3358     while(uSource<uSourceLimit){
3359         if(*test!=*uSource){
3360 
3361             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3362         }
3363         uSource++;
3364         test++;
3365     }
3366 
3367     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3368     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3369     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3370     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3371     TestJitterbug930("csISO2022JP");
3372 
3373 cleanup:
3374     ucnv_close(cnv);
3375     free(uBuf);
3376     free(cBuf);
3377     free(offsets);
3378 }
3379 
TestConv(const uint16_t in[],int len,const char * conv,const char * lang,char byteArr[],int byteArrLen)3380 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3381     const UChar* uSource;
3382     const UChar* uSourceLimit;
3383     const char* cSource;
3384     const char* cSourceLimit;
3385     UChar *uTargetLimit =NULL;
3386     UChar *uTarget;
3387     char *cTarget;
3388     const char *cTargetLimit;
3389     char *cBuf;
3390     UChar *uBuf,*test;
3391     int32_t uBufSize = 120*10;
3392     UErrorCode errorCode=U_ZERO_ERROR;
3393     UConverter *cnv;
3394     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3395     int32_t* myOff= offsets;
3396     cnv=my_ucnv_open(conv, &errorCode);
3397     if(U_FAILURE(errorCode)) {
3398         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3399         return;
3400     }
3401 
3402     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3403     cBuf =(char*)malloc(uBufSize * sizeof(char));
3404     uSource = (const UChar*)in;
3405     uSourceLimit=uSource+len;
3406     cTarget = cBuf;
3407     cTargetLimit = cBuf +uBufSize;
3408     uTarget = uBuf;
3409     uTargetLimit = uBuf+ uBufSize;
3410     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3411     if(U_FAILURE(errorCode)){
3412         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3413         return;
3414     }
3415     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3416     cSource = cBuf;
3417     cSourceLimit =cTarget;
3418     test =uBuf;
3419     myOff=offsets;
3420     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3421     if(U_FAILURE(errorCode)){
3422         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3423         return;
3424     }
3425 
3426     uSource = (const UChar*)in;
3427     while(uSource<uSourceLimit){
3428         if(*test!=*uSource){
3429             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3430         }
3431         uSource++;
3432         test++;
3433     }
3434     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3435     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3436     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3437     if(byteArr && byteArrLen!=0){
3438         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3439         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3440         {
3441             cSource = byteArr;
3442             cSourceLimit = cSource+byteArrLen;
3443             test=uBuf;
3444             myOff = offsets;
3445             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3446             if(U_FAILURE(errorCode)){
3447                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3448                 return;
3449             }
3450 
3451             uSource = (const UChar*)in;
3452             while(uSource<uSourceLimit){
3453                 if(*test!=*uSource){
3454                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3455                 }
3456                 uSource++;
3457                 test++;
3458             }
3459         }
3460     }
3461 
3462     ucnv_close(cnv);
3463     free(uBuf);
3464     free(cBuf);
3465     free(offsets);
3466 }
3467 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)3468 _charAt(int32_t offset, void *context) {
3469     return ((char*)context)[offset];
3470 }
3471 
3472 static int32_t
unescape(UChar * dst,int32_t dstLen,const char * src,int32_t srcLen,UErrorCode * status)3473 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3474     int32_t srcIndex=0;
3475     int32_t dstIndex=0;
3476     if(U_FAILURE(*status)){
3477         return 0;
3478     }
3479     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3480         *status = U_ILLEGAL_ARGUMENT_ERROR;
3481         return 0;
3482     }
3483     if(srcLen==-1){
3484         srcLen = (int32_t)uprv_strlen(src);
3485     }
3486 
3487     for (; srcIndex<srcLen; ) {
3488         UChar32 c = src[srcIndex++];
3489         if (c == 0x005C /*'\\'*/) {
3490             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3491             if (c == (UChar32)0xFFFFFFFF) {
3492                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3493                 break; /* invalid escape sequence */
3494             }
3495         }
3496         if(dstIndex < dstLen){
3497             if(c>0xFFFF){
3498                dst[dstIndex++] = U16_LEAD(c);
3499                if(dstIndex<dstLen){
3500                     dst[dstIndex]=U16_TRAIL(c);
3501                }else{
3502                    *status=U_BUFFER_OVERFLOW_ERROR;
3503                }
3504             }else{
3505                 dst[dstIndex]=(UChar)c;
3506             }
3507 
3508         }else{
3509             *status = U_BUFFER_OVERFLOW_ERROR;
3510         }
3511         dstIndex++; /* for preflighting */
3512     }
3513     return dstIndex;
3514 }
3515 
3516 static void
TestFullRoundtrip(const char * cp)3517 TestFullRoundtrip(const char* cp){
3518     UChar usource[10] ={0};
3519     UChar nsrc[10] = {0};
3520     uint32_t i=1;
3521     int len=0, ulen;
3522     nsrc[0]=0x0061;
3523     /* Test codepoint 0 */
3524     TestConv(usource,1,cp,"",NULL,0);
3525     TestConv(usource,2,cp,"",NULL,0);
3526     nsrc[2]=0x5555;
3527     TestConv(nsrc,3,cp,"",NULL,0);
3528 
3529     for(;i<=0x10FFFF;i++){
3530         if(i==0xD800){
3531             i=0xDFFF;
3532             continue;
3533         }
3534         if(i<=0xFFFF){
3535             usource[0] =(UChar) i;
3536             len=1;
3537         }else{
3538             usource[0]=U16_LEAD(i);
3539             usource[1]=U16_TRAIL(i);
3540             len=2;
3541         }
3542         ulen=len;
3543         if(i==0x80) {
3544             usource[2]=0;
3545         }
3546         /* Test only single code points */
3547         TestConv(usource,ulen,cp,"",NULL,0);
3548         /* Test codepoint repeated twice */
3549         usource[ulen]=usource[0];
3550         usource[ulen+1]=usource[1];
3551         ulen+=len;
3552         TestConv(usource,ulen,cp,"",NULL,0);
3553         /* Test codepoint repeated 3 times */
3554         usource[ulen]=usource[0];
3555         usource[ulen+1]=usource[1];
3556         ulen+=len;
3557         TestConv(usource,ulen,cp,"",NULL,0);
3558         /* Test codepoint in between 2 codepoints */
3559         nsrc[1]=usource[0];
3560         nsrc[2]=usource[1];
3561         nsrc[len+1]=0x5555;
3562         TestConv(nsrc,len+2,cp,"",NULL,0);
3563         uprv_memset(usource,0,sizeof(UChar)*10);
3564     }
3565 }
3566 
3567 static void
TestRoundTrippingAllUTF(void)3568 TestRoundTrippingAllUTF(void){
3569     if(!getTestOption(QUICK_OPTION)){
3570         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3571         TestFullRoundtrip("BOCU-1");
3572         log_verbose("Running exhaustive round trip test for SCSU\n");
3573         TestFullRoundtrip("SCSU");
3574         log_verbose("Running exhaustive round trip test for UTF-8\n");
3575         TestFullRoundtrip("UTF-8");
3576         log_verbose("Running exhaustive round trip test for CESU-8\n");
3577         TestFullRoundtrip("CESU-8");
3578         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3579         TestFullRoundtrip("UTF-16BE");
3580         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3581         TestFullRoundtrip("UTF-16LE");
3582         log_verbose("Running exhaustive round trip test for UTF-16\n");
3583         TestFullRoundtrip("UTF-16");
3584         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3585         TestFullRoundtrip("UTF-32BE");
3586         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3587         TestFullRoundtrip("UTF-32LE");
3588         log_verbose("Running exhaustive round trip test for UTF-32\n");
3589         TestFullRoundtrip("UTF-32");
3590         log_verbose("Running exhaustive round trip test for UTF-7\n");
3591         TestFullRoundtrip("UTF-7");
3592         log_verbose("Running exhaustive round trip test for UTF-7\n");
3593         TestFullRoundtrip("UTF-7,version=1");
3594         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3595         TestFullRoundtrip("IMAP-mailbox-name");
3596         /*
3597          *
3598          * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3599          * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3600          * The old mappings remain as fallbacks.
3601          * This test may be reintroduced at a later time.
3602          *
3603          * 110118 - mow
3604          */
3605          /*
3606          log_verbose("Running exhaustive round trip test for GB18030\n");
3607          TestFullRoundtrip("GB18030");
3608          */
3609     }
3610 }
3611 
3612 static void
TestSCSU()3613 TestSCSU() {
3614 
3615     static const uint16_t germanUTF16[]={
3616         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3617     };
3618 
3619     static const uint8_t germanSCSU[]={
3620         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3621     };
3622 
3623     static const uint16_t russianUTF16[]={
3624         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3625     };
3626 
3627     static const uint8_t russianSCSU[]={
3628         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3629     };
3630 
3631     static const uint16_t japaneseUTF16[]={
3632         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3633         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3634         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3635         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3636         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3637         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3638         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3639         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3640         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3641         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3642         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3643         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3644         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3645         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3646         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3647     };
3648 
3649     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3650      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3651     static const uint8_t japaneseSCSU[]={
3652         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3653         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3654         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3655         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3656         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3657         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3658         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3659         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3660         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3661         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3662         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3663         0xcb, 0x82
3664     };
3665 
3666     static const uint16_t allFeaturesUTF16[]={
3667         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3668         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3669         0x01df, 0xf000, 0xdbff, 0xdfff
3670     };
3671 
3672     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3673      * result here (34B vs. 35B)
3674      */
3675     static const uint8_t allFeaturesSCSU[]={
3676         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3677         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3678         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3679         0xdf, 0x14, 0x80, 0x15, 0xff
3680     };
3681     static const uint16_t monkeyIn[]={
3682         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3683         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3684         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3685         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3686         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3687         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3688         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3689         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3690         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3691         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3692         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3693         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3694         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3695         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3696         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3697         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3698         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3699         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3700         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3701         /* test non-BMP code points */
3702         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3703         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3704         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3705         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3706         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3707         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3708         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3709         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3710         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3711         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3712         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3713 
3714 
3715         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3716         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3717         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3718         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3719         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3720     };
3721     static const char *fTestCases [] = {
3722           "\\ud800\\udc00", /* smallest surrogate*/
3723           "\\ud8ff\\udcff",
3724           "\\udBff\\udFff", /* largest surrogate pair*/
3725           "\\ud834\\udc00",
3726           "\\U0010FFFF",
3727           "Hello \\u9292 \\u9192 World!",
3728           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3729           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3730 
3731           "\\u0648\\u06c8", /* catch missing reset*/
3732           "\\u0648\\u06c8",
3733 
3734           "\\u4444\\uE001", /* lowest quotable*/
3735           "\\u4444\\uf2FF", /* highest quotable*/
3736           "\\u4444\\uf188\\u4444",
3737           "\\u4444\\uf188\\uf288",
3738           "\\u4444\\uf188abc\\u0429\\uf288",
3739           "\\u9292\\u2222",
3740           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3741           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3742           "Hello World!123456",
3743           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3744 
3745           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3746           "abc\\u4411d",      /* uses SQU*/
3747           "abc\\u4411\\u4412d",/* uses SCU*/
3748           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3749           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3750           "\\u9292\\u2222",
3751           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3752           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3753           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3754 
3755           "", /* empty input*/
3756           "\\u0000", /* smallest BMP character*/
3757           "\\uFFFF", /* largest BMP character*/
3758 
3759           /* regression tests*/
3760           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3761           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3762           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3763           "\\u0041\\u00df\\u0401\\u015f",
3764           "\\u9066\\u2123abc",
3765           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3766           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3767     };
3768     int i=0;
3769     for(;i<UPRV_LENGTHOF(fTestCases);i++){
3770         const char* cSrc = fTestCases[i];
3771         UErrorCode status = U_ZERO_ERROR;
3772         int32_t cSrcLen,srcLen;
3773         UChar* src;
3774         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3775         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3776         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3777         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3778         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3779         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3780         free(src);
3781     }
3782     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3783     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3784     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3785     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3786     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3787     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3788     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3789 }
3790 
3791 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346()3792 static void TestJitterbug2346(){
3793     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3794                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3795     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3796 
3797     UChar uTarget[500]={'\0'};
3798     UChar* utarget=uTarget;
3799     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3800 
3801     char cTarget[500]={'\0'};
3802     char* ctarget=cTarget;
3803     char* ctargetLimit=cTarget+sizeof(cTarget);
3804     const char* csource=source;
3805     UChar* temp = expected;
3806     UErrorCode err=U_ZERO_ERROR;
3807 
3808     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3809     if(U_FAILURE(err)) {
3810         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3811         return;
3812     }
3813     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3814     if(U_FAILURE(err)) {
3815         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3816         return;
3817     }
3818     utargetLimit=utarget;
3819     utarget = uTarget;
3820     while(utarget<utargetLimit){
3821         if(*temp!=*utarget){
3822 
3823             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3824         }
3825         utarget++;
3826         temp++;
3827     }
3828     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3829     if(U_FAILURE(err)) {
3830         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3831         return;
3832     }
3833     ctargetLimit=ctarget;
3834     ctarget =cTarget;
3835     ucnv_close(conv);
3836 
3837 
3838 }
3839 
3840 static void
TestISO_2022_JP_1()3841 TestISO_2022_JP_1() {
3842     /* test input */
3843     static const uint16_t in[]={
3844         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3845         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3846         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3847         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3848         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3849         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3850         0x201D, 0x000D, 0x000A,
3851         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3852         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3853         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3854         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3855         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3856         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3857       };
3858     const UChar* uSource;
3859     const UChar* uSourceLimit;
3860     const char* cSource;
3861     const char* cSourceLimit;
3862     UChar *uTargetLimit =NULL;
3863     UChar *uTarget;
3864     char *cTarget;
3865     const char *cTargetLimit;
3866     char *cBuf;
3867     UChar *uBuf,*test;
3868     int32_t uBufSize = 120;
3869     UErrorCode errorCode=U_ZERO_ERROR;
3870     UConverter *cnv;
3871 
3872     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3873     if(U_FAILURE(errorCode)) {
3874         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3875         return;
3876     }
3877 
3878     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3879     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3880     uSource = (const UChar*)in;
3881     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3882     cTarget = cBuf;
3883     cTargetLimit = cBuf +uBufSize*5;
3884     uTarget = uBuf;
3885     uTargetLimit = uBuf+ uBufSize*5;
3886     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3887     if(U_FAILURE(errorCode)){
3888         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3889         return;
3890     }
3891     cSource = cBuf;
3892     cSourceLimit =cTarget;
3893     test =uBuf;
3894     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3895     if(U_FAILURE(errorCode)){
3896         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3897         return;
3898     }
3899     uSource = (const UChar*)in;
3900     while(uSource<uSourceLimit){
3901         if(*test!=*uSource){
3902 
3903             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3904         }
3905         uSource++;
3906         test++;
3907     }
3908     /*ucnv_close(cnv);
3909     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3910     /*Test for the condition where there is an invalid character*/
3911     ucnv_reset(cnv);
3912     {
3913         static const uint8_t source2[]={0x0e,0x24,0x053};
3914         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3915     }
3916     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3917     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3918     ucnv_close(cnv);
3919     free(uBuf);
3920     free(cBuf);
3921 }
3922 
3923 static void
TestISO_2022_JP_2()3924 TestISO_2022_JP_2() {
3925     /* test input */
3926     static const uint16_t in[]={
3927         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3928         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3929         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3930         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3931         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3932         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3933         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3934         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3935         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3936         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3937         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3938         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3939         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3940         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3941         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3942         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3943         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3944         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3945         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3946       };
3947     const UChar* uSource;
3948     const UChar* uSourceLimit;
3949     const char* cSource;
3950     const char* cSourceLimit;
3951     UChar *uTargetLimit =NULL;
3952     UChar *uTarget;
3953     char *cTarget;
3954     const char *cTargetLimit;
3955     char *cBuf = NULL;
3956     UChar *uBuf = NULL;
3957     UChar *test;
3958     int32_t uBufSize = 120;
3959     UErrorCode errorCode=U_ZERO_ERROR;
3960     UConverter *cnv = NULL;
3961     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3962     int32_t* myOff= offsets;
3963     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3964     if(U_FAILURE(errorCode)) {
3965         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3966         goto cleanup;
3967     }
3968 
3969     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3970     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3971     uSource = (const UChar*)in;
3972     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3973     cTarget = cBuf;
3974     cTargetLimit = cBuf +uBufSize*5;
3975     uTarget = uBuf;
3976     uTargetLimit = uBuf+ uBufSize*5;
3977     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3978     if(U_FAILURE(errorCode)){
3979         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3980         goto cleanup;
3981     }
3982     cSource = cBuf;
3983     cSourceLimit =cTarget;
3984     test =uBuf;
3985     myOff=offsets;
3986     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3987     if(U_FAILURE(errorCode)){
3988         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3989         goto cleanup;
3990     }
3991     uSource = (const UChar*)in;
3992     while(uSource<uSourceLimit){
3993         if(*test!=*uSource){
3994 
3995             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3996         }
3997         uSource++;
3998         test++;
3999     }
4000     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4001     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4002     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4003     /*Test for the condition where there is an invalid character*/
4004     ucnv_reset(cnv);
4005     {
4006         static const uint8_t source2[]={0x0e,0x24,0x053};
4007         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4008     }
4009 
4010 cleanup:
4011     ucnv_close(cnv);
4012     free(uBuf);
4013     free(cBuf);
4014     free(offsets);
4015 }
4016 
4017 static void
TestISO_2022_KR()4018 TestISO_2022_KR() {
4019     /* test input */
4020     static const uint16_t in[]={
4021                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4022                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4023                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4024                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4025                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4026                    ,0x53E3,0x53E4,0x000A,0x000D};
4027     const UChar* uSource;
4028     const UChar* uSourceLimit;
4029     const char* cSource;
4030     const char* cSourceLimit;
4031     UChar *uTargetLimit =NULL;
4032     UChar *uTarget;
4033     char *cTarget;
4034     const char *cTargetLimit;
4035     char *cBuf = NULL;
4036     UChar *uBuf = NULL;
4037     UChar *test;
4038     int32_t uBufSize = 120;
4039     UErrorCode errorCode=U_ZERO_ERROR;
4040     UConverter *cnv = NULL;
4041     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4042     int32_t* myOff= offsets;
4043     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4044     if(U_FAILURE(errorCode)) {
4045         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4046         goto cleanup;
4047     }
4048 
4049     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4050     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4051     uSource = (const UChar*)in;
4052     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4053     cTarget = cBuf;
4054     cTargetLimit = cBuf +uBufSize*5;
4055     uTarget = uBuf;
4056     uTargetLimit = uBuf+ uBufSize*5;
4057     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4058     if(U_FAILURE(errorCode)){
4059         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4060         goto cleanup;
4061     }
4062     cSource = cBuf;
4063     cSourceLimit =cTarget;
4064     test =uBuf;
4065     myOff=offsets;
4066     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4067     if(U_FAILURE(errorCode)){
4068         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4069         goto cleanup;
4070     }
4071     uSource = (const UChar*)in;
4072     while(uSource<uSourceLimit){
4073         if(*test!=*uSource){
4074             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4075         }
4076         uSource++;
4077         test++;
4078     }
4079     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4080     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4081     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4082     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4083     TestJitterbug930("csISO2022KR");
4084     /*Test for the condition where there is an invalid character*/
4085     ucnv_reset(cnv);
4086     {
4087         static const uint8_t source2[]={0x1b,0x24,0x053};
4088         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4089         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4090     }
4091 
4092 cleanup:
4093     ucnv_close(cnv);
4094     free(uBuf);
4095     free(cBuf);
4096     free(offsets);
4097 }
4098 
4099 static void
TestISO_2022_KR_1()4100 TestISO_2022_KR_1() {
4101     /* test input */
4102     static const uint16_t in[]={
4103                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4104                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4105                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4106                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4107                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4108                    ,0x53E3,0x53E4,0x000A,0x000D};
4109     const UChar* uSource;
4110     const UChar* uSourceLimit;
4111     const char* cSource;
4112     const char* cSourceLimit;
4113     UChar *uTargetLimit =NULL;
4114     UChar *uTarget;
4115     char *cTarget;
4116     const char *cTargetLimit;
4117     char *cBuf = NULL;
4118     UChar *uBuf = NULL;
4119     UChar *test;
4120     int32_t uBufSize = 120;
4121     UErrorCode errorCode=U_ZERO_ERROR;
4122     UConverter *cnv = NULL;
4123     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4124     int32_t* myOff= offsets;
4125     cnv=ucnv_open("ibm-25546", &errorCode);
4126     if(U_FAILURE(errorCode)) {
4127         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4128         goto cleanup;
4129     }
4130 
4131     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4132     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4133     uSource = (const UChar*)in;
4134     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4135     cTarget = cBuf;
4136     cTargetLimit = cBuf +uBufSize*5;
4137     uTarget = uBuf;
4138     uTargetLimit = uBuf+ uBufSize*5;
4139     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4140     if(U_FAILURE(errorCode)){
4141         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4142         goto cleanup;
4143     }
4144     cSource = cBuf;
4145     cSourceLimit =cTarget;
4146     test =uBuf;
4147     myOff=offsets;
4148     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4149     if(U_FAILURE(errorCode)){
4150         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4151         goto cleanup;
4152     }
4153     uSource = (const UChar*)in;
4154     while(uSource<uSourceLimit){
4155         if(*test!=*uSource){
4156             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4157         }
4158         uSource++;
4159         test++;
4160     }
4161     ucnv_reset(cnv);
4162     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4163     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4164     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4165     ucnv_reset(cnv);
4166     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4167         /*Test for the condition where there is an invalid character*/
4168     ucnv_reset(cnv);
4169     {
4170         static const uint8_t source2[]={0x1b,0x24,0x053};
4171         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4172         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4173     }
4174 
4175 cleanup:
4176     ucnv_close(cnv);
4177     free(uBuf);
4178     free(cBuf);
4179     free(offsets);
4180 }
4181 
TestJitterbug2411()4182 static void TestJitterbug2411(){
4183     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4184                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4185     UConverter* kr=NULL, *kr1=NULL;
4186     UErrorCode errorCode = U_ZERO_ERROR;
4187     UChar tgt[100]={'\0'};
4188     UChar* target = tgt;
4189     UChar* targetLimit = target+100;
4190     kr=ucnv_open("iso-2022-kr", &errorCode);
4191     if(U_FAILURE(errorCode)) {
4192         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4193         return;
4194     }
4195     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4196     if(U_FAILURE(errorCode)) {
4197         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4198         return;
4199     }
4200     kr1 = ucnv_open("ibm-25546", &errorCode);
4201     if(U_FAILURE(errorCode)) {
4202         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4203         return;
4204     }
4205     target = tgt;
4206     targetLimit = target+100;
4207     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4208 
4209     if(U_FAILURE(errorCode)) {
4210         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4211         return;
4212     }
4213 
4214     ucnv_close(kr);
4215     ucnv_close(kr1);
4216 
4217 }
4218 
4219 static void
TestJIS()4220 TestJIS(){
4221     /* From Unicode moved to testdata/conversion.txt */
4222     /*To Unicode*/
4223     {
4224         static const uint8_t sampleTextJIS[] = {
4225             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4226             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4227             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4228         };
4229         static const uint16_t expectedISO2022JIS[] = {
4230             0x0041, 0x0042,
4231             0xFF81, 0xFF82,
4232             0x3000
4233         };
4234         static const int32_t  toISO2022JISOffs[]={
4235             3,4,
4236             8,9,
4237             16
4238         };
4239 
4240         static const uint8_t sampleTextJIS7[] = {
4241             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4242             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4243             0x1b,0x24,0x42,0x21,0x21,
4244             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4245             0x21,0x22,
4246             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4247         };
4248         static const uint16_t expectedISO2022JIS7[] = {
4249             0x0041, 0x0042,
4250             0xFF81, 0xFF82,
4251             0x3000,
4252             0xFF81, 0xFF82,
4253             0x3001,
4254             0x3000
4255         };
4256         static const int32_t  toISO2022JIS7Offs[]={
4257             3,4,
4258             8,9,
4259             13,16,
4260             17,
4261             19,27
4262         };
4263         static const uint8_t sampleTextJIS8[] = {
4264             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4265             0xa1,0xc8,0xd9,/*Katakana Set*/
4266             0x1b,0x28,0x42,
4267             0x41,0x42,
4268             0xb1,0xc3, /*Katakana Set*/
4269             0x1b,0x24,0x42,0x21,0x21
4270         };
4271         static const uint16_t expectedISO2022JIS8[] = {
4272             0x0041, 0x0042,
4273             0xff61, 0xff88, 0xff99,
4274             0x0041, 0x0042,
4275             0xff71, 0xff83,
4276             0x3000
4277         };
4278         static const int32_t  toISO2022JIS8Offs[]={
4279             3, 4,  5,  6,
4280             7, 11, 12, 13,
4281             14, 18,
4282         };
4283 
4284         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4285             UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4286         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4287             UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4288         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4289             UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4290     }
4291 
4292 }
4293 
4294 
4295 #if 0
4296  ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4297 
4298 static void TestJitterbug915(){
4299 /* tests for roundtripping of the below sequence
4300 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4301 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4302 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4303 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4304 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4305 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4306 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4307 */
4308     static const char cSource[]={
4309         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4310         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4311         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4312         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4313         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4314         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4315         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4316         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4317         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4318         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4319         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4320         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4321         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4322         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4323         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4324         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4325         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4326         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4327         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4328         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4329         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4330         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4331         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4332         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4333         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4334         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4335         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4336         0x37, 0x20, 0x2A, 0x2F
4337     };
4338     UChar uTarget[500]={'\0'};
4339     UChar* utarget=uTarget;
4340     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4341 
4342     char cTarget[500]={'\0'};
4343     char* ctarget=cTarget;
4344     char* ctargetLimit=cTarget+sizeof(cTarget);
4345     const char* csource=cSource;
4346     const char* tempSrc = cSource;
4347     UErrorCode err=U_ZERO_ERROR;
4348 
4349     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4350     if(U_FAILURE(err)) {
4351         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4352         return;
4353     }
4354     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4355     if(U_FAILURE(err)) {
4356         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4357         return;
4358     }
4359     utargetLimit=utarget;
4360     utarget = uTarget;
4361     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4362     if(U_FAILURE(err)) {
4363         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4364         return;
4365     }
4366     ctargetLimit=ctarget;
4367     ctarget =cTarget;
4368     while(ctarget<ctargetLimit){
4369         if(*ctarget != *tempSrc){
4370             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4371         }
4372         ++ctarget;
4373         ++tempSrc;
4374     }
4375 
4376     ucnv_close(conv);
4377 }
4378 
4379 static void
4380 TestISO_2022_CN_EXT() {
4381     /* test input */
4382     static const uint16_t in[]={
4383                 /* test Non-BMP code points */
4384          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4385          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4386          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4387          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4388          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4389          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4390          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4391          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4392          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4393          0xD869, 0xDED5,
4394 
4395          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4396          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4397          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4398          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4399          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4400          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4401          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4402          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4403          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4404          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4405          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4406          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4407          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4408          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4409          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4410          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4411          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4412          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4413 
4414          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4415 
4416       };
4417 
4418     const UChar* uSource;
4419     const UChar* uSourceLimit;
4420     const char* cSource;
4421     const char* cSourceLimit;
4422     UChar *uTargetLimit =NULL;
4423     UChar *uTarget;
4424     char *cTarget;
4425     const char *cTargetLimit;
4426     char *cBuf = NULL;
4427     UChar *uBuf = NULL;
4428     UChar *test;
4429     int32_t uBufSize = 180;
4430     UErrorCode errorCode=U_ZERO_ERROR;
4431     UConverter *cnv = NULL;
4432     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4433     int32_t* myOff= offsets;
4434     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4435     if(U_FAILURE(errorCode)) {
4436         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4437         goto cleanup;
4438     }
4439 
4440     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4441     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4442     uSource = (const UChar*)in;
4443     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4444     cTarget = cBuf;
4445     cTargetLimit = cBuf +uBufSize*5;
4446     uTarget = uBuf;
4447     uTargetLimit = uBuf+ uBufSize*5;
4448     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4449     if(U_FAILURE(errorCode)){
4450         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4451         goto cleanup;
4452     }
4453     cSource = cBuf;
4454     cSourceLimit =cTarget;
4455     test =uBuf;
4456     myOff=offsets;
4457     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4458     if(U_FAILURE(errorCode)){
4459         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4460         goto cleanup;
4461     }
4462     uSource = (const UChar*)in;
4463     while(uSource<uSourceLimit){
4464         if(*test!=*uSource){
4465             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4466         }
4467         else{
4468             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4469         }
4470         uSource++;
4471         test++;
4472     }
4473     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4474     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4475     /*Test for the condition where there is an invalid character*/
4476     ucnv_reset(cnv);
4477     {
4478         static const uint8_t source2[]={0x0e,0x24,0x053};
4479         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4480     }
4481 
4482 cleanup:
4483     ucnv_close(cnv);
4484     free(uBuf);
4485     free(cBuf);
4486     free(offsets);
4487 }
4488 #endif
4489 
4490 static void
TestISO_2022_CN()4491 TestISO_2022_CN() {
4492     /* test input */
4493     static const uint16_t in[]={
4494          /* jitterbug 951 */
4495          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4496          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4497          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4498          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4499          0x0020, 0x0045, 0x004e, 0x0044,
4500          /**/
4501          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4502          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4503          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4504          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4505          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4506          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4507          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4508          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4509          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4510          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4511          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4512          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4513          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4514          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4515          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4516          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4517          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4518 
4519       };
4520     const UChar* uSource;
4521     const UChar* uSourceLimit;
4522     const char* cSource;
4523     const char* cSourceLimit;
4524     UChar *uTargetLimit =NULL;
4525     UChar *uTarget;
4526     char *cTarget;
4527     const char *cTargetLimit;
4528     char *cBuf = NULL;
4529     UChar *uBuf = NULL;
4530     UChar *test;
4531     int32_t uBufSize = 180;
4532     UErrorCode errorCode=U_ZERO_ERROR;
4533     UConverter *cnv = NULL;
4534     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4535     int32_t* myOff= offsets;
4536     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4537     if(U_FAILURE(errorCode)) {
4538         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4539         goto cleanup;
4540     }
4541 
4542     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4543     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4544     uSource = (const UChar*)in;
4545     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4546     cTarget = cBuf;
4547     cTargetLimit = cBuf +uBufSize*5;
4548     uTarget = uBuf;
4549     uTargetLimit = uBuf+ uBufSize*5;
4550     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4551     if(U_FAILURE(errorCode)){
4552         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4553         goto cleanup;
4554     }
4555     cSource = cBuf;
4556     cSourceLimit =cTarget;
4557     test =uBuf;
4558     myOff=offsets;
4559     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4560     if(U_FAILURE(errorCode)){
4561         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4562         goto cleanup;
4563     }
4564     uSource = (const UChar*)in;
4565     while(uSource<uSourceLimit){
4566         if(*test!=*uSource){
4567             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4568         }
4569         else{
4570             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4571         }
4572         uSource++;
4573         test++;
4574     }
4575     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4576     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4577     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4578     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4579     TestJitterbug930("csISO2022CN");
4580     /*Test for the condition where there is an invalid character*/
4581     ucnv_reset(cnv);
4582     {
4583         static const uint8_t source2[]={0x0e,0x24,0x053};
4584         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4585     }
4586 
4587 cleanup:
4588     ucnv_close(cnv);
4589     free(uBuf);
4590     free(cBuf);
4591     free(offsets);
4592 }
4593 
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN and HZ: check that the
 * UConverterCallbackReason reported for them is UCNV_IRREGULAR.
 *
 * One row of the table driving those tests.
 */
typedef struct {
    const char *converterName;    /* name handed to ucnv_open(); NULL marks the end of the table */
    const char *inputText;        /* input bytes fed to the converter */
    int         inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4600 
4601 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)4602 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4603                                              int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4604     // suppress compiler warnings about unused variables
4605     (void)context;
4606     (void)codeUnits;
4607     (void)length;
4608     if (reason > UCNV_IRREGULAR) {
4609         return;
4610     }
4611     if (reason != UCNV_IRREGULAR) {
4612         log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4613     }
4614     /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4615     *err = U_ZERO_ERROR;
4616     ucnv_cbToUWriteSub(toArgs,0,err);
4617 }
4618 
4619 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4620 static void TestJitterbug6175(void) {
4621     static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4622     static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4623     static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4624     static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4625     static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4626     static const EmptySegmentTest emptySegmentTests[] = {
4627         /* converterName inputText    inputTextLength */
4628         { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4629         { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4630         { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4631         { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4632         { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4633         /* terminator: */
4634         { NULL,          NULL,        0,                  }
4635     };
4636     const EmptySegmentTest * testPtr;
4637     for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4638         UErrorCode   err = U_ZERO_ERROR;
4639         UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4640         if (U_FAILURE(err)) {
4641             log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4642             return;
4643         }
4644         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4645         if (U_FAILURE(err)) {
4646             log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4647             ucnv_close(cnv);
4648             return;
4649         }
4650         {
4651             UChar         toUChars[kEmptySegmentToUCharsMax];
4652             UChar *       toUCharsPtr = toUChars;
4653             const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4654             const char *  inCharsPtr = testPtr->inputText;
4655             const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4656             ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4657         }
4658         ucnv_close(cnv);
4659     }
4660 }
4661 
4662 static void
TestEBCDIC_STATEFUL()4663 TestEBCDIC_STATEFUL() {
4664     /* test input */
4665     static const uint8_t in[]={
4666         0x61,
4667         0x1a,
4668         0x0f, 0x4b,
4669         0x42,
4670         0x40,
4671         0x36,
4672     };
4673 
4674     /* expected test results */
4675     static const int32_t results[]={
4676         /* number of bytes read, code point */
4677         1, 0x002f,
4678         1, 0x0092,
4679         2, 0x002e,
4680         1, 0xff62,
4681         1, 0x0020,
4682         1, 0x0096,
4683 
4684     };
4685     static const uint8_t in2[]={
4686         0x0f,
4687         0xa1,
4688         0x01
4689     };
4690 
4691     /* expected test results */
4692     static const int32_t results2[]={
4693         /* number of bytes read, code point */
4694         2, 0x203E,
4695         1, 0x0001,
4696     };
4697 
4698     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4699     UErrorCode errorCode=U_ZERO_ERROR;
4700     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4701     if(U_FAILURE(errorCode)) {
4702         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4703         return;
4704     }
4705     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4706     ucnv_reset(cnv);
4707      /* Test the condition when source >= sourceLimit */
4708     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4709     ucnv_reset(cnv);
4710     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4711     {
4712         static const uint8_t source1[]={0x0f};
4713         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4714     }
4715     /*Test for the condition where there is an invalid character*/
4716     ucnv_reset(cnv);
4717     {
4718         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4719         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4720     }
4721     ucnv_reset(cnv);
4722     source=(const char*)in2;
4723     limit=(const char*)in2+sizeof(in2);
4724     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4725     ucnv_close(cnv);
4726 
4727 }
4728 
4729 static void
TestGB18030()4730 TestGB18030() {
4731     /* test input */
4732     static const uint8_t in[]={
4733         0x24,
4734         0x7f,
4735         0x81, 0x30, 0x81, 0x30,
4736         0xa8, 0xbf,
4737         0xa2, 0xe3,
4738         0xd2, 0xbb,
4739         0x82, 0x35, 0x8f, 0x33,
4740         0x84, 0x31, 0xa4, 0x39,
4741         0x90, 0x30, 0x81, 0x30,
4742         0xe3, 0x32, 0x9a, 0x35
4743 #if 0
4744         /*
4745          * Feature removed   markus 2000-oct-26
4746          * Only some codepages must match surrogate pairs into supplementary code points -
4747          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4748          * GB 18030 provides direct encodings for supplementary code points, therefore
4749          * it must not combine two single-encoded surrogates into one code point.
4750          */
4751         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4752 #endif
4753     };
4754 
4755     /* expected test results */
4756     static const int32_t results[]={
4757         /* number of bytes read, code point */
4758         1, 0x24,
4759         1, 0x7f,
4760         4, 0x80,
4761         2, 0x1f9,
4762         2, 0x20ac,
4763         2, 0x4e00,
4764         4, 0x9fa6,
4765         4, 0xffff,
4766         4, 0x10000,
4767         4, 0x10ffff
4768 #if 0
4769         /* Feature removed. See comment above. */
4770         8, 0x10000
4771 #endif
4772     };
4773 
4774 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4775     UErrorCode errorCode=U_ZERO_ERROR;
4776     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4777     if(U_FAILURE(errorCode)) {
4778         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4779         return;
4780     }
4781     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4782     ucnv_close(cnv);
4783 }
4784 
4785 static void
TestLMBCS()4786 TestLMBCS() {
4787     /* LMBCS-1 string */
4788     static const uint8_t pszLMBCS[]={
4789         0x61,
4790         0x01, 0x29,
4791         0x81,
4792         0xA0,
4793         0x0F, 0x27,
4794         0x0F, 0x91,
4795         0x14, 0x0a, 0x74,
4796         0x14, 0xF6, 0x02,
4797         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4798         0x10, 0x88, 0xA0,
4799     };
4800 
4801     /* Unicode UChar32 equivalents */
4802     static const UChar32 pszUnicode32[]={
4803         /* code point */
4804         0x00000061,
4805         0x00002013,
4806         0x000000FC,
4807         0x000000E1,
4808         0x00000007,
4809         0x00000091,
4810         0x00000a74,
4811         0x00000200,
4812         0x00023456, /* code point for surrogate pair */
4813         0x00005516
4814     };
4815 
4816 /* Unicode UChar equivalents */
4817     static const UChar pszUnicode[]={
4818         /* code point */
4819         0x0061,
4820         0x2013,
4821         0x00FC,
4822         0x00E1,
4823         0x0007,
4824         0x0091,
4825         0x0a74,
4826         0x0200,
4827         0xD84D, /* low surrogate */
4828         0xDC56, /* high surrogate */
4829         0x5516
4830     };
4831 
4832 /* expected test results */
4833     static const int offsets32[]={
4834         /* number of bytes read, code point */
4835         0,
4836         1,
4837         3,
4838         4,
4839         5,
4840         7,
4841         9,
4842         12,
4843         15,
4844         21,
4845         24
4846     };
4847 
4848 /* expected test results */
4849     static const int offsets[]={
4850         /* number of bytes read, code point */
4851         0,
4852         1,
4853         3,
4854         4,
4855         5,
4856         7,
4857         9,
4858         12,
4859         15,
4860         18,
4861         21,
4862         24
4863     };
4864 
4865 
4866     UConverter *cnv;
4867 
4868 #define NAME_LMBCS_1 "LMBCS-1"
4869 #define NAME_LMBCS_2 "LMBCS-2"
4870 
4871 
4872    /* Some basic open/close/property tests on some LMBCS converters */
4873     {
4874 
4875       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4876       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4877       char get_subchars [1];
4878       const char * get_name;
4879       UConverter *cnv1;
4880       UConverter *cnv2;
4881 
4882       int8_t len = sizeof(get_subchars);
4883 
4884       UErrorCode errorCode=U_ZERO_ERROR;
4885 
4886       /* Open */
4887       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4888       if(U_FAILURE(errorCode)) {
4889          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4890          return;
4891       }
4892       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4893       if(U_FAILURE(errorCode)) {
4894          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4895          return;
4896       }
4897 
4898       /* Name */
4899       get_name = ucnv_getName (cnv1, &errorCode);
4900       if (strcmp(NAME_LMBCS_1,get_name)){
4901          log_err("Unexpected converter name: %s\n", get_name);
4902       }
4903       get_name = ucnv_getName (cnv2, &errorCode);
4904       if (strcmp(NAME_LMBCS_2,get_name)){
4905          log_err("Unexpected converter name: %s\n", get_name);
4906       }
4907 
4908       /* substitution chars */
4909       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4910       if(U_FAILURE(errorCode)) {
4911          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4912       }
4913       if (len!=1){
4914          log_err("Unexpected length of sub chars\n");
4915       }
4916       if (get_subchars[0] != expected_subchars[0]){
4917            log_err("Unexpected value of sub chars\n");
4918       }
4919       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4920       if(U_FAILURE(errorCode)) {
4921          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4922       }
4923       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4924       if(U_FAILURE(errorCode)) {
4925          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4926       }
4927       if (len!=1){
4928          log_err("Unexpected length of sub chars\n");
4929       }
4930       if (get_subchars[0] != new_subchars[0]){
4931            log_err("Unexpected value of sub chars\n");
4932       }
4933       ucnv_close(cnv1);
4934       ucnv_close(cnv2);
4935 
4936     }
4937 
4938     /* LMBCS to Unicode - offsets */
4939     {
4940        UErrorCode errorCode=U_ZERO_ERROR;
4941 
4942        const char * pSource = (const char *)pszLMBCS;
4943        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4944 
4945        UChar Out [sizeof(pszUnicode) + 1];
4946        UChar * pOut = Out;
4947        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4948 
4949        int32_t off [sizeof(offsets)];
4950 
4951       /* last 'offset' in expected results is just the final size.
4952          (Makes other tests easier). Compensate here: */
4953 
4954        off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4955 
4956 
4957 
4958       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4959       if(U_FAILURE(errorCode)) {
4960            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4961            return;
4962       }
4963 
4964 
4965 
4966       ucnv_toUnicode (cnv,
4967                       &pOut,
4968                       OutLimit,
4969                       &pSource,
4970                       sourceLimit,
4971                       off,
4972                       TRUE,
4973                       &errorCode);
4974 
4975 
4976        if (memcmp(off,offsets,sizeof(offsets)))
4977        {
4978          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4979        }
4980        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4981        {
4982          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4983        }
4984        ucnv_close(cnv);
4985     }
4986     {
4987    /* LMBCS to Unicode - getNextUChar */
4988       const char * sourceStart;
4989       const char *source=(const char *)pszLMBCS;
4990       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4991       const UChar32 *results= pszUnicode32;
4992       const int *off = offsets32;
4993 
4994       UErrorCode errorCode=U_ZERO_ERROR;
4995       UChar32 uniChar;
4996 
4997       cnv=ucnv_open("LMBCS-1", &errorCode);
4998       if(U_FAILURE(errorCode)) {
4999            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5000            return;
5001       }
5002       else
5003       {
5004 
5005          while(source<limit) {
5006             sourceStart=source;
5007             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5008             if(U_FAILURE(errorCode)) {
5009                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5010                   break;
5011             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5012                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5013                    uniChar, (source-sourceStart), *results, *off);
5014                break;
5015             }
5016             results++;
5017             off++;
5018          }
5019        }
5020        ucnv_close(cnv);
5021     }
5022     { /* test locale & optimization group operations: Unicode to LMBCS */
5023 
5024       UErrorCode errorCode=U_ZERO_ERROR;
5025       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5026       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5027       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5028       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5029       const UChar * pUniOut = uniString;
5030       UChar * pUniIn = uniString;
5031       uint8_t lmbcsString [4];
5032       const char * pLMBCSOut = (const char *)lmbcsString;
5033       char * pLMBCSIn = (char *)lmbcsString;
5034 
5035       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5036       ucnv_fromUnicode (cnv16he,
5037                         &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5038                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5039                         NULL, 1, &errorCode);
5040 
5041       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5042       {
5043          log_err("LMBCS-16,locale=he gives unexpected translation\n");
5044       }
5045 
5046       pLMBCSIn= (char *)lmbcsString;
5047       pUniOut = uniString;
5048       ucnv_fromUnicode (cnv01us,
5049                         &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5050                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5051                         NULL, 1, &errorCode);
5052 
5053       if (lmbcsString[0] != 0x9F)
5054       {
5055          log_err("LMBCS-1,locale=US gives unexpected translation\n");
5056       }
5057 
5058       /* single byte char from mbcs char set */
5059       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5060       pLMBCSOut = (const char *)lmbcsString;
5061       pUniIn = uniString;
5062       ucnv_toUnicode (cnv16jp,
5063                         &pUniIn, pUniIn + 1,
5064                         &pLMBCSOut, (pLMBCSOut + 1),
5065                         NULL, 1, &errorCode);
5066       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5067       {
5068            log_err("Unexpected results from LMBCS-16 single byte char\n");
5069       }
5070       /* convert to group 1: should be 3 bytes */
5071       pLMBCSIn = (char *)lmbcsString;
5072       pUniOut = uniString;
5073       ucnv_fromUnicode (cnv01us,
5074                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5075                         &pUniOut, pUniOut + 1,
5076                         NULL, 1, &errorCode);
5077       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5078          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5079       {
5080            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5081       }
5082       pLMBCSOut = (const char *)lmbcsString;
5083       pUniIn = uniString;
5084       ucnv_toUnicode (cnv01us,
5085                         &pUniIn, pUniIn + 1,
5086                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5087                         NULL, 1, &errorCode);
5088       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5089       {
5090            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5091       }
5092       pLMBCSIn = (char *)lmbcsString;
5093       pUniOut = uniString;
5094       ucnv_fromUnicode (cnv16jp,
5095                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5096                         &pUniOut, pUniOut + 1,
5097                         NULL, 1, &errorCode);
5098       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5099       {
5100            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5101       }
5102       ucnv_close(cnv16he);
5103       ucnv_close(cnv16jp);
5104       ucnv_close(cnv01us);
5105     }
5106     {
5107        /* Small source buffer testing, LMBCS -> Unicode */
5108 
5109        UErrorCode errorCode=U_ZERO_ERROR;
5110 
5111        const char * pSource = (const char *)pszLMBCS;
5112        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5113        int codepointCount = 0;
5114 
5115        UChar Out [sizeof(pszUnicode) + 1];
5116        UChar * pOut = Out;
5117        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5118 
5119 
5120        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5121        if(U_FAILURE(errorCode)) {
5122            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5123            return;
5124        }
5125 
5126 
5127        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5128        {
5129            ucnv_toUnicode (cnv,
5130                &pOut,
5131                OutLimit,
5132                &pSource,
5133                (pSource+1), /* claim that this is a 1- byte buffer */
5134                NULL,
5135                FALSE,    /* FALSE means there might be more chars in the next buffer */
5136                &errorCode);
5137 
5138            if (U_SUCCESS (errorCode))
5139            {
5140                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5141                {
5142                    /* we are on to the next code point: check value */
5143 
5144                    if (Out[0] != pszUnicode[codepointCount]){
5145                        log_err("LMBCS->Uni result %lx should have been %lx \n",
5146                            Out[0], pszUnicode[codepointCount]);
5147                    }
5148 
5149                    pOut = Out; /* reset for accumulating next code point */
5150                    codepointCount++;
5151                }
5152            }
5153            else
5154            {
5155                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5156            }
5157        }
5158        {
5159          /* limits & surrogate error testing */
5160          char LIn [sizeof(pszLMBCS)];
5161          const char * pLIn = LIn;
5162 
5163          char LOut [sizeof(pszLMBCS)];
5164          char * pLOut = LOut;
5165 
5166          UChar UOut [sizeof(pszUnicode)];
5167          UChar * pUOut = UOut;
5168 
5169          UChar UIn [sizeof(pszUnicode)];
5170          const UChar * pUIn = UIn;
5171 
5172          int32_t off [sizeof(offsets)];
5173          UChar32 uniChar;
5174 
5175          errorCode=U_ZERO_ERROR;
5176 
5177          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5178          pUIn++;
5179          ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5180          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5181          {
5182             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5183          }
5184          pUIn--;
5185 
5186          errorCode=U_ZERO_ERROR;
5187          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5188          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5189          {
5190             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5191          }
5192          errorCode=U_ZERO_ERROR;
5193 
5194          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5195          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5196          {
5197             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5198          }
5199          errorCode=U_ZERO_ERROR;
5200 
5201          /* 0 byte source request - no error, no pointer movement */
5202          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5203          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5204          if(U_FAILURE(errorCode)) {
5205             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5206          }
5207          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5208          {
5209               log_err("Unexpected pointer move in 0 byte source request \n");
5210          }
5211          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5212          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5213          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5214          {
5215             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5216          }
5217          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5218          {
5219             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5220          }
5221          errorCode = U_ZERO_ERROR;
5222 
5223          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5224 
5225          pUIn = pszUnicode;
5226          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5227          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5228          {
5229             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5230          }
5231 
5232          errorCode = U_ZERO_ERROR;
5233 
5234          pLIn = (const char *)pszLMBCS;
5235          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5236          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5237          {
5238             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5239          }
5240 
5241          /* unpaired or chopped LMBCS surrogates */
5242 
5243          /* OK high surrogate, Low surrogate is chopped */
5244          LIn [0] = (char)0x14;
5245          LIn [1] = (char)0xD8;
5246          LIn [2] = (char)0x01;
5247          LIn [3] = (char)0x14;
5248          LIn [4] = (char)0xDC;
5249          pLIn = LIn;
5250          errorCode = U_ZERO_ERROR;
5251          pUOut = UOut;
5252 
5253          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5254          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5255          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5256          {
5257             log_err("Unexpected results on chopped low surrogate\n");
5258          }
5259 
5260          /* chopped at surrogate boundary */
5261          LIn [0] = (char)0x14;
5262          LIn [1] = (char)0xD8;
5263          LIn [2] = (char)0x01;
5264          pLIn = LIn;
5265          errorCode = U_ZERO_ERROR;
5266          pUOut = UOut;
5267 
5268          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5269          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5270          {
5271             log_err("Unexpected results on chopped at surrogate boundary \n");
5272          }
5273 
5274          /* unpaired surrogate plus valid Unichar */
5275          LIn [0] = (char)0x14;
5276          LIn [1] = (char)0xD8;
5277          LIn [2] = (char)0x01;
5278          LIn [3] = (char)0x14;
5279          LIn [4] = (char)0xC9;
5280          LIn [5] = (char)0xD0;
5281          pLIn = LIn;
5282          errorCode = U_ZERO_ERROR;
5283          pUOut = UOut;
5284 
5285          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5286          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5287          {
5288             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5289          }
5290 
5291       /* unpaired surrogate plus chopped Unichar */
5292          LIn [0] = (char)0x14;
5293          LIn [1] = (char)0xD8;
5294          LIn [2] = (char)0x01;
5295          LIn [3] = (char)0x14;
5296          LIn [4] = (char)0xC9;
5297 
5298          pLIn = LIn;
5299          errorCode = U_ZERO_ERROR;
5300          pUOut = UOut;
5301 
5302          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5303          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5304          {
5305             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5306          }
5307 
5308          /* unpaired surrogate plus valid non-Unichar */
5309          LIn [0] = (char)0x14;
5310          LIn [1] = (char)0xD8;
5311          LIn [2] = (char)0x01;
5312          LIn [3] = (char)0x0F;
5313          LIn [4] = (char)0x3B;
5314 
5315          pLIn = LIn;
5316          errorCode = U_ZERO_ERROR;
5317          pUOut = UOut;
5318 
5319          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5320          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5321          {
5322             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5323          }
5324 
5325          /* unpaired surrogate plus chopped non-Unichar */
5326          LIn [0] = (char)0x14;
5327          LIn [1] = (char)0xD8;
5328          LIn [2] = (char)0x01;
5329          LIn [3] = (char)0x0F;
5330 
5331          pLIn = LIn;
5332          errorCode = U_ZERO_ERROR;
5333          pUOut = UOut;
5334 
5335          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5336 
5337          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5338          {
5339             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5340          }
5341        }
5342     }
5343    ucnv_close(cnv);  /* final cleanup */
5344 }
5345 
5346 
TestJitterbug255()5347 static void TestJitterbug255()
5348 {
5349     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5350     const char *testBuffer = (const char *)testBytes;
5351     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5352     UErrorCode status = U_ZERO_ERROR;
5353     /*UChar32 result;*/
5354     UConverter *cnv = 0;
5355 
5356     cnv = ucnv_open("shift-jis", &status);
5357     if (U_FAILURE(status) || cnv == 0) {
5358         log_data_err("Failed to open the converter for SJIS.\n");
5359                 return;
5360     }
5361     while (testBuffer != testEnd)
5362     {
5363         /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5364         if (U_FAILURE(status))
5365         {
5366             log_err("Failed to convert the next UChar for SJIS.\n");
5367             break;
5368         }
5369     }
5370     ucnv_close(cnv);
5371 }
5372 
TestEBCDICUS4XML()5373 static void TestEBCDICUS4XML()
5374 {
5375     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5376     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5377     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5378     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5379     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5380     UChar *unicodes = unicodes_x;
5381     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5382     char *target = target_x;
5383     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5384     UErrorCode status = U_ZERO_ERROR;
5385     UConverter *cnv = 0;
5386 
5387     cnv = ucnv_open("ebcdic-xml-us", &status);
5388     if (U_FAILURE(status) || cnv == 0) {
5389         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5390         return;
5391     }
5392     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5393     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5394         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5395             u_errorName(status));
5396         printUSeqErr(unicodes_x, 3);
5397         printUSeqErr(toUnicodeMaps, 3);
5398     }
5399     status = U_ZERO_ERROR;
5400     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5401     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5402         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5403             u_errorName(status));
5404         printSeqErr((const unsigned char*)target_x, 3);
5405         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5406     }
5407     ucnv_close(cnv);
5408 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5410 
5411 #if !UCONFIG_NO_COLLATION
5412 
TestJitterbug981()5413 static void TestJitterbug981(){
5414     const UChar* rules;
5415     int32_t rules_length, target_cap, bytes_needed, buff_size;
5416     UErrorCode status = U_ZERO_ERROR;
5417     UConverter *utf8cnv;
5418     UCollator* myCollator;
5419     char *buff;
5420     int numNeeded=0;
5421     utf8cnv = ucnv_open ("utf8", &status);
5422     if(U_FAILURE(status)){
5423         log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5424         return;
5425     }
5426     myCollator = ucol_open("zh", &status);
5427     if(U_FAILURE(status)){
5428         log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5429         ucnv_close(utf8cnv);
5430         return;
5431     }
5432 
5433     rules = ucol_getRules(myCollator, &rules_length);
5434     if(rules_length == 0) {
5435         log_data_err("missing zh tailoring rule string\n");
5436         ucol_close(myCollator);
5437         ucnv_close(utf8cnv);
5438         return;
5439     }
5440     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5441     buff = malloc(buff_size);
5442 
5443     target_cap = 0;
5444     do {
5445         ucnv_reset(utf8cnv);
5446         status = U_ZERO_ERROR;
5447         if(target_cap >= buff_size) {
5448             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5449             break;
5450         }
5451         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5452             rules, rules_length, &status);
5453         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5454         if(numNeeded!=0 && numNeeded!= bytes_needed){
5455             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5456             break;
5457         }
5458         numNeeded = bytes_needed;
5459     } while (status == U_BUFFER_OVERFLOW_ERROR);
5460     ucol_close(myCollator);
5461     ucnv_close(utf8cnv);
5462     free(buff);
5463 }
5464 
5465 #endif
5466 
5467 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293()5468 static void TestJitterbug1293(){
5469     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5470     char target[256];
5471     UErrorCode status = U_ZERO_ERROR;
5472     UConverter* conv=NULL;
5473     int32_t target_cap, bytes_needed, numNeeded = 0;
5474     conv = ucnv_open("shift-jis",&status);
5475     if(U_FAILURE(status)){
5476       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5477       return;
5478     }
5479 
5480     do{
5481         target_cap =0;
5482         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5483         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5484         if(numNeeded!=0 && numNeeded!= bytes_needed){
5485           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5486         }
5487         numNeeded = bytes_needed;
5488     } while (status == U_BUFFER_OVERFLOW_ERROR);
5489     if(U_FAILURE(status)){
5490       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5491       return;
5492     }
5493     ucnv_close(conv);
5494 }
5495 #endif
5496 
TestJB5275_1()5497 static void TestJB5275_1(){
5498 
5499     static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5500                                 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5501                                 /* Switch script: */
5502                                 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5503                                 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5504                                 "\xEF\x40\x3B\xB3\x0A";
5505     static const UChar expected[] ={
5506             0x003b, 0x0a15, 0x000a, /* Easy characters */
5507             0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5508             0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5509             0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5510             0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5511     };
5512 
5513     UErrorCode status = U_ZERO_ERROR;
5514     UConverter* conv = ucnv_open("iscii-gur", &status);
5515     UChar dest[100] = {'\0'};
5516     UChar* target = dest;
5517     UChar* targetLimit = dest+100;
5518     const char* source = data;
5519     const char* sourceLimit = data+strlen(data);
5520     const UChar* exp = expected;
5521 
5522     if (U_FAILURE(status)) {
5523         log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5524         return;
5525     }
5526 
5527     log_verbose("Testing switching back to default script when new line is encountered.\n");
5528     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5529     if(U_FAILURE(status)){
5530         log_err("conversion failed: %s \n", u_errorName(status));
5531     }
5532     targetLimit = target;
5533     target = dest;
5534     printUSeq(target, (int)(targetLimit-target));
5535     while(target<targetLimit){
5536         if(*exp!=*target){
5537             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5538         }
5539         target++;
5540         exp++;
5541     }
5542     ucnv_close(conv);
5543 }
5544 
TestJB5275()5545 static void TestJB5275(){
5546     static const char* data =
5547     /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5548     /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5549     /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5550         "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5551         "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5552         "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5553         "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5554         "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5555         "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5556         /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5557     static const UChar expected[] ={
5558         0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5559         0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5560         0x0038, 0x0C95, 0x000A, /* Kannada test */
5561         0x0039, 0x0D15, 0x000A, /* Malayalam test */
5562         0x003A, 0x0A95, 0x000A, /* Gujarati test */
5563         0x003B, 0x0A15, 0x000A, /* Punjabi test */
5564     };
5565 
5566     UErrorCode status = U_ZERO_ERROR;
5567     UConverter* conv = ucnv_open("iscii", &status);
5568     UChar dest[100] = {'\0'};
5569     UChar* target = dest;
5570     UChar* targetLimit = dest+100;
5571     const char* source = data;
5572     const char* sourceLimit = data+strlen(data);
5573     const UChar* exp = expected;
5574     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5575     if(U_FAILURE(status)){
5576         log_data_err("conversion failed: %s \n", u_errorName(status));
5577     }
5578     targetLimit = target;
5579     target = dest;
5580 
5581     printUSeq(target, (int)(targetLimit-target));
5582 
5583     while(target<targetLimit){
5584         if(*exp!=*target){
5585             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5586         }
5587         target++;
5588         exp++;
5589     }
5590     ucnv_close(conv);
5591 }
5592 
5593 static void
TestIsFixedWidth()5594 TestIsFixedWidth() {
5595     UErrorCode status = U_ZERO_ERROR;
5596     UConverter *cnv = NULL;
5597     int32_t i;
5598 
5599     const char *fixedWidth[] = {
5600             "US-ASCII",
5601             "UTF32",
5602             "ibm-5478_P100-1995"
5603     };
5604 
5605     const char *notFixedWidth[] = {
5606             "GB18030",
5607             "UTF8",
5608             "windows-949-2000",
5609             "UTF16"
5610     };
5611 
5612     for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5613         cnv = ucnv_open(fixedWidth[i], &status);
5614         if (cnv == NULL || U_FAILURE(status)) {
5615             log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5616             continue;
5617         }
5618 
5619         if (!ucnv_isFixedWidth(cnv, &status)) {
5620             log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5621         }
5622         ucnv_close(cnv);
5623     }
5624 
5625     for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5626         cnv = ucnv_open(notFixedWidth[i], &status);
5627         if (cnv == NULL || U_FAILURE(status)) {
5628             log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5629             continue;
5630         }
5631 
5632         if (ucnv_isFixedWidth(cnv, &status)) {
5633             log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5634         }
5635         ucnv_close(cnv);
5636     }
5637 }
5638