• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *    Steven R. Loomis     7/8/1999      Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include "cstring.h"
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
28 #include "cmemory.h"
29 #include "nucnvtst.h"
30 
31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
35 #endif
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
38 #endif
39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
54 
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
61 #endif
62 
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
65 #endif
66 
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
73 #if 0
74    /*
75     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76     */
77 static void TestISO_2022_CN_EXT(void);
78 #endif
79 static void TestJIS(void);
80 static void TestHZ(void);
81 #endif
82 
83 static void TestSCSU(void);
84 
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
91 #if 0
92    /*
93     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94     */
95 static void TestJitterbug915(void);
96 #endif
97 static void TestISCII(void);
98 
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
105 
106 static void TestIsFixedWidth(void);
107 #endif
108 
109 static void TestInBufSizes(void);
110 
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in[],
113                      int len,
114                      const char* conv,
115                      const char* lang,
116                      char byteArr[],
117                      int byteArrLen);
118 
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121 
122 
/* Number of elements in the scratch output/offset buffers of the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Input and output chunk sizes used by the conversion helpers;
   TestNewConvertWithBufferSizes() overwrites both before each run. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently under test, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Smaller of x and y. Every use of an argument is parenthesized so that
   operators with lower precedence than '<' are safe to pass in.
   Each argument may still be evaluated twice: do not pass side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
130 
my_ucnv_open(const char * cnv,UErrorCode * err)131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132 {
133   if(cnv && cnv[0] == '@') {
134     return ucnv_openPackage(loadTestData(err), cnv+1, err);
135   } else {
136     return ucnv_open(cnv, err);
137   }
138 }
139 
/* Writes len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148 
/* Writes len UChars of a to the verbose log as "{U+0x.... 0x.... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
156 
/* Writes len bytes of a to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165 
/* Writes len UChars of a to stderr as "{U+0x.... 0x.... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
174 
175 static void
TestNextUChar(UConverter * cnv,const char * source,const char * limit,const int32_t results[],const char * message)176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177 {
178      const char* s0;
179      const char* s=(char*)source;
180      const int32_t *r=results;
181      UErrorCode errorCode=U_ZERO_ERROR;
182      UChar32 c;
183 
184      while(s<limit) {
185         s0=s;
186         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188             break; /* no more significant input */
189         } else if(U_FAILURE(errorCode)) {
190             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191             break;
192         } else if(
193             /* test the expected number of input bytes only if >=0 */
194             (*r>=0 && (int32_t)(s-s0)!=*r) ||
195             c!=*(r+1)
196         ) {
197             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                 message, c, (s-s0), *(r+1), *r);
199             break;
200         }
201         r+=2;
202     }
203 }
204 
205 static void
TestNextUCharError(UConverter * cnv,const char * source,const char * limit,UErrorCode expected,const char * message)206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207 {
208      const char* s=(char*)source;
209      UErrorCode errorCode=U_ZERO_ERROR;
210      uint32_t c;
211      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212      if(errorCode != expected){
213         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214      }
215      if(c != 0xFFFD && c != 0xffff){
216         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217      }
218 
219 }
220 
TestInBufSizes(void)221 static void TestInBufSizes(void)
222 {
223   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224 #if 1
225   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230   TestNewConvertWithBufferSizes(1,1);
231   TestNewConvertWithBufferSizes(2,3);
232   TestNewConvertWithBufferSizes(3,2);
233 #endif
234 }
235 
TestOutBufSizes(void)236 static void TestOutBufSizes(void)
237 {
238 #if 1
239   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245 
246 #endif
247 }
248 
249 
/*
 * Registers the converter tests of this file on the test tree rooted at
 * *root, each under the path "tsconv/nucnvtst/<function name>".
 * Tests are grouped by the UCONFIG_* switches that gate them; entries that
 * are intentionally not registered are kept as comments with the reason.
 */
void addTestNewConvert(TestNode** root)
{
    /* buffer-size driven conversions */
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
    addTest(root, TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
#endif
    addTest(root, TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
    addTest(root, TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
    addTest(root, TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
    addTest(root, TestUTF7, "tsconv/nucnvtst/TestUTF7");
    addTest(root, TestIMAP, "tsconv/nucnvtst/TestIMAP");
    addTest(root, TestUTF8, "tsconv/nucnvtst/TestUTF8");

    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
    addTest(root, TestCESU8, "tsconv/nucnvtst/TestCESU8");
    addTest(root, TestUTF16, "tsconv/nucnvtst/TestUTF16");
    addTest(root, TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
    addTest(root, TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
    addTest(root, TestUTF32, "tsconv/nucnvtst/TestUTF32");
    addTest(root, TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
    addTest(root, TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
#endif

    addTest(root, TestLATIN1, "tsconv/nucnvtst/TestLATIN1");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestSBCS, "tsconv/nucnvtst/TestSBCS");
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestDBCS, "tsconv/nucnvtst/TestDBCS");
    addTest(root, TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
#endif
    addTest(root, TestMBCS, "tsconv/nucnvtst/TestMBCS");

#ifdef U_ENABLE_GENERIC_ISO_2022
    addTest(root, TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
#endif

    addTest(root, TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
    addTest(root, TestJIS, "tsconv/nucnvtst/TestJIS");
    addTest(root, TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
    // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
    addTest(root, TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
    addTest(root, TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
    // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
    /*
     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
     */
    addTest(root, TestHZ, "tsconv/nucnvtst/TestHZ");
#endif

    addTest(root, TestSCSU, "tsconv/nucnvtst/TestSCSU");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
    addTest(root, TestGB18030, "tsconv/nucnvtst/TestGB18030");
    addTest(root, TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
    addTest(root, TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
    addTest(root, TestISCII, "tsconv/nucnvtst/TestISCII");
    addTest(root, TestJB5275, "tsconv/nucnvtst/TestJB5275");
    addTest(root, TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
#if !UCONFIG_NO_COLLATION
    addTest(root, TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
#endif

    addTest(root, TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
    addTest(root, TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
#endif

    addTest(root, TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
    addTest(root, TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
    // android-removed (no full ISO2022 CJK tables)  -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
    addTest(root, TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
#endif
}
336 
337 
338 /* Note that this test already makes use of statics, so it's not really
339    multithread safe.
340    This convenience function lets us make the error messages actually useful.
341 */
342 
setNuConvTestName(const char * codepage,const char * direction)343 static void setNuConvTestName(const char *codepage, const char *direction)
344 {
345     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
346         codepage,
347         direction,
348         (int)gInBufferSize,
349         (int)gOutBufferSize);
350 }
351 
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
  /* test was OK */
  TC_OK = 0,
  /* Match failed - err was printed */
  TC_MISMATCH,
  /* Test failed, don't print an err because it was already printed. */
  TC_FAIL
} ETestConvertResult;
358 
359 /* Note: This function uses global variables and it will not do offset
360 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)361 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
362                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
363 {
364     UErrorCode status = U_ZERO_ERROR;
365     UConverter *conv = 0;
366     char    junkout[NEW_MAX_BUFFER]; /* FIX */
367     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
368     char *p;
369     const UChar *src;
370     char *end;
371     char *targ;
372     int32_t *offs;
373     int i;
374     int32_t   realBufferSize;
375     char *realBufferEnd;
376     const UChar *realSourceEnd;
377     const UChar *sourceLimit;
378     UBool checkOffsets = TRUE;
379     UBool doFlush;
380 
381     for(i=0;i<NEW_MAX_BUFFER;i++)
382         junkout[i] = (char)0xF0;
383     for(i=0;i<NEW_MAX_BUFFER;i++)
384         junokout[i] = 0xFF;
385 
386     setNuConvTestName(codepage, "FROM");
387 
388     log_verbose("\n=========  %s\n", gNuConvTestName);
389 
390     conv = my_ucnv_open(codepage, &status);
391 
392     if(U_FAILURE(status))
393     {
394         log_data_err("Couldn't open converter %s\n",codepage);
395         return TC_FAIL;
396     }
397     if(useFallback){
398         ucnv_setFallback(conv,useFallback);
399     }
400 
401     log_verbose("Converter opened..\n");
402 
403     src = source;
404     targ = junkout;
405     offs = junokout;
406 
407     realBufferSize = UPRV_LENGTHOF(junkout);
408     realBufferEnd = junkout + realBufferSize;
409     realSourceEnd = source + sourceLen;
410 
411     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
412         checkOffsets = FALSE;
413 
414     do
415     {
416       end = nct_min(targ + gOutBufferSize, realBufferEnd);
417       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
418 
419       doFlush = (UBool)(sourceLimit == realSourceEnd);
420 
421       if(targ == realBufferEnd) {
422         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
423         return TC_FAIL;
424       }
425       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
426 
427 
428       status = U_ZERO_ERROR;
429 
430       ucnv_fromUnicode (conv,
431                         &targ,
432                         end,
433                         &src,
434                         sourceLimit,
435                         checkOffsets ? offs : NULL,
436                         doFlush, /* flush if we're at the end of the input data */
437                         &status);
438     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
439 
440     if(U_FAILURE(status)) {
441       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
442       return TC_FAIL;
443     }
444 
445     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
446                 sourceLen, targ-junkout);
447 
448     if(getTestOption(VERBOSITY_OPTION))
449     {
450       char junk[9999];
451       char offset_str[9999];
452       char *ptr;
453 
454       junk[0] = 0;
455       offset_str[0] = 0;
456       for(ptr = junkout;ptr<targ;ptr++) {
457         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
458         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
459       }
460 
461       log_verbose(junk);
462       printSeq((const uint8_t *)expect, expectLen);
463       if ( checkOffsets ) {
464         log_verbose("\nOffsets:");
465         log_verbose(offset_str);
466       }
467       log_verbose("\n");
468     }
469     ucnv_close(conv);
470 
471     if(expectLen != targ-junkout) {
472       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
473       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474       fprintf(stderr, "Got:\n");
475       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
476       fprintf(stderr, "Expected:\n");
477       printSeqErr((const unsigned char*)expect, expectLen);
478       return TC_MISMATCH;
479     }
480 
481     if (checkOffsets && (expectOffsets != 0) ) {
482       log_verbose("comparing %d offsets..\n", targ-junkout);
483       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
484         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
485         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
486         log_err("\n");
487         log_err("Got  :     ");
488         for(p=junkout;p<targ;p++) {
489           log_err("%d,", junokout[p-junkout]);
490         }
491         log_err("\n");
492         log_err("Expected:  ");
493         for(i=0; i<(targ-junkout); i++) {
494           log_err("%d,", expectOffsets[i]);
495         }
496         log_err("\n");
497       }
498     }
499 
500     log_verbose("comparing..\n");
501     if(!memcmp(junkout, expect, expectLen)) {
502       log_verbose("Matches!\n");
503       return TC_OK;
504     } else {
505       log_err("String does not match u->%s\n", gNuConvTestName);
506       printUSeqErr(source, sourceLen);
507       fprintf(stderr, "Got:\n");
508       printSeqErr((const unsigned char *)junkout, expectLen);
509       fprintf(stderr, "Expected:\n");
510       printSeqErr((const unsigned char *)expect, expectLen);
511 
512       return TC_MISMATCH;
513     }
514 }
515 
516 /* Note: This function uses global variables and it will not do offset
517 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)518 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
519                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
520 {
521     UErrorCode status = U_ZERO_ERROR;
522     UConverter *conv = 0;
523     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
524     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
525     const char *src;
526     const char *realSourceEnd;
527     const char *srcLimit;
528     UChar *p;
529     UChar *targ;
530     UChar *end;
531     int32_t *offs;
532     int i;
533     UBool   checkOffsets = TRUE;
534 
535     int32_t   realBufferSize;
536     UChar *realBufferEnd;
537 
538 
539     for(i=0;i<NEW_MAX_BUFFER;i++)
540         junkout[i] = 0xFFFE;
541 
542     for(i=0;i<NEW_MAX_BUFFER;i++)
543         junokout[i] = -1;
544 
545     setNuConvTestName(codepage, "TO");
546 
547     log_verbose("\n=========  %s\n", gNuConvTestName);
548 
549     conv = my_ucnv_open(codepage, &status);
550 
551     if(U_FAILURE(status))
552     {
553         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
554         return TC_FAIL;
555     }
556     if(useFallback){
557         ucnv_setFallback(conv,useFallback);
558     }
559     log_verbose("Converter opened..\n");
560 
561     src = (const char *)source;
562     targ = junkout;
563     offs = junokout;
564 
565     realBufferSize = UPRV_LENGTHOF(junkout);
566     realBufferEnd = junkout + realBufferSize;
567     realSourceEnd = src + sourcelen;
568 
569     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
570         checkOffsets = FALSE;
571 
572     do
573     {
574         end = nct_min( targ + gOutBufferSize, realBufferEnd);
575         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
576 
577         if(targ == realBufferEnd)
578         {
579             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
580             return TC_FAIL;
581         }
582         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
583 
584         /* oldTarg = targ; */
585 
586         status = U_ZERO_ERROR;
587 
588         ucnv_toUnicode (conv,
589                 &targ,
590                 end,
591                 &src,
592                 srcLimit,
593                 checkOffsets ? offs : NULL,
594                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
595                 &status);
596 
597         /*        offs += (targ-oldTarg); */
598 
599       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
600 
601     if(U_FAILURE(status))
602     {
603         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
604         return TC_FAIL;
605     }
606 
607     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
608         sourcelen, targ-junkout);
609     if(getTestOption(VERBOSITY_OPTION))
610     {
611         char junk[9999];
612         char offset_str[9999];
613         UChar *ptr;
614 
615         junk[0] = 0;
616         offset_str[0] = 0;
617 
618         for(ptr = junkout;ptr<targ;ptr++)
619         {
620             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
621             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
622         }
623 
624         log_verbose(junk);
625         printUSeq(expect, expectlen);
626         if ( checkOffsets )
627           {
628             log_verbose("\nOffsets:");
629             log_verbose(offset_str);
630           }
631         log_verbose("\n");
632     }
633     ucnv_close(conv);
634 
635     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
636 
637     if (checkOffsets && (expectOffsets != 0))
638     {
639         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
640             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
641             log_err("Got:      ");
642             for(p=junkout;p<targ;p++) {
643                 log_err("%d,", junokout[p-junkout]);
644             }
645             log_err("\n");
646             log_err("Expected: ");
647             for(i=0; i<(targ-junkout); i++) {
648                 log_err("%d,", expectOffsets[i]);
649             }
650             log_err("\n");
651             log_err("output:   ");
652             for(i=0; i<(targ-junkout); i++) {
653                 log_err("%X,", junkout[i]);
654             }
655             log_err("\n");
656             log_err("input:    ");
657             for(i=0; i<(src-(const char *)source); i++) {
658                 log_err("%X,", (unsigned char)source[i]);
659             }
660             log_err("\n");
661         }
662     }
663 
664     if(!memcmp(junkout, expect, expectlen*2))
665     {
666         log_verbose("Matches!\n");
667         return TC_OK;
668     }
669     else
670     {
671         log_err("String does not match. %s\n", gNuConvTestName);
672         log_verbose("String does not match. %s\n", gNuConvTestName);
673         printf("\nGot:");
674         printUSeqErr(junkout, expectlen);
675         printf("\nExpected:");
676         printUSeqErr(expect, expectlen);
677         return TC_MISMATCH;
678     }
679 }
680 
681 
TestNewConvertWithBufferSizes(int32_t outsize,int32_t insize)682 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
683 {
684 /** test chars #1 */
685     /*  1 2 3  1Han 2Han 3Han .  */
686     static const UChar   sampleText[] =
687      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
688     static const UChar sampleTextRoundTripUnmappable[] =
689     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
690 
691 
692     static const uint8_t expectedUTF8[] =
693      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
694     static const int32_t toUTF8Offs[] =
695      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
696     static const int32_t fmUTF8Offs[] =
697      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
698 
699 #ifdef U_ENABLE_GENERIC_ISO_2022
700     /* Same as UTF8, but with ^[%B preceeding */
701     static const const uint8_t expectedISO2022[] =
702      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
703     static const int32_t toISO2022Offs[]     =
704      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
705        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
706     static const int32_t fmISO2022Offs[] =
707      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
708 #endif
709 
710     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
711     static const uint8_t expectedIBM930[] =
712      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
713     static const int32_t toIBM930Offs[] =
714      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
715     static const int32_t fmIBM930Offs[] =
716      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
717 
718     /* 1 2 3 0 h1 h2 h3 . MBCS*/
719     static const uint8_t expectedIBM943[] =
720      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
721     static const int32_t toIBM943Offs    [] =
722      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
723     static const int32_t fmIBM943Offs[] =
724      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
725 
726     /* 1 2 3 0 h1 h2 h3 . DBCS*/
727     static const uint8_t expectedIBM9027[] =
728      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
729     static const int32_t toIBM9027Offs    [] =
730      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
731 
732      /* 1 2 3 0 <?> <?> <?> . SBCS*/
733     static const uint8_t expectedIBM920[] =
734      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
735     static const int32_t toIBM920Offs    [] =
736      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737 
738     /* 1 2 3 0 <?> <?> <?> . SBCS*/
739     static const uint8_t expectedISO88593[] =
740      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
741     static const int32_t toISO88593Offs[]     =
742      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
743 
744     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
745     static const uint8_t expectedLATIN1[] =
746      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
747     static const int32_t toLATIN1Offs[]     =
748      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
749 
750 
751     /*  etc */
752     static const uint8_t expectedUTF16BE[] =
753      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
754     static const int32_t toUTF16BEOffs[]=
755      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756     static const int32_t fmUTF16BEOffs[] =
757      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
758 
759     static const uint8_t expectedUTF16LE[] =
760      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
761     static const int32_t toUTF16LEOffs[]=
762      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
763     static const int32_t fmUTF16LEOffs[] =
764      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
765 
766     static const uint8_t expectedUTF32BE[] =
767      { 0x00, 0x00, 0x00, 0x31,
768        0x00, 0x00, 0x00, 0x32,
769        0x00, 0x00, 0x00, 0x33,
770        0x00, 0x00, 0x00, 0x00,
771        0x00, 0x00, 0x4e, 0x00,
772        0x00, 0x00, 0x4e, 0x8c,
773        0x00, 0x00, 0x4e, 0x09,
774        0x00, 0x00, 0x00, 0x2e,
775        0x00, 0x02, 0x00, 0x21 };
776     static const int32_t toUTF32BEOffs[]=
777      { 0x00, 0x00, 0x00, 0x00,
778        0x01, 0x01, 0x01, 0x01,
779        0x02, 0x02, 0x02, 0x02,
780        0x03, 0x03, 0x03, 0x03,
781        0x04, 0x04, 0x04, 0x04,
782        0x05, 0x05, 0x05, 0x05,
783        0x06, 0x06, 0x06, 0x06,
784        0x07, 0x07, 0x07, 0x07,
785        0x08, 0x08, 0x08, 0x08,
786        0x08, 0x08, 0x08, 0x08 };
787     static const int32_t fmUTF32BEOffs[] =
788      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
789 
790     static const uint8_t expectedUTF32LE[] =
791      { 0x31, 0x00, 0x00, 0x00,
792        0x32, 0x00, 0x00, 0x00,
793        0x33, 0x00, 0x00, 0x00,
794        0x00, 0x00, 0x00, 0x00,
795        0x00, 0x4e, 0x00, 0x00,
796        0x8c, 0x4e, 0x00, 0x00,
797        0x09, 0x4e, 0x00, 0x00,
798        0x2e, 0x00, 0x00, 0x00,
799        0x21, 0x00, 0x02, 0x00 };
800     static const int32_t toUTF32LEOffs[]=
801      { 0x00, 0x00, 0x00, 0x00,
802        0x01, 0x01, 0x01, 0x01,
803        0x02, 0x02, 0x02, 0x02,
804        0x03, 0x03, 0x03, 0x03,
805        0x04, 0x04, 0x04, 0x04,
806        0x05, 0x05, 0x05, 0x05,
807        0x06, 0x06, 0x06, 0x06,
808        0x07, 0x07, 0x07, 0x07,
809        0x08, 0x08, 0x08, 0x08,
810        0x08, 0x08, 0x08, 0x08 };
811     static const int32_t fmUTF32LEOffs[] =
812      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
813 
814 
815 
816 
817 /** Test chars #2 **/
818 
819     /* Sahha [health],  slashed h's */
820     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
821     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
822 
823     /* LMBCS */
824     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
825     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
826     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
827     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
828     /*********************************** START OF CODE finally *************/
829 
830     gInBufferSize = insize;
831     gOutBufferSize = outsize;
832 
833     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
834 
835 
836     /*UTF-8*/
837     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
838         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
839 
840     log_verbose("Test surrogate behaviour for UTF8\n");
841     {
842         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
843         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
844                            0xf0, 0x90, 0x90, 0x81,
845                            0xef, 0xbf, 0xbd
846         };
847         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
848         testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
849                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
850 
851 
852     }
853 
854 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
855     /*ISO-2022*/
856     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
857         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
858 #endif
859 
860     /*UTF16 LE*/
861     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
862         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
863     /*UTF16 BE*/
864     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
865         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
866     /*UTF32 LE*/
867     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
868         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
869     /*UTF32 BE*/
870     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
871         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
872 
873     /*LATIN_1*/
874     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
875         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
876 
877 #if !UCONFIG_NO_LEGACY_CONVERSION
878     /*EBCDIC_STATEFUL*/
879     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
880         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
881 
882     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
883         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
884 
885     /*MBCS*/
886 
887     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
888         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
889     /*DBCS*/
890     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
891         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
892     /*SBCS*/
893     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
894         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
895     /*SBCS*/
896     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
897         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
898 #endif
899 
900 
901 /****/
902 
903     /*UTF-8*/
904     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
905         sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
906 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
907     /*ISO-2022*/
908     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
909         sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
910 #endif
911 
912     /*UTF16 LE*/
913     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
914         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
915     /*UTF16 BE*/
916     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
917         sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
918     /*UTF32 LE*/
919     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
920         sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
921     /*UTF32 BE*/
922     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
923         sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
924 
925 #if !UCONFIG_NO_LEGACY_CONVERSION
926     /*EBCDIC_STATEFUL*/
927     testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
928             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
929     /*MBCS*/
930     testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
931             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
932 #endif
933 
934     /* Try it again to make sure it still works */
935     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
936         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
937 
938 #if !UCONFIG_NO_LEGACY_CONVERSION
939     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
940         malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
941 
942     testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
943         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
944 
945     /*LMBCS*/
946     testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
947         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
948     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
949         LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
950 #endif
951 
952     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
953     {
954         /* encode directly set D and set O */
955         static const uint8_t utf7[] = {
956             /*
957                 Hi Mom -+Jjo--!
958                 A+ImIDkQ.
959                 +-
960                 +ZeVnLIqe-
961             */
962             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
963             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
964             0x2b, 0x2d,
965             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
966         };
967         static const UChar unicode[] = {
968             /*
969                 Hi Mom -<WHITE SMILING FACE>-!
970                 A<NOT IDENTICAL TO><ALPHA>.
971                 +
972                 [Japanese word "nihongo"]
973             */
974             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
975             0x41, 0x2262, 0x0391, 0x2e,
976             0x2b,
977             0x65e5, 0x672c, 0x8a9e
978         };
979         static const int32_t toUnicodeOffsets[] = {
980             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
981             15, 17, 19, 23,
982             24,
983             27, 29, 32
984         };
985         static const int32_t fromUnicodeOffsets[] = {
986             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987             11, 12, 12, 12, 13, 13, 13, 13, 14,
988             15, 15,
989             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
990         };
991 
992         /* same but escaping set O (the exclamation mark) */
993         static const uint8_t utf7Restricted[] = {
994             /*
995                 Hi Mom -+Jjo--+ACE-
996                 A+ImIDkQ.
997                 +-
998                 +ZeVnLIqe-
999             */
1000             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1001             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1002             0x2b, 0x2d,
1003             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1004         };
1005         static const int32_t toUnicodeOffsetsR[] = {
1006             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1007             19, 21, 23, 27,
1008             28,
1009             31, 33, 36
1010         };
1011         static const int32_t fromUnicodeOffsetsR[] = {
1012             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1013             11, 12, 12, 12, 13, 13, 13, 13, 14,
1014             15, 15,
1015             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1016         };
1017 
1018         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1019 
1020         testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1021 
1022         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1023 
1024         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1025     }
1026 
1027     /*
1028      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1029      * modified according to RFC 2060,
1030      * and supplemented with the one example in RFC 2060 itself.
1031      */
1032     {
1033         static const uint8_t imap[] = {
1034             /*  Hi Mom -&Jjo--!
1035                 A&ImIDkQ-.
1036                 &-
1037                 &ZeVnLIqe-
1038                 \
1039                 ~peter
1040                 /mail
1041                 /&ZeVnLIqe-
1042                 /&U,BTFw-
1043             */
1044             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1045             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1046             0x26, 0x2d,
1047             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1048             0x5c,
1049             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1050             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1051             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1052             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1053         };
1054         static const UChar unicode[] = {
1055             /*  Hi Mom -<WHITE SMILING FACE>-!
1056                 A<NOT IDENTICAL TO><ALPHA>.
1057                 &
1058                 [Japanese word "nihongo"]
1059                 \
1060                 ~peter
1061                 /mail
1062                 /<65e5, 672c, 8a9e>
1063                 /<53f0, 5317>
1064             */
1065             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1066             0x41, 0x2262, 0x0391, 0x2e,
1067             0x26,
1068             0x65e5, 0x672c, 0x8a9e,
1069             0x5c,
1070             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1071             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1072             0x2f, 0x65e5, 0x672c, 0x8a9e,
1073             0x2f, 0x53f0, 0x5317
1074         };
1075         static const int32_t toUnicodeOffsets[] = {
1076             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1077             15, 17, 19, 24,
1078             25,
1079             28, 30, 33,
1080             37,
1081             38, 39, 40, 41, 42, 43,
1082             44, 45, 46, 47, 48,
1083             49, 51, 53, 56,
1084             60, 62, 64
1085         };
1086         static const int32_t fromUnicodeOffsets[] = {
1087             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1088             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1089             15, 15,
1090             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1091             19,
1092             20, 21, 22, 23, 24, 25,
1093             26, 27, 28, 29, 30,
1094             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1095             35, 36, 36, 36, 37, 37, 37, 37, 37
1096         };
1097 
1098         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1099 
1100         testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1101     }
1102 
1103     /* Test UTF-8 bad data handling*/
1104     {
1105         static const uint8_t utf8[]={
1106             0x61,
1107             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1108             0x00,
1109             0x62,
1110             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1111             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1113             0xdf, 0xbf,                     /* 7ff */
1114             0xbf,                           /* truncated tail */
1115             0xf4, 0x90, 0x80, 0x80,         /* 110000 */
1116             0x02
1117         };
1118 
1119         static const uint16_t utf8Expected[]={
1120             0x0061,
1121             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1122             0x0000,
1123             0x0062,
1124             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1125             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126             0xdbff, 0xdfff,
1127             0x07ff,
1128             0xfffd,
1129             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1130             0x0002
1131         };
1132 
1133         static const int32_t utf8Offsets[]={
1134             0,
1135             1, 2, 3, 4,
1136             5,
1137             6,
1138             7, 8, 9, 10, 11,
1139             12, 13, 14, 15, 16,
1140             17, 17,
1141             21,
1142             23,
1143             24, 25, 26, 27,
1144             28
1145         };
1146         testConvertToU(utf8, sizeof(utf8),
1147                        utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1148 
1149     }
1150 
1151     /* Test UTF-32BE bad data handling*/
1152     {
1153         static const uint8_t utf32[]={
1154             0x00, 0x00, 0x00, 0x61,
1155             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1156             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1157             0x00, 0x00, 0x00, 0x62,
1158             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1159             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1160             0x00, 0x00, 0x01, 0x62,
1161             0x00, 0x00, 0x02, 0x62
1162         };
1163         static const uint16_t utf32Expected[]={
1164             0x0061,
1165             0xfffd,         /* 0x110000 out of range */
1166             0xDBFF,         /* 0x10FFFF in range */
1167             0xDFFF,
1168             0x0062,
1169             0xfffd,         /* 0xffffffff out of range */
1170             0xfffd,         /* 0x7fffffff out of range */
1171             0x0162,
1172             0x0262
1173         };
1174         static const int32_t utf32Offsets[]={
1175             0, 4, 8, 8, 12, 16, 20, 24, 28
1176         };
1177         static const uint8_t utf32ExpectedBack[]={
1178             0x00, 0x00, 0x00, 0x61,
1179             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1180             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1181             0x00, 0x00, 0x00, 0x62,
1182             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1183             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1184             0x00, 0x00, 0x01, 0x62,
1185             0x00, 0x00, 0x02, 0x62
1186         };
1187         static const int32_t utf32OffsetsBack[]={
1188             0,0,0,0,
1189             1,1,1,1,
1190             2,2,2,2,
1191             4,4,4,4,
1192             5,5,5,5,
1193             6,6,6,6,
1194             7,7,7,7,
1195             8,8,8,8
1196         };
1197 
1198         testConvertToU(utf32, sizeof(utf32),
1199                        utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1200         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1201             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1202     }
1203 
1204     /* Test UTF-32LE bad data handling*/
1205     {
1206         static const uint8_t utf32[]={
1207             0x61, 0x00, 0x00, 0x00,
1208             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1209             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1210             0x62, 0x00, 0x00, 0x00,
1211             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1212             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1213             0x62, 0x01, 0x00, 0x00,
1214             0x62, 0x02, 0x00, 0x00,
1215         };
1216 
1217         static const uint16_t utf32Expected[]={
1218             0x0061,
1219             0xfffd,         /* 0x110000 out of range */
1220             0xDBFF,         /* 0x10FFFF in range */
1221             0xDFFF,
1222             0x0062,
1223             0xfffd,         /* 0xffffffff out of range */
1224             0xfffd,         /* 0x7fffffff out of range */
1225             0x0162,
1226             0x0262
1227         };
1228         static const int32_t utf32Offsets[]={
1229             0, 4, 8, 8, 12, 16, 20, 24, 28
1230         };
1231         static const uint8_t utf32ExpectedBack[]={
1232             0x61, 0x00, 0x00, 0x00,
1233             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1234             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1235             0x62, 0x00, 0x00, 0x00,
1236             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1237             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1238             0x62, 0x01, 0x00, 0x00,
1239             0x62, 0x02, 0x00, 0x00
1240         };
1241         static const int32_t utf32OffsetsBack[]={
1242             0,0,0,0,
1243             1,1,1,1,
1244             2,2,2,2,
1245             4,4,4,4,
1246             5,5,5,5,
1247             6,6,6,6,
1248             7,7,7,7,
1249             8,8,8,8
1250         };
1251         testConvertToU(utf32, sizeof(utf32),
1252             utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1253         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1254             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1255     }
1256 }
1257 
TestCoverageMBCS()1258 static void TestCoverageMBCS(){
1259 #if 0
1260     UErrorCode status = U_ZERO_ERROR;
1261     const char *directory = loadTestData(&status);
1262     char* tdpath = NULL;
1263     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1264     int len = strlen(directory);
1265     char* index=NULL;
1266 
1267     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1268     uprv_strcpy(saveDirectory,u_getDataDirectory());
1269     log_verbose("Retrieved data directory %s \n",saveDirectory);
1270     uprv_strcpy(tdpath,directory);
1271     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1272 
1273     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1274             *(index+1)=0;
1275     }
1276     u_setDataDirectory(tdpath);
1277     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1278 #endif
1279 
1280     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1281       which is test file for MBCS conversion with single-byte codepage data.*/
1282     {
1283 
1284         /* MBCS with single byte codepage data test1.ucm*/
1285         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1286         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1287         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1288 
1289         /*from Unicode*/
1290         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1291             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1292     }
1293 
1294     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1295       which is test file for MBCS conversion with three-byte codepage data.*/
1296     {
1297 
1298         /* MBCS with three byte codepage data test3.ucm*/
1299         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1300         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1301         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1302 
1303         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1304         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1305         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1306 
1307         /*from Unicode*/
1308         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1309             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1310 
1311         /*to Unicode*/
1312         testConvertToU(test3input, sizeof(test3input),
1313             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1314 
1315     }
1316 
1317     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1318       which is test file for MBCS conversion with four-byte codepage data.*/
1319     {
1320 
1321         /* MBCS with three byte codepage data test4.ucm*/
1322         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1323         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1324         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1325 
1326         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1327         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1328         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1329 
1330         /*from Unicode*/
1331         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1332             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1333 
1334         /*to Unicode*/
1335         testConvertToU(test4input, sizeof(test4input),
1336             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1337 
1338     }
1339 #if 0
1340     free(tdpath);
1341     /* restore the original data directory */
1342     log_verbose("Setting the data directory to %s \n", saveDirectory);
1343     u_setDataDirectory(saveDirectory);
1344     free(saveDirectory);
1345 #endif
1346 
1347 }
1348 
/*
 * Opens the converter named convName via my_ucnv_open() and checks that
 * ucnv_getType() reports convType for it.
 *
 * convName  name passed to my_ucnv_open()
 * convType  the converter type the test expects
 *
 * If the converter cannot be opened, this is reported with log_data_err()
 * and the type check is skipped. A type mismatch is reported with log_err(),
 * showing both the type actually returned and the expected one.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter* myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* Print the value that was actually returned, not just the expected one. */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X, expected 0x%X\n",
            convName, (unsigned int)actualType, (unsigned int)convType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }

    ucnv_close(myConverter);
}
1371 
TestConverterTypesAndStarters()1372 static void TestConverterTypesAndStarters()
1373 {
1374 #if !UCONFIG_NO_LEGACY_CONVERSION
1375     UConverter* myConverter;
1376     UErrorCode err = U_ZERO_ERROR;
1377     UBool mystarters[256];
1378 
1379 /*    const UBool expectedKSCstarters[256] = {
1380         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1406 
1407 
1408     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1409 
1410     myConverter = ucnv_open("ksc", &err);
1411     if (U_FAILURE(err)) {
1412       log_data_err("Failed to create an ibm-ksc converter\n");
1413       return;
1414     }
1415     else
1416     {
1417         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1418             log_err("ucnv_getType Failed for ibm-949\n");
1419         else
1420             log_verbose("ucnv_getType ibm-949 ok\n");
1421 
1422         if(myConverter!=NULL)
1423             ucnv_getStarters(myConverter, mystarters, &err);
1424 
1425         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1426           log_err("Failed ucnv_getStarters for ksc\n");
1427           else
1428           log_verbose("ucnv_getStarters ok\n");*/
1429 
1430     }
1431     ucnv_close(myConverter);
1432 
1433     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1434     TestConverterType("ibm-878", UCNV_SBCS);
1435 #endif
1436 
1437     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1438 
1439     TestConverterType("ibm-1208", UCNV_UTF8);
1440 
1441     TestConverterType("utf-8", UCNV_UTF8);
1442     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1443     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1444     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1445     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1446 
1447 #if !UCONFIG_NO_LEGACY_CONVERSION
1448 
1449 #if defined(U_ENABLE_GENERIC_ISO_2022)
1450     TestConverterType("iso-2022", UCNV_ISO_2022);
1451 #endif
1452 
1453     TestConverterType("hz", UCNV_HZ);
1454 #endif
1455 
1456     TestConverterType("scsu", UCNV_SCSU);
1457 
1458 #if !UCONFIG_NO_LEGACY_CONVERSION
1459     TestConverterType("x-iscii-de", UCNV_ISCII);
1460 #endif
1461 
1462     TestConverterType("ascii", UCNV_US_ASCII);
1463     TestConverterType("utf-7", UCNV_UTF7);
1464     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1465     TestConverterType("bocu-1", UCNV_BOCU1);
1466 }
1467 
1468 static void
TestAmbiguousConverter(UConverter * cnv)1469 TestAmbiguousConverter(UConverter *cnv) {
1470     static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1471     UChar outUnicode[20]={ 0, 0, 0, 0 };
1472 
1473     const char *s;
1474     UChar *u;
1475     UErrorCode errorCode;
1476     UBool isAmbiguous;
1477 
1478     /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1479     errorCode=U_ZERO_ERROR;
1480     s=inBytes;
1481     u=outUnicode;
1482     ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1483     if(U_FAILURE(errorCode)) {
1484         /* we do not care about general failures in this test; the input may just not be mappable */
1485         return;
1486     }
1487 
1488     if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1489         /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1490         /* There are some encodings that are partially ASCII based,
1491         like the ISO-7 and GSM series of codepages, which we ignore. */
1492         return;
1493     }
1494 
1495     isAmbiguous=ucnv_isAmbiguous(cnv);
1496 
1497     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1498     if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1499         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1500             ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1501         return;
1502     }
1503 
1504     if(outUnicode[2]!=0x5c) {
1505         /* needs fixup, fix it */
1506         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1507         if(outUnicode[2]!=0x5c) {
1508             /* the fix failed */
1509             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1510             return;
1511         }
1512     }
1513 }
1514 
TestAmbiguous()1515 static void TestAmbiguous()
1516 {
1517     UErrorCode status = U_ZERO_ERROR;
1518     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1519     static const char target[] = {
1520         /* "\\usr\\local\\share\\data\\icutest.txt" */
1521         0x5c, 0x75, 0x73, 0x72,
1522         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1523         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1524         0x5c, 0x64, 0x61, 0x74, 0x61,
1525         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1526         0
1527     };
1528     UChar asciiResult[200], sjisResult[200];
1529     int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1530     const char *name;
1531 
1532     /* enumerate all converters */
1533     status=U_ZERO_ERROR;
1534     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1535         cnv=ucnv_open(name, &status);
1536         if(U_SUCCESS(status)) {
1537             /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1538             const char* cnvName = ucnv_getName(cnv, &status);
1539             if (strlen(cnvName) < 8 ||
1540                 strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1541             TestAmbiguousConverter(cnv);
1542             }
1543             /* END android-changed */
1544             ucnv_close(cnv);
1545         } else {
1546             log_err("error: unable to open available converter \"%s\"\n", name);
1547             status=U_ZERO_ERROR;
1548         }
1549     }
1550 
1551 #if !UCONFIG_NO_LEGACY_CONVERSION
1552     sjis_cnv = ucnv_open("ibm-943", &status);
1553     if (U_FAILURE(status))
1554     {
1555         log_data_err("Failed to create a SJIS converter\n");
1556         return;
1557     }
1558     ascii_cnv = ucnv_open("LATIN-1", &status);
1559     if (U_FAILURE(status))
1560     {
1561         log_data_err("Failed to create a LATIN-1 converter\n");
1562         ucnv_close(sjis_cnv);
1563         return;
1564     }
1565     /* convert target from SJIS to Unicode */
1566     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1567     if (U_FAILURE(status))
1568     {
1569         log_err("Failed to convert the SJIS string.\n");
1570         ucnv_close(sjis_cnv);
1571         ucnv_close(ascii_cnv);
1572         return;
1573     }
1574     /* convert target from Latin-1 to Unicode */
1575     /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1576     if (U_FAILURE(status))
1577     {
1578         log_err("Failed to convert the Latin-1 string.\n");
1579         ucnv_close(sjis_cnv);
1580         ucnv_close(ascii_cnv);
1581         return;
1582     }
1583     if (!ucnv_isAmbiguous(sjis_cnv))
1584     {
1585         log_err("SJIS converter should contain ambiguous character mappings.\n");
1586         ucnv_close(sjis_cnv);
1587         ucnv_close(ascii_cnv);
1588         return;
1589     }
1590     if (u_strcmp(sjisResult, asciiResult) == 0)
1591     {
1592         log_err("File separators for SJIS don't need to be fixed.\n");
1593     }
1594     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1595     if (u_strcmp(sjisResult, asciiResult) != 0)
1596     {
1597         log_err("Fixing file separator for SJIS failed.\n");
1598     }
1599     ucnv_close(sjis_cnv);
1600     ucnv_close(ascii_cnv);
1601 #endif
1602 }
1603 
1604 static void
TestSignatureDetection()1605 TestSignatureDetection(){
1606     /* with null terminated strings */
1607     {
1608         static const char* data[] = {
1609                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1610                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1611                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1612                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1613 
1614                 "\xFE\xFF",             /* UTF-16BE */
1615                 "\xFF\xFE",             /* UTF-16LE */
1616                 "\xEF\xBB\xBF",         /* UTF-8    */
1617                 "\x0E\xFE\xFF",         /* SCSU     */
1618 
1619                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1620                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1621                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1622                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1623 
1624                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1625                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1626                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1627                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1628                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1629 
1630                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1631         };
1632         static const char* expected[] = {
1633                 "UTF-16BE",
1634                 "UTF-16LE",
1635                 "UTF-8",
1636                 "SCSU",
1637 
1638                 "UTF-16BE",
1639                 "UTF-16LE",
1640                 "UTF-8",
1641                 "SCSU",
1642 
1643                 "UTF-16BE",
1644                 "UTF-16LE",
1645                 "UTF-8",
1646                 "SCSU",
1647 
1648                 "UTF-7",
1649                 "UTF-7",
1650                 "UTF-7",
1651                 "UTF-7",
1652                 "UTF-7",
1653                 "UTF-EBCDIC"
1654         };
1655         static const int32_t expectedLength[] ={
1656             2,
1657             2,
1658             3,
1659             3,
1660 
1661             2,
1662             2,
1663             3,
1664             3,
1665 
1666             2,
1667             2,
1668             3,
1669             3,
1670 
1671             5,
1672             4,
1673             4,
1674             4,
1675             4,
1676             4
1677         };
1678         int i=0;
1679         UErrorCode err;
1680         int32_t signatureLength = -1;
1681         const char* source = NULL;
1682         const char* enc = NULL;
1683         for( ; i<UPRV_LENGTHOF(data); i++){
1684             err = U_ZERO_ERROR;
1685             source = data[i];
1686             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1687             if(U_FAILURE(err)){
1688                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1689                 continue;
1690             }
1691             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1692                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1693                 continue;
1694             }
1695             if(signatureLength != expectedLength[i]){
1696                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1697             }
1698         }
1699     }
1700     {
1701         static const char* data[] = {
1702                 "\xFE\xFF\x00",         /* UTF-16BE */
1703                 "\xFF\xFE\x00",         /* UTF-16LE */
1704                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1705                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1706                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1707                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1708                 "\xFE\xFF",             /* UTF-16BE */
1709                 "\xFF\xFE",             /* UTF-16LE */
1710                 "\xEF\xBB\xBF",         /* UTF-8    */
1711                 "\x0E\xFE\xFF",         /* SCSU     */
1712                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1713                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1714                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1715                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1716                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1717                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1718                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1719                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1720                 "\xFB\xEE\x28",         /* BOCU-1   */
1721                 "\xFF\x41\x42"          /* NULL     */
1722         };
1723         static const int len[] = {
1724             3,
1725             3,
1726             4,
1727             4,
1728             4,
1729             4,
1730             2,
1731             2,
1732             3,
1733             3,
1734             4,
1735             4,
1736             4,
1737             4,
1738             4,
1739             4,
1740             5,
1741             5,
1742             3,
1743             3
1744         };
1745 
1746         static const char* expected[] = {
1747                 "UTF-16BE",
1748                 "UTF-16LE",
1749                 "UTF-8",
1750                 "SCSU",
1751                 "UTF-32BE",
1752                 "UTF-32LE",
1753                 "UTF-16BE",
1754                 "UTF-16LE",
1755                 "UTF-8",
1756                 "SCSU",
1757                 "UTF-32BE",
1758                 "UTF-32LE",
1759                 "UTF-16BE",
1760                 "UTF-16LE",
1761                 "UTF-8",
1762                 "SCSU",
1763                 "UTF-32BE",
1764                 "UTF-32LE",
1765                 "BOCU-1",
1766                 NULL
1767         };
1768         static const int32_t expectedLength[] ={
1769             2,
1770             2,
1771             3,
1772             3,
1773             4,
1774             4,
1775             2,
1776             2,
1777             3,
1778             3,
1779             4,
1780             4,
1781             2,
1782             2,
1783             3,
1784             3,
1785             4,
1786             4,
1787             3,
1788             0
1789         };
1790         int i=0;
1791         UErrorCode err;
1792         int32_t signatureLength = -1;
1793         int32_t sourceLength=-1;
1794         const char* source = NULL;
1795         const char* enc = NULL;
1796         for( ; i<UPRV_LENGTHOF(data); i++){
1797             err = U_ZERO_ERROR;
1798             source = data[i];
1799             sourceLength = len[i];
1800             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1801             if(U_FAILURE(err)){
1802                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1803                 continue;
1804             }
1805             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1806                 if(expected[i] !=NULL){
1807                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1808                  continue;
1809                 }
1810             }
1811             if(signatureLength != expectedLength[i]){
1812                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1813             }
1814         }
1815     }
1816 }
1817 
TestUTF7()1818 static void TestUTF7() {
1819     /* test input */
1820     static const uint8_t in[]={
1821         /* H - +Jjo- - ! +- +2AHcAQ */
1822         0x48,
1823         0x2d,
1824         0x2b, 0x4a, 0x6a, 0x6f,
1825         0x2d, 0x2d,
1826         0x21,
1827         0x2b, 0x2d,
1828         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1829     };
1830 
1831     /* expected test results */
1832     static const int32_t results[]={
1833         /* number of bytes read, code point */
1834         1, 0x48,
1835         1, 0x2d,
1836         4, 0x263a, /* <WHITE SMILING FACE> */
1837         2, 0x2d,
1838         1, 0x21,
1839         2, 0x2b,
1840         7, 0x10401
1841     };
1842 
1843     const char *cnvName;
1844     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1845     UErrorCode errorCode=U_ZERO_ERROR;
1846     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1847     if(U_FAILURE(errorCode)) {
1848         log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1849         return;
1850     }
1851     TestNextUChar(cnv, source, limit, results, "UTF-7");
1852     /* Test the condition when source >= sourceLimit */
1853     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1854     cnvName = ucnv_getName(cnv, &errorCode);
1855     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1856         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1857     }
1858     ucnv_close(cnv);
1859 }
1860 
TestIMAP()1861 static void TestIMAP() {
1862     /* test input */
1863     static const uint8_t in[]={
1864         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1865         0x48,
1866         0x2d,
1867         0x26, 0x4a, 0x6a, 0x6f,
1868         0x2d, 0x2d,
1869         0x21,
1870         0x26, 0x2d,
1871         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1872     };
1873 
1874     /* expected test results */
1875     static const int32_t results[]={
1876         /* number of bytes read, code point */
1877         1, 0x48,
1878         1, 0x2d,
1879         4, 0x263a, /* <WHITE SMILING FACE> */
1880         2, 0x2d,
1881         1, 0x21,
1882         2, 0x26,
1883         7, 0x10401
1884     };
1885 
1886     const char *cnvName;
1887     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1888     UErrorCode errorCode=U_ZERO_ERROR;
1889     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1890     if(U_FAILURE(errorCode)) {
1891         log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1892         return;
1893     }
1894     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1895     /* Test the condition when source >= sourceLimit */
1896     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1897     cnvName = ucnv_getName(cnv, &errorCode);
1898     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1899         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1900     }
1901     ucnv_close(cnv);
1902 }
1903 
TestUTF8()1904 static void TestUTF8() {
1905     /* test input */
1906     static const uint8_t in[]={
1907         0x61,
1908         0xc2, 0x80,
1909         0xe0, 0xa0, 0x80,
1910         0xf0, 0x90, 0x80, 0x80,
1911         0xf4, 0x84, 0x8c, 0xa1,
1912         0xf0, 0x90, 0x90, 0x81
1913     };
1914 
1915     /* expected test results */
1916     static const int32_t results[]={
1917         /* number of bytes read, code point */
1918         1, 0x61,
1919         2, 0x80,
1920         3, 0x800,
1921         4, 0x10000,
1922         4, 0x104321,
1923         4, 0x10401
1924     };
1925 
1926     /* error test input */
1927     static const uint8_t in2[]={
1928         0x61,
1929         0xc0, 0x80,                     /* illegal non-shortest form */
1930         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1931         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1932         0xc0, 0xc0,                     /* illegal trail byte */
1933         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1934         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1935         0xfe,                           /* illegal byte altogether */
1936         0x62
1937     };
1938 
1939     /* expected error test results */
1940     static const int32_t results2[]={
1941         /* number of bytes read, code point */
1942         1, 0x61,
1943         22, 0x62
1944     };
1945 
1946     UConverterToUCallback cb;
1947     const void *p;
1948 
1949     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1950     UErrorCode errorCode=U_ZERO_ERROR;
1951     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1952     if(U_FAILURE(errorCode)) {
1953         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1954         return;
1955     }
1956     TestNextUChar(cnv, source, limit, results, "UTF-8");
1957     /* Test the condition when source >= sourceLimit */
1958     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1959 
1960     /* test error behavior with a skip callback */
1961     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1962     source=(const char *)in2;
1963     limit=(const char *)(in2+sizeof(in2));
1964     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1965 
1966     ucnv_close(cnv);
1967 }
1968 
TestCESU8()1969 static void TestCESU8() {
1970     /* test input */
1971     static const uint8_t in[]={
1972         0x61,
1973         0xc2, 0x80,
1974         0xe0, 0xa0, 0x80,
1975         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1976         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1977         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1978         0xef, 0xbf, 0xbc
1979     };
1980 
1981     /* expected test results */
1982     static const int32_t results[]={
1983         /* number of bytes read, code point */
1984         1, 0x61,
1985         2, 0x80,
1986         3, 0x800,
1987         6, 0x10000,
1988         3, 0xdc01,
1989         -1,0xd802,  /* may read 3 or 6 bytes */
1990         -1,0x10ffff,/* may read 0 or 3 bytes */
1991         3, 0xfffc
1992     };
1993 
1994     /* error test input */
1995     static const uint8_t in2[]={
1996         0x61,
1997         0xc0, 0x80,                     /* illegal non-shortest form */
1998         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1999         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
2000         0xc0, 0xc0,                     /* illegal trail byte */
2001         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
2002         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
2003         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2004         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2005         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2006         0xfe,                           /* illegal byte altogether */
2007         0x62
2008     };
2009 
2010     /* expected error test results */
2011     static const int32_t results2[]={
2012         /* number of bytes read, code point */
2013         1, 0x61,
2014         34, 0x62
2015     };
2016 
2017     UConverterToUCallback cb;
2018     const void *p;
2019 
2020     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2021     UErrorCode errorCode=U_ZERO_ERROR;
2022     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2023     if(U_FAILURE(errorCode)) {
2024         log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2025         return;
2026     }
2027     TestNextUChar(cnv, source, limit, results, "CESU-8");
2028     /* Test the condition when source >= sourceLimit */
2029     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2030 
2031     /* test error behavior with a skip callback */
2032     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2033     source=(const char *)in2;
2034     limit=(const char *)(in2+sizeof(in2));
2035     TestNextUChar(cnv, source, limit, results2, "CESU-8");
2036 
2037     ucnv_close(cnv);
2038 }
2039 
TestUTF16()2040 static void TestUTF16() {
2041     /* test input */
2042     static const uint8_t in1[]={
2043         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2044     };
2045     static const uint8_t in2[]={
2046         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2047     };
2048     static const uint8_t in3[]={
2049         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2050     };
2051 
2052     /* expected test results */
2053     static const int32_t results1[]={
2054         /* number of bytes read, code point */
2055         4, 0x4e00,
2056         2, 0xfeff
2057     };
2058     static const int32_t results2[]={
2059         /* number of bytes read, code point */
2060         4, 0x004e,
2061         2, 0xfffe
2062     };
2063     static const int32_t results3[]={
2064         /* number of bytes read, code point */
2065         2, 0xfefe,
2066         2, 0x4e00,
2067         2, 0xfeff,
2068         4, 0x20001
2069     };
2070 
2071     const char *source, *limit;
2072 
2073     UErrorCode errorCode=U_ZERO_ERROR;
2074     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2075     if(U_FAILURE(errorCode)) {
2076         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2077         return;
2078     }
2079 
2080     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2081     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2082 
2083     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2084     ucnv_resetToUnicode(cnv);
2085     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2086 
2087     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2088     ucnv_resetToUnicode(cnv);
2089     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2090 
2091     /* Test the condition when source >= sourceLimit */
2092     ucnv_resetToUnicode(cnv);
2093     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2094 
2095     ucnv_close(cnv);
2096 }
2097 
TestUTF16BE()2098 static void TestUTF16BE() {
2099     /* test input */
2100     static const uint8_t in[]={
2101         0x00, 0x61,
2102         0x00, 0xc0,
2103         0x00, 0x31,
2104         0x00, 0xf4,
2105         0xce, 0xfe,
2106         0xd8, 0x01, 0xdc, 0x01
2107     };
2108 
2109     /* expected test results */
2110     static const int32_t results[]={
2111         /* number of bytes read, code point */
2112         2, 0x61,
2113         2, 0xc0,
2114         2, 0x31,
2115         2, 0xf4,
2116         2, 0xcefe,
2117         4, 0x10401
2118     };
2119 
2120     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2121     UErrorCode errorCode=U_ZERO_ERROR;
2122     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2123     if(U_FAILURE(errorCode)) {
2124         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2125         return;
2126     }
2127     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2128     /* Test the condition when source >= sourceLimit */
2129     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2130     /*Test for the condition where there is an invalid character*/
2131     {
2132         static const uint8_t source2[]={0x61};
2133         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2134         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2135     }
2136 #if 0
2137     /*
2138      * Test disabled because currently the UTF-16BE/LE converters are supposed
2139      * to not set errors for unpaired surrogates.
2140      * This may change with
2141      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2142      */
2143 
2144     /*Test for the condition where there is a surrogate pair*/
2145     {
2146         const uint8_t source2[]={0xd8, 0x01};
2147         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2148     }
2149 #endif
2150     ucnv_close(cnv);
2151 }
2152 
2153 static void
TestUTF16LE()2154 TestUTF16LE() {
2155     /* test input */
2156     static const uint8_t in[]={
2157         0x61, 0x00,
2158         0x31, 0x00,
2159         0x4e, 0x2e,
2160         0x4e, 0x00,
2161         0x01, 0xd8, 0x01, 0xdc
2162     };
2163 
2164     /* expected test results */
2165     static const int32_t results[]={
2166         /* number of bytes read, code point */
2167         2, 0x61,
2168         2, 0x31,
2169         2, 0x2e4e,
2170         2, 0x4e,
2171         4, 0x10401
2172     };
2173 
2174     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2175     UErrorCode errorCode=U_ZERO_ERROR;
2176     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2177     if(U_FAILURE(errorCode)) {
2178         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2179         return;
2180     }
2181     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2182     /* Test the condition when source >= sourceLimit */
2183     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2184     /*Test for the condition where there is an invalid character*/
2185     {
2186         static const uint8_t source2[]={0x61};
2187         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2188         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2189     }
2190 #if 0
2191     /*
2192      * Test disabled because currently the UTF-16BE/LE converters are supposed
2193      * to not set errors for unpaired surrogates.
2194      * This may change with
2195      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2196      */
2197 
2198     /*Test for the condition where there is a surrogate character*/
2199     {
2200         static const uint8_t source2[]={0x01, 0xd8};
2201         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2202     }
2203 #endif
2204 
2205     ucnv_close(cnv);
2206 }
2207 
TestUTF32()2208 static void TestUTF32() {
2209     /* test input */
2210     static const uint8_t in1[]={
2211         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2212     };
2213     static const uint8_t in2[]={
2214         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2215     };
2216     static const uint8_t in3[]={
2217         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2218     };
2219 
2220     /* expected test results */
2221     static const int32_t results1[]={
2222         /* number of bytes read, code point */
2223         8, 0x100f00,
2224         4, 0xfeff
2225     };
2226     static const int32_t results2[]={
2227         /* number of bytes read, code point */
2228         8, 0x0f1000,
2229         4, 0xfffe
2230     };
2231     static const int32_t results3[]={
2232         /* number of bytes read, code point */
2233         4, 0xfefe,
2234         4, 0x100f00,
2235         4, 0xfffd, /* unmatched surrogate */
2236         4, 0xfffd  /* unmatched surrogate */
2237     };
2238 
2239     const char *source, *limit;
2240 
2241     UErrorCode errorCode=U_ZERO_ERROR;
2242     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2243     if(U_FAILURE(errorCode)) {
2244         log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2245         return;
2246     }
2247 
2248     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2249     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2250 
2251     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2252     ucnv_resetToUnicode(cnv);
2253     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2254 
2255     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2256     ucnv_resetToUnicode(cnv);
2257     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2258 
2259     /* Test the condition when source >= sourceLimit */
2260     ucnv_resetToUnicode(cnv);
2261     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2262 
2263     ucnv_close(cnv);
2264 }
2265 
2266 static void
TestUTF32BE()2267 TestUTF32BE() {
2268     /* test input */
2269     static const uint8_t in[]={
2270         0x00, 0x00, 0x00, 0x61,
2271         0x00, 0x00, 0x30, 0x61,
2272         0x00, 0x00, 0xdc, 0x00,
2273         0x00, 0x00, 0xd8, 0x00,
2274         0x00, 0x00, 0xdf, 0xff,
2275         0x00, 0x00, 0xff, 0xfe,
2276         0x00, 0x10, 0xab, 0xcd,
2277         0x00, 0x10, 0xff, 0xff
2278     };
2279 
2280     /* expected test results */
2281     static const int32_t results[]={
2282         /* number of bytes read, code point */
2283         4, 0x61,
2284         4, 0x3061,
2285         4, 0xfffd,
2286         4, 0xfffd,
2287         4, 0xfffd,
2288         4, 0xfffe,
2289         4, 0x10abcd,
2290         4, 0x10ffff
2291     };
2292 
2293     /* error test input */
2294     static const uint8_t in2[]={
2295         0x00, 0x00, 0x00, 0x61,
2296         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2297         0x00, 0x00, 0x00, 0x62,
2298         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2299         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2300         0x00, 0x00, 0x01, 0x62,
2301         0x00, 0x00, 0x02, 0x62
2302     };
2303 
2304     /* expected error test results */
2305     static const int32_t results2[]={
2306         /* number of bytes read, code point */
2307         4,  0x61,
2308         8,  0x62,
2309         12, 0x162,
2310         4,  0x262
2311     };
2312 
2313     UConverterToUCallback cb;
2314     const void *p;
2315 
2316     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2317     UErrorCode errorCode=U_ZERO_ERROR;
2318     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2319     if(U_FAILURE(errorCode)) {
2320         log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2321         return;
2322     }
2323     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2324 
2325     /* Test the condition when source >= sourceLimit */
2326     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2327 
2328     /* test error behavior with a skip callback */
2329     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2330     source=(const char *)in2;
2331     limit=(const char *)(in2+sizeof(in2));
2332     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2333 
2334     ucnv_close(cnv);
2335 }
2336 
2337 static void
TestUTF32LE()2338 TestUTF32LE() {
2339     /* test input */
2340     static const uint8_t in[]={
2341         0x61, 0x00, 0x00, 0x00,
2342         0x61, 0x30, 0x00, 0x00,
2343         0x00, 0xdc, 0x00, 0x00,
2344         0x00, 0xd8, 0x00, 0x00,
2345         0xff, 0xdf, 0x00, 0x00,
2346         0xfe, 0xff, 0x00, 0x00,
2347         0xcd, 0xab, 0x10, 0x00,
2348         0xff, 0xff, 0x10, 0x00
2349     };
2350 
2351     /* expected test results */
2352     static const int32_t results[]={
2353         /* number of bytes read, code point */
2354         4, 0x61,
2355         4, 0x3061,
2356         4, 0xfffd,
2357         4, 0xfffd,
2358         4, 0xfffd,
2359         4, 0xfffe,
2360         4, 0x10abcd,
2361         4, 0x10ffff
2362     };
2363 
2364     /* error test input */
2365     static const uint8_t in2[]={
2366         0x61, 0x00, 0x00, 0x00,
2367         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2368         0x62, 0x00, 0x00, 0x00,
2369         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2370         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2371         0x62, 0x01, 0x00, 0x00,
2372         0x62, 0x02, 0x00, 0x00,
2373     };
2374 
2375     /* expected error test results */
2376     static const int32_t results2[]={
2377         /* number of bytes read, code point */
2378         4,  0x61,
2379         8,  0x62,
2380         12, 0x162,
2381         4,  0x262,
2382     };
2383 
2384     UConverterToUCallback cb;
2385     const void *p;
2386 
2387     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2388     UErrorCode errorCode=U_ZERO_ERROR;
2389     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2390     if(U_FAILURE(errorCode)) {
2391         log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2392         return;
2393     }
2394     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2395 
2396     /* Test the condition when source >= sourceLimit */
2397     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2398 
2399     /* test error behavior with a skip callback */
2400     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2401     source=(const char *)in2;
2402     limit=(const char *)(in2+sizeof(in2));
2403     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2404 
2405     ucnv_close(cnv);
2406 }
2407 
2408 static void
TestLATIN1()2409 TestLATIN1() {
2410     /* test input */
2411     static const uint8_t in[]={
2412        0x61,
2413        0x31,
2414        0x32,
2415        0xc0,
2416        0xf0,
2417        0xf4,
2418     };
2419 
2420     /* expected test results */
2421     static const int32_t results[]={
2422         /* number of bytes read, code point */
2423         1, 0x61,
2424         1, 0x31,
2425         1, 0x32,
2426         1, 0xc0,
2427         1, 0xf0,
2428         1, 0xf4,
2429     };
2430     static const uint16_t in1[] = {
2431         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2432         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2433         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2434         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2435         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2436         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2437         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2438         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2439         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2440         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2441         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2442         0xcb, 0x82
2443     };
2444     static const uint8_t out1[] = {
2445         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2446         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2447         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2448         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2449         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2450         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2451         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2452         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2453         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2454         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2455         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2456         0xcb, 0x82
2457     };
2458     static const uint16_t in2[]={
2459         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2460         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2461         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2462         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2463         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2464         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2465         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2466         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2467         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2468         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2469         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2470         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2471         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2472         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2473         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2474         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2475         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2476         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2477         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2478         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2479         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2480         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2481         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2482         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2483         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2484         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2485         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2486         0x37, 0x20, 0x2A, 0x2F,
2487     };
2488     static const unsigned char out2[]={
2489         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2490         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2491         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2492         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2493         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2494         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2495         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2496         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2497         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2498         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2499         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2500         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2501         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2502         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2503         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2504         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2505         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2506         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2507         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2508         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2509         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2510         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2511         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2512         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2513         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2514         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2515         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2516         0x37, 0x20, 0x2A, 0x2F,
2517     };
2518     const char *source=(const char *)in;
2519     const char *limit=(const char *)in+sizeof(in);
2520 
2521     UErrorCode errorCode=U_ZERO_ERROR;
2522     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2523     if(U_FAILURE(errorCode)) {
2524         log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2525         return;
2526     }
2527     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2528     /* Test the condition when source >= sourceLimit */
2529     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2530     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2531     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2532 
2533     ucnv_close(cnv);
2534 }
2535 
2536 static void
TestSBCS()2537 TestSBCS() {
2538     /* test input */
2539     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2540     /* expected test results */
2541     static const int32_t results[]={
2542         /* number of bytes read, code point */
2543         1, 0x61,
2544         1, 0xbf,
2545         1, 0xc4,
2546         1, 0x2021,
2547         1, 0xf8ff,
2548         1, 0x00d9
2549     };
2550 
2551     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2552     UErrorCode errorCode=U_ZERO_ERROR;
2553     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2554     if(U_FAILURE(errorCode)) {
2555         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2556         return;
2557     }
2558     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2559     /* Test the condition when source >= sourceLimit */
2560     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2561     /*Test for Illegal character */ /*
2562     {
2563     static const uint8_t input1[]={ 0xA1 };
2564     const char* illegalsource=(const char*)input1;
2565     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2566     }
2567    */
2568     ucnv_close(cnv);
2569 }
2570 
2571 static void
TestDBCS()2572 TestDBCS() {
2573     /* test input */
2574     static const uint8_t in[]={
2575         0x44, 0x6a,
2576         0xc4, 0x9c,
2577         0x7a, 0x74,
2578         0x46, 0xab,
2579         0x42, 0x5b,
2580 
2581     };
2582 
2583     /* expected test results */
2584     static const int32_t results[]={
2585         /* number of bytes read, code point */
2586         2, 0x00a7,
2587         2, 0xe1d2,
2588         2, 0x6962,
2589         2, 0xf842,
2590         2, 0xffe5,
2591     };
2592 
2593     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2594     UErrorCode errorCode=U_ZERO_ERROR;
2595 
2596     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2597     if(U_FAILURE(errorCode)) {
2598         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2599         return;
2600     }
2601     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2602     /* Test the condition when source >= sourceLimit */
2603     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2604     /*Test for the condition where there is an invalid character*/
2605     {
2606         static const uint8_t source2[]={0x1a, 0x1b};
2607         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2608     }
2609     /*Test for the condition where we have a truncated char*/
2610     {
2611         static const uint8_t source1[]={0xc4};
2612         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2613         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2614     }
2615     ucnv_close(cnv);
2616 }
2617 
2618 static void
TestMBCS()2619 TestMBCS() {
2620     /* test input */
2621     static const uint8_t in[]={
2622         0x01,
2623         0xa6, 0xa3,
2624         0x00,
2625         0xa6, 0xa1,
2626         0x08,
2627         0xc2, 0x76,
2628         0xc2, 0x78,
2629 
2630     };
2631 
2632     /* expected test results */
2633     static const int32_t results[]={
2634         /* number of bytes read, code point */
2635         1, 0x0001,
2636         2, 0x250c,
2637         1, 0x0000,
2638         2, 0x2500,
2639         1, 0x0008,
2640         2, 0xd60c,
2641         2, 0xd60e,
2642     };
2643 
2644     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2645     UErrorCode errorCode=U_ZERO_ERROR;
2646 
2647     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2648     if(U_FAILURE(errorCode)) {
2649         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2650         return;
2651     }
2652     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2653     /* Test the condition when source >= sourceLimit */
2654     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2655     /*Test for the condition where there is an invalid character*/
2656     {
2657         static const uint8_t source2[]={0xa1, 0x80};
2658         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2659     }
2660     /*Test for the condition where we have a truncated char*/
2661     {
2662         static const uint8_t source1[]={0xc4};
2663         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2664         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2665     }
2666     ucnv_close(cnv);
2667 
2668 }
2669 
2670 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2671 static void
TestICCRunout()2672 TestICCRunout() {
2673 /*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2674 
2675     const char *cnvName = "ibm-1363";
2676     UErrorCode status = U_ZERO_ERROR;
2677     const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2678     /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2679     const char *source = sourceData;
2680     const char *sourceLim = sourceData+sizeof(sourceData);
2681     UChar c1, c2, c3;
2682     UConverter *cnv=ucnv_open(cnvName, &status);
2683     if(U_FAILURE(status)) {
2684         log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2685 	return;
2686     }
2687 
2688 #if 0
2689     {
2690     UChar   targetBuf[256];
2691     UChar   *target = targetBuf;
2692     UChar   *targetLim = target+256;
2693     ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2694 
2695     log_info("After convert: target@%d, source@%d, status%s\n",
2696 	     target-targetBuf, source-sourceData, u_errorName(status));
2697 
2698     if(U_FAILURE(status)) {
2699 	log_err("Failed to convert: %s\n", u_errorName(status));
2700     } else {
2701 
2702     }
2703     }
2704 #endif
2705 
2706     c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2707     log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2708 
2709     c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2710     log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2711 
2712     c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2713     log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2714 
2715     if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2716 	log_verbose("OK\n");
2717     } else {
2718 	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2719     }
2720 
2721     ucnv_close(cnv);
2722 
2723 }
2724 #endif
2725 
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * ucnv_getNextUChar() checks for the generic "ISO_2022" converter (compiled
 * only when U_ENABLE_GENERIC_ISO_2022 is defined):
 *  - reads an escape sequence followed by six characters via TestNextUChar(),
 *  - sourceLimit < source must give U_ILLEGAL_ARGUMENT_ERROR,
 *  - sourceLimit == source must give U_INDEX_OUTOFBOUNDS_ERROR,
 *  - with UCNV_TO_U_CALLBACK_STOP installed, a lone 0xc4 must give
 *    U_TRUNCATED_CHAR_FOUND and 0xa1 0x01 must give U_ILLEGAL_CHAR_FOUND.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };



    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when source > sourceLimit. Both pointers stay inside
     * in[]: computing source-1 would form a pointer before the start of the
     * array, which C leaves undefined.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source == sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2783 
2784 static void
TestSmallTargetBuffer(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2785 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2786     const UChar* uSource;
2787     const UChar* uSourceLimit;
2788     const char* cSource;
2789     const char* cSourceLimit;
2790     UChar *uTargetLimit =NULL;
2791     UChar *uTarget;
2792     char *cTarget;
2793     const char *cTargetLimit;
2794     char *cBuf;
2795     UChar *uBuf; /*,*test;*/
2796     int32_t uBufSize = 120;
2797     int len=0;
2798     int i=2;
2799     UErrorCode errorCode=U_ZERO_ERROR;
2800     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2801     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2802     ucnv_reset(cnv);
2803     for(;--i>0; ){
2804         uSource = (UChar*) source;
2805         uSourceLimit=(const UChar*)sourceLimit;
2806         cTarget = cBuf;
2807         uTarget = uBuf;
2808         cSource = cBuf;
2809         cTargetLimit = cBuf;
2810         uTargetLimit = uBuf;
2811 
2812         do{
2813 
2814             cTargetLimit = cTargetLimit+ i;
2815             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2816             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2817                errorCode=U_ZERO_ERROR;
2818                 continue;
2819             }
2820 
2821             if(U_FAILURE(errorCode)){
2822                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2823                 return;
2824             }
2825 
2826         }while (uSource<uSourceLimit);
2827 
2828         cSourceLimit =cTarget;
2829         do{
2830             uTargetLimit=uTargetLimit+i;
2831             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2832             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2833                errorCode=U_ZERO_ERROR;
2834                 continue;
2835             }
2836             if(U_FAILURE(errorCode)){
2837                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2838                     return;
2839             }
2840         }while(cSource<cSourceLimit);
2841 
2842         uSource = source;
2843         /*test =uBuf;*/
2844         for(len=0;len<(int)(source - sourceLimit);len++){
2845             if(uBuf[len]!=uSource[len]){
2846                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2847             }
2848         }
2849     }
2850     free(uBuf);
2851     free(cBuf);
2852 }
2853 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2854 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2855     const UChar* uSource;
2856     const UChar* uSourceLimit;
2857     const char* cSource;
2858     UChar *uTargetLimit =NULL;
2859     UChar *uTarget;
2860     char *cTarget;
2861     const char *cTargetLimit;
2862     char *cBuf;
2863     UChar *uBuf,*test;
2864     int32_t uBufSize = 120;
2865     int numCharsInTarget=0;
2866     UErrorCode errorCode=U_ZERO_ERROR;
2867     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2868     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2869     uSource = source;
2870     uSourceLimit=sourceLimit;
2871     cTarget = cBuf;
2872     cTargetLimit = cBuf +uBufSize*5;
2873     uTarget = uBuf;
2874     uTargetLimit = uBuf+ uBufSize*5;
2875     ucnv_reset(cnv);
2876     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2877     if(U_FAILURE(errorCode)){
2878         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2879         return;
2880     }
2881     cSource = cBuf;
2882     test =uBuf;
2883     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2884     if(U_FAILURE(errorCode)){
2885         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2886         return;
2887     }
2888     uSource = source;
2889     while(uSource<uSourceLimit){
2890         if(*test!=*uSource){
2891 
2892             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2893         }
2894         uSource++;
2895         test++;
2896     }
2897     free(uBuf);
2898     free(cBuf);
2899 }
2900 
/*
 * Round-trips the UTF-16 text [source, sourceLimit) through cnv while the
 * input window handed to the converter grows by only one unit per call:
 * first ucnv_fromUnicode() one UChar at a time, then ucnv_toUnicode() one
 * byte at a time. The UChars produced are compared with the input.
 *
 * cnv is reset on entry and is not closed here. Both conversions run with
 * flush=FALSE.
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int len=0;
    int expectedLen;
    int producedLen;
    int i=2;
    const UChar *temp = sourceLimit;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(uBuf==NULL || cBuf==NULL){
        log_err("TestSmallSourceBuffer: out of memory\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2 and is pre-decremented, so the body runs once */
    for(;--i>0;){
        uSource = (UChar*) source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        cTargetLimit = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cTargetLimit+uBufSize*10;
        uSourceLimit=uSource;

        /* UTF-16 -> bytes, exposing one more input UChar per call */
        do{

            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
               errorCode=U_ZERO_ERROR;
                continue;
            }

            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }

        }while (uSource<temp);

        /* bytes -> UTF-16, exposing one more input byte per call */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
               errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                    goto cleanup;
            }
        }while(cSource<cTarget);

        /*
         * Compare the round-tripped text with the input. Only UChars that
         * were actually written are checked, so uninitialized parts of uBuf
         * are never read; with flush=FALSE the output may be shorter than
         * the input.
         */
        uSource = source;
        expectedLen = (int)(sourceLimit - (const UChar*)source);
        producedLen = (int)(uTarget - uBuf);
        if(producedLen < expectedLen){
            expectedLen = producedLen;
        }
        for(len=0;len<expectedLen;len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
2975 static void
TestGetNextUChar2022(UConverter * cnv,const char * source,const char * limit,const uint16_t results[],const char * message)2976 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2977                      const uint16_t results[], const char* message){
2978 /*     const char* s0; */
2979      const char* s=(char*)source;
2980      const uint16_t *r=results;
2981      UErrorCode errorCode=U_ZERO_ERROR;
2982      uint32_t c,exC;
2983      ucnv_reset(cnv);
2984      while(s<limit) {
2985 	 /* s0=s; */
2986         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2987         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2988             break; /* no more significant input */
2989         } else if(U_FAILURE(errorCode)) {
2990             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2991             break;
2992         } else {
2993             if(U16_IS_LEAD(*r)){
2994                 int i =0, len = 2;
2995                 U16_NEXT(r, i, len, exC);
2996                 r++;
2997             }else{
2998                 exC = *r;
2999             }
3000             if(c!=(uint32_t)(exC))
3001                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
3002         }
3003         r++;
3004     }
3005 }
3006 
TestJitterbug930(const char * enc)3007 static int TestJitterbug930(const char* enc){
3008     UErrorCode err = U_ZERO_ERROR;
3009     UConverter*converter;
3010     char out[80];
3011     char*target = out;
3012     UChar in[4];
3013     const UChar*source = in;
3014     int32_t off[80];
3015     int32_t* offsets = off;
3016     int numOffWritten=0;
3017     UBool flush = 0;
3018     converter = my_ucnv_open(enc, &err);
3019 
3020     in[0] = 0x41;     /* 0x4E00;*/
3021     in[1] = 0x4E01;
3022     in[2] = 0x4E02;
3023     in[3] = 0x4E03;
3024 
3025     memset(off, '*', sizeof(off));
3026 
3027     ucnv_fromUnicode (converter,
3028             &target,
3029             target+2,
3030             &source,
3031             source+3,
3032             offsets,
3033             flush,
3034             &err);
3035 
3036         /* writes three bytes into the output buffer: 41 1B 24
3037         * but offsets contains 0 1 1
3038     */
3039     while(*offsets< off[10]){
3040         numOffWritten++;
3041         offsets++;
3042     }
3043     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3044     if(numOffWritten!= (int)(target-out)){
3045         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3046     }
3047 
3048     err = U_ZERO_ERROR;
3049 
3050     memset(off,'*' , sizeof(off));
3051 
3052     flush = 1;
3053     offsets=off;
3054     ucnv_fromUnicode (converter,
3055             &target,
3056             target+4,
3057             &source,
3058             source,
3059             offsets,
3060             flush,
3061             &err);
3062     numOffWritten=0;
3063     while(*offsets< off[10]){
3064         numOffWritten++;
3065         if(*offsets!= -1){
3066             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3067         }
3068         offsets++;
3069     }
3070 
3071     /* writes 42 43 7A into output buffer,
3072      * offsets contains -1 -1 -1
3073      */
3074     ucnv_close(converter);
3075     return 0;
3076 }
3077 
3078 static void
TestHZ()3079 TestHZ() {
3080     /* test input */
3081     static const uint16_t in[]={
3082             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3083             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3084             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3085             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3086             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3087             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3088             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3089             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3090             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3091             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3092             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3093             0x005A, 0x005B, 0x005C, 0x000A
3094       };
3095     const UChar* uSource;
3096     const UChar* uSourceLimit;
3097     const char* cSource;
3098     const char* cSourceLimit;
3099     UChar *uTargetLimit =NULL;
3100     UChar *uTarget;
3101     char *cTarget;
3102     const char *cTargetLimit;
3103     char *cBuf = NULL;
3104     UChar *uBuf = NULL;
3105     UChar *test;
3106     int32_t uBufSize = 120;
3107     UErrorCode errorCode=U_ZERO_ERROR;
3108     UConverter *cnv = NULL;
3109     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3110     int32_t* myOff= offsets;
3111     cnv=ucnv_open("HZ", &errorCode);
3112     if(U_FAILURE(errorCode)) {
3113         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3114         goto cleanup;
3115     }
3116 
3117     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3118     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3119     uSource = (const UChar*)in;
3120     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3121     cTarget = cBuf;
3122     cTargetLimit = cBuf +uBufSize*5;
3123     uTarget = uBuf;
3124     uTargetLimit = uBuf+ uBufSize*5;
3125     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3126     if(U_FAILURE(errorCode)){
3127         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3128         goto cleanup;
3129     }
3130     cSource = cBuf;
3131     cSourceLimit =cTarget;
3132     test =uBuf;
3133     myOff=offsets;
3134     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3135     if(U_FAILURE(errorCode)){
3136         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3137         goto cleanup;
3138     }
3139     uSource = (const UChar*)in;
3140     while(uSource<uSourceLimit){
3141         if(*test!=*uSource){
3142 
3143             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3144         }
3145         uSource++;
3146         test++;
3147     }
3148     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3149     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3150     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3151     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3152     TestJitterbug930("csISO2022JP");
3153 
3154 cleanup:
3155     ucnv_close(cnv);
3156     free(offsets);
3157     free(uBuf);
3158     free(cBuf);
3159 }
3160 
3161 static void
TestISCII()3162 TestISCII(){
3163         /* test input */
3164     static const uint16_t in[]={
3165         /* test full range of Devanagari */
3166         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3167         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3168         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3169         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3170         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3171         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3172         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3173         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3174         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3175         0x096D,0x096E,0x096F,
3176         /* test Soft halant*/
3177         0x0915,0x094d, 0x200D,
3178         /* test explicit halant */
3179         0x0915,0x094d, 0x200c,
3180         /* test double danda */
3181         0x965,
3182         /* test ASCII */
3183         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3184         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3185         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3186         /* tests from Lotus */
3187         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3188         0x0930,0x094D,0x200D,
3189         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3190         0x0915,0x0921,0x002B,0x095F,
3191         /* tamil range */
3192         0x0B86, 0xB87, 0xB88,
3193         /* telugu range */
3194         0x0C05, 0x0C02, 0x0C03,0x0c31,
3195         /* kannada range */
3196         0x0C85, 0xC82, 0x0C83,
3197         /* test Abbr sign and Anudatta */
3198         0x0970, 0x952,
3199        /* 0x0958,
3200         0x0959,
3201         0x095A,
3202         0x095B,
3203         0x095C,
3204         0x095D,
3205         0x095E,
3206         0x095F,*/
3207         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3208         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3209         0x090C ,
3210         0x0962,
3211         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3212         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3213         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3214         0x093D /* Avagraha  0xEA, 0xE9*/,
3215         0x0958,
3216         0x0959,
3217         0x095A,
3218         0x095B,
3219         0x095C,
3220         0x095D,
3221         0x095E,
3222         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3223       };
3224     static const unsigned char byteArr[]={
3225 
3226         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3227         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3228         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3229         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3230         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3231         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3232         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3233         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3234         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3235         0xf8,0xf9,0xfa,
3236         /* test soft halant */
3237         0xb3, 0xE8, 0xE9,
3238         /* test explicit halant */
3239         0xb3, 0xE8, 0xE8,
3240         /* test double danda */
3241         0xea, 0xea,
3242         /* test ASCII */
3243         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3244         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3245         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3246         /* test ATR code */
3247 
3248         /* tests from Lotus */
3249         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3250         0xEF,0x42,0xCF,0xE8,0xD9,
3251         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3252         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3253         /* tamil range */
3254         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3255         /* telugu range */
3256         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3257         /* kannada range */
3258         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3259         /* anudatta and abbreviation sign */
3260         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3261 
3262 
3263         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3264 
3265         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3266 
3267         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3268 
3269         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3270 
3271         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3272 
3273         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3274 
3275         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3276 
3277         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3278 
3279         0xB3, 0xE9, /* Ka + NUKTA */
3280 
3281         0xB4, 0xE9, /* Kha + NUKTA */
3282 
3283         0xB5, 0xE9, /* Ga + NUKTA */
3284 
3285         0xBA, 0xE9,
3286 
3287         0xBF, 0xE9,
3288 
3289         0xC0, 0xE9,
3290 
3291         0xC9, 0xE9,
3292         /* INV halant RA    */
3293         0xD9, 0xE8, 0xCF,
3294         0x00, 0x00A0,
3295         /* just consume unhandled codepoints */
3296         0xEF, 0x30,
3297 
3298     };
3299     testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3300     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3301 
3302 }
3303 
3304 static void
TestISO_2022_JP()3305 TestISO_2022_JP() {
3306     /* test input */
3307     static const uint16_t in[]={
3308         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3309         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3310         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3311         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3312         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3313         0x201D, 0x3014, 0x000D, 0x000A,
3314         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3315         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3316         };
3317     const UChar* uSource;
3318     const UChar* uSourceLimit;
3319     const char* cSource;
3320     const char* cSourceLimit;
3321     UChar *uTargetLimit =NULL;
3322     UChar *uTarget;
3323     char *cTarget;
3324     const char *cTargetLimit;
3325     char *cBuf = NULL;
3326     UChar *uBuf = NULL;
3327     UChar *test;
3328     int32_t uBufSize = 120;
3329     UErrorCode errorCode=U_ZERO_ERROR;
3330     UConverter *cnv = NULL;
3331     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3332     int32_t* myOff= offsets;
3333     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3334     if(U_FAILURE(errorCode)) {
3335         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3336         goto cleanup;
3337     }
3338 
3339     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3340     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3341     uSource = (const UChar*)in;
3342     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3343     cTarget = cBuf;
3344     cTargetLimit = cBuf +uBufSize*5;
3345     uTarget = uBuf;
3346     uTargetLimit = uBuf+ uBufSize*5;
3347     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3348     if(U_FAILURE(errorCode)){
3349         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3350         goto cleanup;
3351     }
3352     cSource = cBuf;
3353     cSourceLimit =cTarget;
3354     test =uBuf;
3355     myOff=offsets;
3356     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3357     if(U_FAILURE(errorCode)){
3358         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3359         goto cleanup;
3360     }
3361 
3362     uSource = (const UChar*)in;
3363     while(uSource<uSourceLimit){
3364         if(*test!=*uSource){
3365 
3366             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3367         }
3368         uSource++;
3369         test++;
3370     }
3371 
3372     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3373     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3374     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3375     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3376     TestJitterbug930("csISO2022JP");
3377 
3378 cleanup:
3379     ucnv_close(cnv);
3380     free(uBuf);
3381     free(cBuf);
3382     free(offsets);
3383 }
3384 
TestConv(const uint16_t in[],int len,const char * conv,const char * lang,char byteArr[],int byteArrLen)3385 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3386     const UChar* uSource;
3387     const UChar* uSourceLimit;
3388     const char* cSource;
3389     const char* cSourceLimit;
3390     UChar *uTargetLimit =NULL;
3391     UChar *uTarget;
3392     char *cTarget;
3393     const char *cTargetLimit;
3394     char *cBuf;
3395     UChar *uBuf,*test;
3396     int32_t uBufSize = 120*10;
3397     UErrorCode errorCode=U_ZERO_ERROR;
3398     UConverter *cnv;
3399     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3400     int32_t* myOff= offsets;
3401     cnv=my_ucnv_open(conv, &errorCode);
3402     if(U_FAILURE(errorCode)) {
3403         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3404         return;
3405     }
3406 
3407     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3408     cBuf =(char*)malloc(uBufSize * sizeof(char));
3409     uSource = (const UChar*)in;
3410     uSourceLimit=uSource+len;
3411     cTarget = cBuf;
3412     cTargetLimit = cBuf +uBufSize;
3413     uTarget = uBuf;
3414     uTargetLimit = uBuf+ uBufSize;
3415     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3416     if(U_FAILURE(errorCode)){
3417         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3418         return;
3419     }
3420     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3421     cSource = cBuf;
3422     cSourceLimit =cTarget;
3423     test =uBuf;
3424     myOff=offsets;
3425     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3426     if(U_FAILURE(errorCode)){
3427         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3428         return;
3429     }
3430 
3431     uSource = (const UChar*)in;
3432     while(uSource<uSourceLimit){
3433         if(*test!=*uSource){
3434             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3435         }
3436         uSource++;
3437         test++;
3438     }
3439     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3440     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3441     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3442     if(byteArr && byteArrLen!=0){
3443         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3444         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3445         {
3446             cSource = byteArr;
3447             cSourceLimit = cSource+byteArrLen;
3448             test=uBuf;
3449             myOff = offsets;
3450             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3451             if(U_FAILURE(errorCode)){
3452                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3453                 return;
3454             }
3455 
3456             uSource = (const UChar*)in;
3457             while(uSource<uSourceLimit){
3458                 if(*test!=*uSource){
3459                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3460                 }
3461                 uSource++;
3462                 test++;
3463             }
3464         }
3465     }
3466 
3467     ucnv_close(cnv);
3468     free(uBuf);
3469     free(cBuf);
3470     free(offsets);
3471 }
3472 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)3473 _charAt(int32_t offset, void *context) {
3474     return ((char*)context)[offset];
3475 }
3476 
3477 static int32_t
unescape(UChar * dst,int32_t dstLen,const char * src,int32_t srcLen,UErrorCode * status)3478 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3479     int32_t srcIndex=0;
3480     int32_t dstIndex=0;
3481     if(U_FAILURE(*status)){
3482         return 0;
3483     }
3484     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3485         *status = U_ILLEGAL_ARGUMENT_ERROR;
3486         return 0;
3487     }
3488     if(srcLen==-1){
3489         srcLen = (int32_t)uprv_strlen(src);
3490     }
3491 
3492     for (; srcIndex<srcLen; ) {
3493         UChar32 c = src[srcIndex++];
3494         if (c == 0x005C /*'\\'*/) {
3495             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3496             if (c == (UChar32)0xFFFFFFFF) {
3497                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3498                 break; /* invalid escape sequence */
3499             }
3500         }
3501         if(dstIndex < dstLen){
3502             if(c>0xFFFF){
3503                dst[dstIndex++] = U16_LEAD(c);
3504                if(dstIndex<dstLen){
3505                     dst[dstIndex]=U16_TRAIL(c);
3506                }else{
3507                    *status=U_BUFFER_OVERFLOW_ERROR;
3508                }
3509             }else{
3510                 dst[dstIndex]=(UChar)c;
3511             }
3512 
3513         }else{
3514             *status = U_BUFFER_OVERFLOW_ERROR;
3515         }
3516         dstIndex++; /* for preflighting */
3517     }
3518     return dstIndex;
3519 }
3520 
3521 static void
TestFullRoundtrip(const char * cp)3522 TestFullRoundtrip(const char* cp){
3523     UChar usource[10] ={0};
3524     UChar nsrc[10] = {0};
3525     uint32_t i=1;
3526     int len=0, ulen;
3527     nsrc[0]=0x0061;
3528     /* Test codepoint 0 */
3529     TestConv(usource,1,cp,"",NULL,0);
3530     TestConv(usource,2,cp,"",NULL,0);
3531     nsrc[2]=0x5555;
3532     TestConv(nsrc,3,cp,"",NULL,0);
3533 
3534     for(;i<=0x10FFFF;i++){
3535         if(i==0xD800){
3536             i=0xDFFF;
3537             continue;
3538         }
3539         if(i<=0xFFFF){
3540             usource[0] =(UChar) i;
3541             len=1;
3542         }else{
3543             usource[0]=U16_LEAD(i);
3544             usource[1]=U16_TRAIL(i);
3545             len=2;
3546         }
3547         ulen=len;
3548         if(i==0x80) {
3549             usource[2]=0;
3550         }
3551         /* Test only single code points */
3552         TestConv(usource,ulen,cp,"",NULL,0);
3553         /* Test codepoint repeated twice */
3554         usource[ulen]=usource[0];
3555         usource[ulen+1]=usource[1];
3556         ulen+=len;
3557         TestConv(usource,ulen,cp,"",NULL,0);
3558         /* Test codepoint repeated 3 times */
3559         usource[ulen]=usource[0];
3560         usource[ulen+1]=usource[1];
3561         ulen+=len;
3562         TestConv(usource,ulen,cp,"",NULL,0);
3563         /* Test codepoint in between 2 codepoints */
3564         nsrc[1]=usource[0];
3565         nsrc[2]=usource[1];
3566         nsrc[len+1]=0x5555;
3567         TestConv(nsrc,len+2,cp,"",NULL,0);
3568         uprv_memset(usource,0,sizeof(UChar)*10);
3569     }
3570 }
3571 
3572 static void
TestRoundTrippingAllUTF(void)3573 TestRoundTrippingAllUTF(void){
3574     if(!getTestOption(QUICK_OPTION)){
3575         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3576         TestFullRoundtrip("BOCU-1");
3577         log_verbose("Running exhaustive round trip test for SCSU\n");
3578         TestFullRoundtrip("SCSU");
3579         log_verbose("Running exhaustive round trip test for UTF-8\n");
3580         TestFullRoundtrip("UTF-8");
3581         log_verbose("Running exhaustive round trip test for CESU-8\n");
3582         TestFullRoundtrip("CESU-8");
3583         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3584         TestFullRoundtrip("UTF-16BE");
3585         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3586         TestFullRoundtrip("UTF-16LE");
3587         log_verbose("Running exhaustive round trip test for UTF-16\n");
3588         TestFullRoundtrip("UTF-16");
3589         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3590         TestFullRoundtrip("UTF-32BE");
3591         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3592         TestFullRoundtrip("UTF-32LE");
3593         log_verbose("Running exhaustive round trip test for UTF-32\n");
3594         TestFullRoundtrip("UTF-32");
3595         log_verbose("Running exhaustive round trip test for UTF-7\n");
3596         TestFullRoundtrip("UTF-7");
3597         log_verbose("Running exhaustive round trip test for UTF-7\n");
3598         TestFullRoundtrip("UTF-7,version=1");
3599         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3600         TestFullRoundtrip("IMAP-mailbox-name");
3601         /*
3602          *
3603          * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3604          * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3605          * The old mappings remain as fallbacks.
3606          * This test may be reintroduced at a later time.
3607          *
3608          * 110118 - mow
3609          */
3610          /*
3611          log_verbose("Running exhaustive round trip test for GB18030\n");
3612          TestFullRoundtrip("GB18030");
3613          */
3614     }
3615 }
3616 
3617 static void
TestSCSU()3618 TestSCSU() {
3619 
3620     static const uint16_t germanUTF16[]={
3621         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3622     };
3623 
3624     static const uint8_t germanSCSU[]={
3625         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3626     };
3627 
3628     static const uint16_t russianUTF16[]={
3629         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3630     };
3631 
3632     static const uint8_t russianSCSU[]={
3633         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3634     };
3635 
3636     static const uint16_t japaneseUTF16[]={
3637         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3638         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3639         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3640         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3641         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3642         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3643         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3644         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3645         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3646         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3647         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3648         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3649         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3650         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3651         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3652     };
3653 
3654     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3655      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3656     static const uint8_t japaneseSCSU[]={
3657         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3658         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3659         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3660         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3661         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3662         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3663         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3664         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3665         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3666         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3667         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3668         0xcb, 0x82
3669     };
3670 
3671     static const uint16_t allFeaturesUTF16[]={
3672         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3673         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3674         0x01df, 0xf000, 0xdbff, 0xdfff
3675     };
3676 
3677     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3678      * result here (34B vs. 35B)
3679      */
3680     static const uint8_t allFeaturesSCSU[]={
3681         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3682         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3683         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3684         0xdf, 0x14, 0x80, 0x15, 0xff
3685     };
3686     static const uint16_t monkeyIn[]={
3687         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3688         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3689         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3690         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3691         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3692         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3693         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3694         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3695         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3696         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3697         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3698         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3699         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3700         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3701         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3702         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3703         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3704         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3705         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3706         /* test non-BMP code points */
3707         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3708         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3709         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3710         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3711         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3712         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3713         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3714         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3715         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3716         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3717         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3718 
3719 
3720         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3721         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3722         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3723         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3724         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3725     };
3726     static const char *fTestCases [] = {
3727           "\\ud800\\udc00", /* smallest surrogate*/
3728           "\\ud8ff\\udcff",
3729           "\\udBff\\udFff", /* largest surrogate pair*/
3730           "\\ud834\\udc00",
3731           "\\U0010FFFF",
3732           "Hello \\u9292 \\u9192 World!",
3733           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3734           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3735 
3736           "\\u0648\\u06c8", /* catch missing reset*/
3737           "\\u0648\\u06c8",
3738 
3739           "\\u4444\\uE001", /* lowest quotable*/
3740           "\\u4444\\uf2FF", /* highest quotable*/
3741           "\\u4444\\uf188\\u4444",
3742           "\\u4444\\uf188\\uf288",
3743           "\\u4444\\uf188abc\\u0429\\uf288",
3744           "\\u9292\\u2222",
3745           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3746           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3747           "Hello World!123456",
3748           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3749 
3750           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3751           "abc\\u4411d",      /* uses SQU*/
3752           "abc\\u4411\\u4412d",/* uses SCU*/
3753           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3754           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3755           "\\u9292\\u2222",
3756           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3757           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3758           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3759 
3760           "", /* empty input*/
3761           "\\u0000", /* smallest BMP character*/
3762           "\\uFFFF", /* largest BMP character*/
3763 
3764           /* regression tests*/
3765           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3766           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3767           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3768           "\\u0041\\u00df\\u0401\\u015f",
3769           "\\u9066\\u2123abc",
3770           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3771           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3772     };
3773     int i=0;
3774     for(;i<UPRV_LENGTHOF(fTestCases);i++){
3775         const char* cSrc = fTestCases[i];
3776         UErrorCode status = U_ZERO_ERROR;
3777         int32_t cSrcLen,srcLen;
3778         UChar* src;
3779         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3780         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3781         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3782         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3783         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3784         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3785         free(src);
3786     }
3787     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3788     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3789     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3790     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3791     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3792     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3793     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3794 }
3795 
3796 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346()3797 static void TestJitterbug2346(){
3798     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3799                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3800     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3801 
3802     UChar uTarget[500]={'\0'};
3803     UChar* utarget=uTarget;
3804     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3805 
3806     char cTarget[500]={'\0'};
3807     char* ctarget=cTarget;
3808     char* ctargetLimit=cTarget+sizeof(cTarget);
3809     const char* csource=source;
3810     UChar* temp = expected;
3811     UErrorCode err=U_ZERO_ERROR;
3812 
3813     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3814     if(U_FAILURE(err)) {
3815         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3816         return;
3817     }
3818     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3819     if(U_FAILURE(err)) {
3820         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3821         return;
3822     }
3823     utargetLimit=utarget;
3824     utarget = uTarget;
3825     while(utarget<utargetLimit){
3826         if(*temp!=*utarget){
3827 
3828             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3829         }
3830         utarget++;
3831         temp++;
3832     }
3833     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3834     if(U_FAILURE(err)) {
3835         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3836         return;
3837     }
3838     ctargetLimit=ctarget;
3839     ctarget =cTarget;
3840     ucnv_close(conv);
3841 
3842 
3843 }
3844 
3845 static void
TestISO_2022_JP_1()3846 TestISO_2022_JP_1() {
3847     /* test input */
3848     static const uint16_t in[]={
3849         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3850         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3851         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3852         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3853         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3854         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3855         0x201D, 0x000D, 0x000A,
3856         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3857         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3858         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3859         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3860         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3861         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3862       };
3863     const UChar* uSource;
3864     const UChar* uSourceLimit;
3865     const char* cSource;
3866     const char* cSourceLimit;
3867     UChar *uTargetLimit =NULL;
3868     UChar *uTarget;
3869     char *cTarget;
3870     const char *cTargetLimit;
3871     char *cBuf;
3872     UChar *uBuf,*test;
3873     int32_t uBufSize = 120;
3874     UErrorCode errorCode=U_ZERO_ERROR;
3875     UConverter *cnv;
3876 
3877     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3878     if(U_FAILURE(errorCode)) {
3879         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3880         return;
3881     }
3882 
3883     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3884     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3885     uSource = (const UChar*)in;
3886     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3887     cTarget = cBuf;
3888     cTargetLimit = cBuf +uBufSize*5;
3889     uTarget = uBuf;
3890     uTargetLimit = uBuf+ uBufSize*5;
3891     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3892     if(U_FAILURE(errorCode)){
3893         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3894         return;
3895     }
3896     cSource = cBuf;
3897     cSourceLimit =cTarget;
3898     test =uBuf;
3899     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3900     if(U_FAILURE(errorCode)){
3901         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3902         return;
3903     }
3904     uSource = (const UChar*)in;
3905     while(uSource<uSourceLimit){
3906         if(*test!=*uSource){
3907 
3908             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3909         }
3910         uSource++;
3911         test++;
3912     }
3913     /*ucnv_close(cnv);
3914     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3915     /*Test for the condition where there is an invalid character*/
3916     ucnv_reset(cnv);
3917     {
3918         static const uint8_t source2[]={0x0e,0x24,0x053};
3919         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3920     }
3921     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3922     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3923     ucnv_close(cnv);
3924     free(uBuf);
3925     free(cBuf);
3926 }
3927 
3928 static void
TestISO_2022_JP_2()3929 TestISO_2022_JP_2() {
3930     /* test input */
3931     static const uint16_t in[]={
3932         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3933         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3934         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3935         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3936         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3937         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3938         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3939         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3940         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3941         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3942         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3943         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3944         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3945         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3946         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3947         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3948         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3949         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3950         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3951       };
3952     const UChar* uSource;
3953     const UChar* uSourceLimit;
3954     const char* cSource;
3955     const char* cSourceLimit;
3956     UChar *uTargetLimit =NULL;
3957     UChar *uTarget;
3958     char *cTarget;
3959     const char *cTargetLimit;
3960     char *cBuf = NULL;
3961     UChar *uBuf = NULL;
3962     UChar *test;
3963     int32_t uBufSize = 120;
3964     UErrorCode errorCode=U_ZERO_ERROR;
3965     UConverter *cnv = NULL;
3966     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3967     int32_t* myOff= offsets;
3968     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3969     if(U_FAILURE(errorCode)) {
3970         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3971         goto cleanup;
3972     }
3973 
3974     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3975     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3976     uSource = (const UChar*)in;
3977     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3978     cTarget = cBuf;
3979     cTargetLimit = cBuf +uBufSize*5;
3980     uTarget = uBuf;
3981     uTargetLimit = uBuf+ uBufSize*5;
3982     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3983     if(U_FAILURE(errorCode)){
3984         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3985         goto cleanup;
3986     }
3987     cSource = cBuf;
3988     cSourceLimit =cTarget;
3989     test =uBuf;
3990     myOff=offsets;
3991     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3992     if(U_FAILURE(errorCode)){
3993         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3994         goto cleanup;
3995     }
3996     uSource = (const UChar*)in;
3997     while(uSource<uSourceLimit){
3998         if(*test!=*uSource){
3999 
4000             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4001         }
4002         uSource++;
4003         test++;
4004     }
4005     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4006     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4007     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4008     /*Test for the condition where there is an invalid character*/
4009     ucnv_reset(cnv);
4010     {
4011         static const uint8_t source2[]={0x0e,0x24,0x053};
4012         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4013     }
4014 
4015 cleanup:
4016     ucnv_close(cnv);
4017     free(uBuf);
4018     free(cBuf);
4019     free(offsets);
4020 }
4021 
4022 static void
TestISO_2022_KR()4023 TestISO_2022_KR() {
4024     /* test input */
4025     static const uint16_t in[]={
4026                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4027                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4028                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4029                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4030                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4031                    ,0x53E3,0x53E4,0x000A,0x000D};
4032     const UChar* uSource;
4033     const UChar* uSourceLimit;
4034     const char* cSource;
4035     const char* cSourceLimit;
4036     UChar *uTargetLimit =NULL;
4037     UChar *uTarget;
4038     char *cTarget;
4039     const char *cTargetLimit;
4040     char *cBuf = NULL;
4041     UChar *uBuf = NULL;
4042     UChar *test;
4043     int32_t uBufSize = 120;
4044     UErrorCode errorCode=U_ZERO_ERROR;
4045     UConverter *cnv = NULL;
4046     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4047     int32_t* myOff= offsets;
4048     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4049     if(U_FAILURE(errorCode)) {
4050         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4051         goto cleanup;
4052     }
4053 
4054     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4055     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4056     uSource = (const UChar*)in;
4057     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4058     cTarget = cBuf;
4059     cTargetLimit = cBuf +uBufSize*5;
4060     uTarget = uBuf;
4061     uTargetLimit = uBuf+ uBufSize*5;
4062     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4063     if(U_FAILURE(errorCode)){
4064         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4065         goto cleanup;
4066     }
4067     cSource = cBuf;
4068     cSourceLimit =cTarget;
4069     test =uBuf;
4070     myOff=offsets;
4071     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4072     if(U_FAILURE(errorCode)){
4073         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4074         goto cleanup;
4075     }
4076     uSource = (const UChar*)in;
4077     while(uSource<uSourceLimit){
4078         if(*test!=*uSource){
4079             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4080         }
4081         uSource++;
4082         test++;
4083     }
4084     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4085     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4086     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4087     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4088     TestJitterbug930("csISO2022KR");
4089     /*Test for the condition where there is an invalid character*/
4090     ucnv_reset(cnv);
4091     {
4092         static const uint8_t source2[]={0x1b,0x24,0x053};
4093         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4094         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4095     }
4096 
4097 cleanup:
4098     ucnv_close(cnv);
4099     free(uBuf);
4100     free(cBuf);
4101     free(offsets);
4102 }
4103 
4104 static void
TestISO_2022_KR_1()4105 TestISO_2022_KR_1() {
4106     /* test input */
4107     static const uint16_t in[]={
4108                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4109                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4110                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4111                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4112                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4113                    ,0x53E3,0x53E4,0x000A,0x000D};
4114     const UChar* uSource;
4115     const UChar* uSourceLimit;
4116     const char* cSource;
4117     const char* cSourceLimit;
4118     UChar *uTargetLimit =NULL;
4119     UChar *uTarget;
4120     char *cTarget;
4121     const char *cTargetLimit;
4122     char *cBuf = NULL;
4123     UChar *uBuf = NULL;
4124     UChar *test;
4125     int32_t uBufSize = 120;
4126     UErrorCode errorCode=U_ZERO_ERROR;
4127     UConverter *cnv = NULL;
4128     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4129     int32_t* myOff= offsets;
4130     cnv=ucnv_open("ibm-25546", &errorCode);
4131     if(U_FAILURE(errorCode)) {
4132         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4133         goto cleanup;
4134     }
4135 
4136     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4137     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4138     uSource = (const UChar*)in;
4139     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4140     cTarget = cBuf;
4141     cTargetLimit = cBuf +uBufSize*5;
4142     uTarget = uBuf;
4143     uTargetLimit = uBuf+ uBufSize*5;
4144     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4145     if(U_FAILURE(errorCode)){
4146         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4147         goto cleanup;
4148     }
4149     cSource = cBuf;
4150     cSourceLimit =cTarget;
4151     test =uBuf;
4152     myOff=offsets;
4153     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4154     if(U_FAILURE(errorCode)){
4155         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4156         goto cleanup;
4157     }
4158     uSource = (const UChar*)in;
4159     while(uSource<uSourceLimit){
4160         if(*test!=*uSource){
4161             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4162         }
4163         uSource++;
4164         test++;
4165     }
4166     ucnv_reset(cnv);
4167     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4168     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4169     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4170     ucnv_reset(cnv);
4171     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4172         /*Test for the condition where there is an invalid character*/
4173     ucnv_reset(cnv);
4174     {
4175         static const uint8_t source2[]={0x1b,0x24,0x053};
4176         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4177         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4178     }
4179 
4180 cleanup:
4181     ucnv_close(cnv);
4182     free(uBuf);
4183     free(cBuf);
4184     free(offsets);
4185 }
4186 
TestJitterbug2411()4187 static void TestJitterbug2411(){
4188     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4189                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4190     UConverter* kr=NULL, *kr1=NULL;
4191     UErrorCode errorCode = U_ZERO_ERROR;
4192     UChar tgt[100]={'\0'};
4193     UChar* target = tgt;
4194     UChar* targetLimit = target+100;
4195     kr=ucnv_open("iso-2022-kr", &errorCode);
4196     if(U_FAILURE(errorCode)) {
4197         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4198         return;
4199     }
4200     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4201     if(U_FAILURE(errorCode)) {
4202         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4203         return;
4204     }
4205     kr1 = ucnv_open("ibm-25546", &errorCode);
4206     if(U_FAILURE(errorCode)) {
4207         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4208         return;
4209     }
4210     target = tgt;
4211     targetLimit = target+100;
4212     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4213 
4214     if(U_FAILURE(errorCode)) {
4215         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4216         return;
4217     }
4218 
4219     ucnv_close(kr);
4220     ucnv_close(kr1);
4221 
4222 }
4223 
4224 static void
TestJIS()4225 TestJIS(){
4226     /* From Unicode moved to testdata/conversion.txt */
4227     /*To Unicode*/
4228     {
4229         static const uint8_t sampleTextJIS[] = {
4230             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4231             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4232             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4233         };
4234         static const uint16_t expectedISO2022JIS[] = {
4235             0x0041, 0x0042,
4236             0xFF81, 0xFF82,
4237             0x3000
4238         };
4239         static const int32_t  toISO2022JISOffs[]={
4240             3,4,
4241             8,9,
4242             16
4243         };
4244 
4245         static const uint8_t sampleTextJIS7[] = {
4246             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4247             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4248             0x1b,0x24,0x42,0x21,0x21,
4249             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4250             0x21,0x22,
4251             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4252         };
4253         static const uint16_t expectedISO2022JIS7[] = {
4254             0x0041, 0x0042,
4255             0xFF81, 0xFF82,
4256             0x3000,
4257             0xFF81, 0xFF82,
4258             0x3001,
4259             0x3000
4260         };
4261         static const int32_t  toISO2022JIS7Offs[]={
4262             3,4,
4263             8,9,
4264             13,16,
4265             17,
4266             19,27
4267         };
4268         static const uint8_t sampleTextJIS8[] = {
4269             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4270             0xa1,0xc8,0xd9,/*Katakana Set*/
4271             0x1b,0x28,0x42,
4272             0x41,0x42,
4273             0xb1,0xc3, /*Katakana Set*/
4274             0x1b,0x24,0x42,0x21,0x21
4275         };
4276         static const uint16_t expectedISO2022JIS8[] = {
4277             0x0041, 0x0042,
4278             0xff61, 0xff88, 0xff99,
4279             0x0041, 0x0042,
4280             0xff71, 0xff83,
4281             0x3000
4282         };
4283         static const int32_t  toISO2022JIS8Offs[]={
4284             3, 4,  5,  6,
4285             7, 11, 12, 13,
4286             14, 18,
4287         };
4288 
4289         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4290             UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4291         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4292             UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4293         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4294             UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4295     }
4296 
4297 }
4298 
4299 
4300 #if 0
4301  ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4302 
4303 static void TestJitterbug915(){
4304 /* tests for roundtripping of the below sequence
4305 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4306 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4307 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4308 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4309 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4310 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4311 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4312 */
4313     static const char cSource[]={
4314         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4315         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4316         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4317         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4318         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4319         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4320         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4321         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4322         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4323         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4324         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4325         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4326         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4327         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4328         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4329         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4330         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4331         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4332         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4333         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4334         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4335         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4336         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4337         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4338         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4339         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4340         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4341         0x37, 0x20, 0x2A, 0x2F
4342     };
4343     UChar uTarget[500]={'\0'};
4344     UChar* utarget=uTarget;
4345     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4346 
4347     char cTarget[500]={'\0'};
4348     char* ctarget=cTarget;
4349     char* ctargetLimit=cTarget+sizeof(cTarget);
4350     const char* csource=cSource;
4351     const char* tempSrc = cSource;
4352     UErrorCode err=U_ZERO_ERROR;
4353 
4354     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4355     if(U_FAILURE(err)) {
4356         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4357         return;
4358     }
4359     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4360     if(U_FAILURE(err)) {
4361         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4362         return;
4363     }
4364     utargetLimit=utarget;
4365     utarget = uTarget;
4366     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4367     if(U_FAILURE(err)) {
4368         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4369         return;
4370     }
4371     ctargetLimit=ctarget;
4372     ctarget =cTarget;
4373     while(ctarget<ctargetLimit){
4374         if(*ctarget != *tempSrc){
4375             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4376         }
4377         ++ctarget;
4378         ++tempSrc;
4379     }
4380 
4381     ucnv_close(conv);
4382 }
4383 
4384 static void
4385 TestISO_2022_CN_EXT() {
4386     /* test input */
4387     static const uint16_t in[]={
4388                 /* test Non-BMP code points */
4389          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4390          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4391          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4392          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4393          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4394          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4395          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4396          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4397          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4398          0xD869, 0xDED5,
4399 
4400          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4401          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4402          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4403          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4404          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4405          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4406          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4407          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4408          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4409          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4410          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4411          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4412          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4413          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4414          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4415          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4416          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4417          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4418 
4419          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4420 
4421       };
4422 
4423     const UChar* uSource;
4424     const UChar* uSourceLimit;
4425     const char* cSource;
4426     const char* cSourceLimit;
4427     UChar *uTargetLimit =NULL;
4428     UChar *uTarget;
4429     char *cTarget;
4430     const char *cTargetLimit;
4431     char *cBuf = NULL;
4432     UChar *uBuf = NULL;
4433     UChar *test;
4434     int32_t uBufSize = 180;
4435     UErrorCode errorCode=U_ZERO_ERROR;
4436     UConverter *cnv = NULL;
4437     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4438     int32_t* myOff= offsets;
4439     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4440     if(U_FAILURE(errorCode)) {
4441         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4442         goto cleanup;
4443     }
4444 
4445     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4446     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4447     uSource = (const UChar*)in;
4448     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4449     cTarget = cBuf;
4450     cTargetLimit = cBuf +uBufSize*5;
4451     uTarget = uBuf;
4452     uTargetLimit = uBuf+ uBufSize*5;
4453     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4454     if(U_FAILURE(errorCode)){
4455         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4456         goto cleanup;
4457     }
4458     cSource = cBuf;
4459     cSourceLimit =cTarget;
4460     test =uBuf;
4461     myOff=offsets;
4462     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4463     if(U_FAILURE(errorCode)){
4464         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4465         goto cleanup;
4466     }
4467     uSource = (const UChar*)in;
4468     while(uSource<uSourceLimit){
4469         if(*test!=*uSource){
4470             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4471         }
4472         else{
4473             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4474         }
4475         uSource++;
4476         test++;
4477     }
4478     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4479     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4480     /*Test for the condition where there is an invalid character*/
4481     ucnv_reset(cnv);
4482     {
4483         static const uint8_t source2[]={0x0e,0x24,0x053};
4484         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4485     }
4486 
4487 cleanup:
4488     ucnv_close(cnv);
4489     free(uBuf);
4490     free(cBuf);
4491     free(offsets);
4492 }
4493 #endif
4494 
4495 static void
TestISO_2022_CN()4496 TestISO_2022_CN() {
4497     /* test input */
4498     static const uint16_t in[]={
4499          /* jitterbug 951 */
4500          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4501          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4502          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4503          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4504          0x0020, 0x0045, 0x004e, 0x0044,
4505          /**/
4506          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4507          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4508          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4509          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4510          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4511          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4512          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4513          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4514          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4515          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4516          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4517          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4518          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4519          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4520          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4521          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4522          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4523 
4524       };
4525     const UChar* uSource;
4526     const UChar* uSourceLimit;
4527     const char* cSource;
4528     const char* cSourceLimit;
4529     UChar *uTargetLimit =NULL;
4530     UChar *uTarget;
4531     char *cTarget;
4532     const char *cTargetLimit;
4533     char *cBuf = NULL;
4534     UChar *uBuf = NULL;
4535     UChar *test;
4536     int32_t uBufSize = 180;
4537     UErrorCode errorCode=U_ZERO_ERROR;
4538     UConverter *cnv = NULL;
4539     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4540     int32_t* myOff= offsets;
4541     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4542     if(U_FAILURE(errorCode)) {
4543         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4544         goto cleanup;
4545     }
4546 
4547     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4548     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4549     uSource = (const UChar*)in;
4550     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4551     cTarget = cBuf;
4552     cTargetLimit = cBuf +uBufSize*5;
4553     uTarget = uBuf;
4554     uTargetLimit = uBuf+ uBufSize*5;
4555     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4556     if(U_FAILURE(errorCode)){
4557         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4558         goto cleanup;
4559     }
4560     cSource = cBuf;
4561     cSourceLimit =cTarget;
4562     test =uBuf;
4563     myOff=offsets;
4564     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4565     if(U_FAILURE(errorCode)){
4566         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4567         goto cleanup;
4568     }
4569     uSource = (const UChar*)in;
4570     while(uSource<uSourceLimit){
4571         if(*test!=*uSource){
4572             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4573         }
4574         else{
4575             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4576         }
4577         uSource++;
4578         test++;
4579     }
4580     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4581     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4582     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4583     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4584     TestJitterbug930("csISO2022CN");
4585     /*Test for the condition where there is an invalid character*/
4586     ucnv_reset(cnv);
4587     {
4588         static const uint8_t source2[]={0x0e,0x24,0x053};
4589         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4590     }
4591 
4592 cleanup:
4593     ucnv_close(cnv);
4594     free(uBuf);
4595     free(cBuf);
4596     free(offsets);
4597 }
4598 
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that
 * UConverterCallbackReason is UCNV_IRREGULAR.
 *
 * One table row of that test: a converter to open and the raw bytes to
 * push through it.
 */
typedef struct EmptySegmentTest {
    const char *converterName;    /* name handed to ucnv_open(); NULL marks the table terminator */
    const char *inputText;        /* encoded input bytes (not NUL-terminated) */
    int         inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4605 
4606 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)4607 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4608                                              int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4609     // suppress compiler warnings about unused variables
4610     (void)context;
4611     (void)codeUnits;
4612     (void)length;
4613     if (reason > UCNV_IRREGULAR) {
4614         return;
4615     }
4616     if (reason != UCNV_IRREGULAR) {
4617         log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4618     }
4619     /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4620     *err = U_ZERO_ERROR;
4621     ucnv_cbToUWriteSub(toArgs,0,err);
4622 }
4623 
4624 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4625 static void TestJitterbug6175(void) {
4626     static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4627     static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4628     static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4629     static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4630     static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4631     static const EmptySegmentTest emptySegmentTests[] = {
4632         /* converterName inputText    inputTextLength */
4633         { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4634         { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4635         { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4636         { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4637         { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4638         /* terminator: */
4639         { NULL,          NULL,        0,                  }
4640     };
4641     const EmptySegmentTest * testPtr;
4642     for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4643         UErrorCode   err = U_ZERO_ERROR;
4644         UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4645         if (U_FAILURE(err)) {
4646             log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4647             return;
4648         }
4649         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4650         if (U_FAILURE(err)) {
4651             log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4652             ucnv_close(cnv);
4653             return;
4654         }
4655         {
4656             UChar         toUChars[kEmptySegmentToUCharsMax];
4657             UChar *       toUCharsPtr = toUChars;
4658             const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4659             const char *  inCharsPtr = testPtr->inputText;
4660             const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4661             ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4662         }
4663         ucnv_close(cnv);
4664     }
4665 }
4666 
4667 static void
TestEBCDIC_STATEFUL()4668 TestEBCDIC_STATEFUL() {
4669     /* test input */
4670     static const uint8_t in[]={
4671         0x61,
4672         0x1a,
4673         0x0f, 0x4b,
4674         0x42,
4675         0x40,
4676         0x36,
4677     };
4678 
4679     /* expected test results */
4680     static const int32_t results[]={
4681         /* number of bytes read, code point */
4682         1, 0x002f,
4683         1, 0x0092,
4684         2, 0x002e,
4685         1, 0xff62,
4686         1, 0x0020,
4687         1, 0x0096,
4688 
4689     };
4690     static const uint8_t in2[]={
4691         0x0f,
4692         0xa1,
4693         0x01
4694     };
4695 
4696     /* expected test results */
4697     static const int32_t results2[]={
4698         /* number of bytes read, code point */
4699         2, 0x203E,
4700         1, 0x0001,
4701     };
4702 
4703     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4704     UErrorCode errorCode=U_ZERO_ERROR;
4705     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4706     if(U_FAILURE(errorCode)) {
4707         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4708         return;
4709     }
4710     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4711     ucnv_reset(cnv);
4712      /* Test the condition when source >= sourceLimit */
4713     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4714     ucnv_reset(cnv);
4715     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4716     {
4717         static const uint8_t source1[]={0x0f};
4718         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4719     }
4720     /*Test for the condition where there is an invalid character*/
4721     ucnv_reset(cnv);
4722     {
4723         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4724         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4725     }
4726     ucnv_reset(cnv);
4727     source=(const char*)in2;
4728     limit=(const char*)in2+sizeof(in2);
4729     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4730     ucnv_close(cnv);
4731 
4732 }
4733 
4734 static void
TestGB18030()4735 TestGB18030() {
4736     /* test input */
4737     static const uint8_t in[]={
4738         0x24,
4739         0x7f,
4740         0x81, 0x30, 0x81, 0x30,
4741         0xa8, 0xbf,
4742         0xa2, 0xe3,
4743         0xd2, 0xbb,
4744         0x82, 0x35, 0x8f, 0x33,
4745         0x84, 0x31, 0xa4, 0x39,
4746         0x90, 0x30, 0x81, 0x30,
4747         0xe3, 0x32, 0x9a, 0x35
4748 #if 0
4749         /*
4750          * Feature removed   markus 2000-oct-26
4751          * Only some codepages must match surrogate pairs into supplementary code points -
4752          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4753          * GB 18030 provides direct encodings for supplementary code points, therefore
4754          * it must not combine two single-encoded surrogates into one code point.
4755          */
4756         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4757 #endif
4758     };
4759 
4760     /* expected test results */
4761     static const int32_t results[]={
4762         /* number of bytes read, code point */
4763         1, 0x24,
4764         1, 0x7f,
4765         4, 0x80,
4766         2, 0x1f9,
4767         2, 0x20ac,
4768         2, 0x4e00,
4769         4, 0x9fa6,
4770         4, 0xffff,
4771         4, 0x10000,
4772         4, 0x10ffff
4773 #if 0
4774         /* Feature removed. See comment above. */
4775         8, 0x10000
4776 #endif
4777     };
4778 
4779 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4780     UErrorCode errorCode=U_ZERO_ERROR;
4781     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4782     if(U_FAILURE(errorCode)) {
4783         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4784         return;
4785     }
4786     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4787     ucnv_close(cnv);
4788 }
4789 
4790 static void
TestLMBCS()4791 TestLMBCS() {
4792     /* LMBCS-1 string */
4793     static const uint8_t pszLMBCS[]={
4794         0x61,
4795         0x01, 0x29,
4796         0x81,
4797         0xA0,
4798         0x0F, 0x27,
4799         0x0F, 0x91,
4800         0x14, 0x0a, 0x74,
4801         0x14, 0xF6, 0x02,
4802         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4803         0x10, 0x88, 0xA0,
4804     };
4805 
4806     /* Unicode UChar32 equivalents */
4807     static const UChar32 pszUnicode32[]={
4808         /* code point */
4809         0x00000061,
4810         0x00002013,
4811         0x000000FC,
4812         0x000000E1,
4813         0x00000007,
4814         0x00000091,
4815         0x00000a74,
4816         0x00000200,
4817         0x00023456, /* code point for surrogate pair */
4818         0x00005516
4819     };
4820 
4821 /* Unicode UChar equivalents */
4822     static const UChar pszUnicode[]={
4823         /* code point */
4824         0x0061,
4825         0x2013,
4826         0x00FC,
4827         0x00E1,
4828         0x0007,
4829         0x0091,
4830         0x0a74,
4831         0x0200,
4832         0xD84D, /* low surrogate */
4833         0xDC56, /* high surrogate */
4834         0x5516
4835     };
4836 
4837 /* expected test results */
4838     static const int offsets32[]={
4839         /* number of bytes read, code point */
4840         0,
4841         1,
4842         3,
4843         4,
4844         5,
4845         7,
4846         9,
4847         12,
4848         15,
4849         21,
4850         24
4851     };
4852 
4853 /* expected test results */
4854     static const int offsets[]={
4855         /* number of bytes read, code point */
4856         0,
4857         1,
4858         3,
4859         4,
4860         5,
4861         7,
4862         9,
4863         12,
4864         15,
4865         18,
4866         21,
4867         24
4868     };
4869 
4870 
4871     UConverter *cnv;
4872 
4873 #define NAME_LMBCS_1 "LMBCS-1"
4874 #define NAME_LMBCS_2 "LMBCS-2"
4875 
4876 
4877    /* Some basic open/close/property tests on some LMBCS converters */
4878     {
4879 
4880       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4881       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4882       char get_subchars [1];
4883       const char * get_name;
4884       UConverter *cnv1;
4885       UConverter *cnv2;
4886 
4887       int8_t len = sizeof(get_subchars);
4888 
4889       UErrorCode errorCode=U_ZERO_ERROR;
4890 
4891       /* Open */
4892       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4893       if(U_FAILURE(errorCode)) {
4894          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4895          return;
4896       }
4897       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4898       if(U_FAILURE(errorCode)) {
4899          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4900          return;
4901       }
4902 
4903       /* Name */
4904       get_name = ucnv_getName (cnv1, &errorCode);
4905       if (strcmp(NAME_LMBCS_1,get_name)){
4906          log_err("Unexpected converter name: %s\n", get_name);
4907       }
4908       get_name = ucnv_getName (cnv2, &errorCode);
4909       if (strcmp(NAME_LMBCS_2,get_name)){
4910          log_err("Unexpected converter name: %s\n", get_name);
4911       }
4912 
4913       /* substitution chars */
4914       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4915       if(U_FAILURE(errorCode)) {
4916          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4917       }
4918       if (len!=1){
4919          log_err("Unexpected length of sub chars\n");
4920       }
4921       if (get_subchars[0] != expected_subchars[0]){
4922            log_err("Unexpected value of sub chars\n");
4923       }
4924       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4925       if(U_FAILURE(errorCode)) {
4926          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4927       }
4928       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4929       if(U_FAILURE(errorCode)) {
4930          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4931       }
4932       if (len!=1){
4933          log_err("Unexpected length of sub chars\n");
4934       }
4935       if (get_subchars[0] != new_subchars[0]){
4936            log_err("Unexpected value of sub chars\n");
4937       }
4938       ucnv_close(cnv1);
4939       ucnv_close(cnv2);
4940 
4941     }
4942 
4943     /* LMBCS to Unicode - offsets */
4944     {
4945        UErrorCode errorCode=U_ZERO_ERROR;
4946 
4947        const char * pSource = (const char *)pszLMBCS;
4948        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4949 
4950        UChar Out [sizeof(pszUnicode) + 1];
4951        UChar * pOut = Out;
4952        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4953 
4954        int32_t off [sizeof(offsets)];
4955 
4956       /* last 'offset' in expected results is just the final size.
4957          (Makes other tests easier). Compensate here: */
4958 
4959        off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4960 
4961 
4962 
4963       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4964       if(U_FAILURE(errorCode)) {
4965            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4966            return;
4967       }
4968 
4969 
4970 
4971       ucnv_toUnicode (cnv,
4972                       &pOut,
4973                       OutLimit,
4974                       &pSource,
4975                       sourceLimit,
4976                       off,
4977                       TRUE,
4978                       &errorCode);
4979 
4980 
4981        if (memcmp(off,offsets,sizeof(offsets)))
4982        {
4983          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4984        }
4985        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4986        {
4987          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4988        }
4989        ucnv_close(cnv);
4990     }
4991     {
4992    /* LMBCS to Unicode - getNextUChar */
4993       const char * sourceStart;
4994       const char *source=(const char *)pszLMBCS;
4995       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4996       const UChar32 *results= pszUnicode32;
4997       const int *off = offsets32;
4998 
4999       UErrorCode errorCode=U_ZERO_ERROR;
5000       UChar32 uniChar;
5001 
5002       cnv=ucnv_open("LMBCS-1", &errorCode);
5003       if(U_FAILURE(errorCode)) {
5004            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5005            return;
5006       }
5007       else
5008       {
5009 
5010          while(source<limit) {
5011             sourceStart=source;
5012             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5013             if(U_FAILURE(errorCode)) {
5014                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5015                   break;
5016             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5017                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5018                    uniChar, (source-sourceStart), *results, *off);
5019                break;
5020             }
5021             results++;
5022             off++;
5023          }
5024        }
5025        ucnv_close(cnv);
5026     }
5027     { /* test locale & optimization group operations: Unicode to LMBCS */
5028 
5029       UErrorCode errorCode=U_ZERO_ERROR;
5030       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5031       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5032       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5033       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5034       const UChar * pUniOut = uniString;
5035       UChar * pUniIn = uniString;
5036       uint8_t lmbcsString [4];
5037       const char * pLMBCSOut = (const char *)lmbcsString;
5038       char * pLMBCSIn = (char *)lmbcsString;
5039 
5040       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5041       ucnv_fromUnicode (cnv16he,
5042                         &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5043                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5044                         NULL, 1, &errorCode);
5045 
5046       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5047       {
5048          log_err("LMBCS-16,locale=he gives unexpected translation\n");
5049       }
5050 
5051       pLMBCSIn= (char *)lmbcsString;
5052       pUniOut = uniString;
5053       ucnv_fromUnicode (cnv01us,
5054                         &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5055                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5056                         NULL, 1, &errorCode);
5057 
5058       if (lmbcsString[0] != 0x9F)
5059       {
5060          log_err("LMBCS-1,locale=US gives unexpected translation\n");
5061       }
5062 
5063       /* single byte char from mbcs char set */
5064       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5065       pLMBCSOut = (const char *)lmbcsString;
5066       pUniIn = uniString;
5067       ucnv_toUnicode (cnv16jp,
5068                         &pUniIn, pUniIn + 1,
5069                         &pLMBCSOut, (pLMBCSOut + 1),
5070                         NULL, 1, &errorCode);
5071       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5072       {
5073            log_err("Unexpected results from LMBCS-16 single byte char\n");
5074       }
5075       /* convert to group 1: should be 3 bytes */
5076       pLMBCSIn = (char *)lmbcsString;
5077       pUniOut = uniString;
5078       ucnv_fromUnicode (cnv01us,
5079                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5080                         &pUniOut, pUniOut + 1,
5081                         NULL, 1, &errorCode);
5082       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5083          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5084       {
5085            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5086       }
5087       pLMBCSOut = (const char *)lmbcsString;
5088       pUniIn = uniString;
5089       ucnv_toUnicode (cnv01us,
5090                         &pUniIn, pUniIn + 1,
5091                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5092                         NULL, 1, &errorCode);
5093       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5094       {
5095            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5096       }
5097       pLMBCSIn = (char *)lmbcsString;
5098       pUniOut = uniString;
5099       ucnv_fromUnicode (cnv16jp,
5100                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5101                         &pUniOut, pUniOut + 1,
5102                         NULL, 1, &errorCode);
5103       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5104       {
5105            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5106       }
5107       ucnv_close(cnv16he);
5108       ucnv_close(cnv16jp);
5109       ucnv_close(cnv01us);
5110     }
5111     {
5112        /* Small source buffer testing, LMBCS -> Unicode */
5113 
5114        UErrorCode errorCode=U_ZERO_ERROR;
5115 
5116        const char * pSource = (const char *)pszLMBCS;
5117        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5118        int codepointCount = 0;
5119 
5120        UChar Out [sizeof(pszUnicode) + 1];
5121        UChar * pOut = Out;
5122        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5123 
5124 
5125        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5126        if(U_FAILURE(errorCode)) {
5127            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5128            return;
5129        }
5130 
5131 
5132        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5133        {
5134            ucnv_toUnicode (cnv,
5135                &pOut,
5136                OutLimit,
5137                &pSource,
5138                (pSource+1), /* claim that this is a 1- byte buffer */
5139                NULL,
5140                FALSE,    /* FALSE means there might be more chars in the next buffer */
5141                &errorCode);
5142 
5143            if (U_SUCCESS (errorCode))
5144            {
5145                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5146                {
5147                    /* we are on to the next code point: check value */
5148 
5149                    if (Out[0] != pszUnicode[codepointCount]){
5150                        log_err("LMBCS->Uni result %lx should have been %lx \n",
5151                            Out[0], pszUnicode[codepointCount]);
5152                    }
5153 
5154                    pOut = Out; /* reset for accumulating next code point */
5155                    codepointCount++;
5156                }
5157            }
5158            else
5159            {
5160                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5161            }
5162        }
5163        {
5164          /* limits & surrogate error testing */
5165          char LIn [sizeof(pszLMBCS)];
5166          const char * pLIn = LIn;
5167 
5168          char LOut [sizeof(pszLMBCS)];
5169          char * pLOut = LOut;
5170 
5171          UChar UOut [sizeof(pszUnicode)];
5172          UChar * pUOut = UOut;
5173 
5174          UChar UIn [sizeof(pszUnicode)];
5175          const UChar * pUIn = UIn;
5176 
5177          int32_t off [sizeof(offsets)];
5178          UChar32 uniChar;
5179 
5180          errorCode=U_ZERO_ERROR;
5181 
5182          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5183          pUIn++;
5184          ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5185          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5186          {
5187             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5188          }
5189          pUIn--;
5190 
5191          errorCode=U_ZERO_ERROR;
5192          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5193          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5194          {
5195             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5196          }
5197          errorCode=U_ZERO_ERROR;
5198 
5199          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5200          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5201          {
5202             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5203          }
5204          errorCode=U_ZERO_ERROR;
5205 
5206          /* 0 byte source request - no error, no pointer movement */
5207          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5208          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5209          if(U_FAILURE(errorCode)) {
5210             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5211          }
5212          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5213          {
5214               log_err("Unexpected pointer move in 0 byte source request \n");
5215          }
5216          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5217          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5218          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5219          {
5220             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5221          }
5222          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5223          {
5224             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5225          }
5226          errorCode = U_ZERO_ERROR;
5227 
5228          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5229 
5230          pUIn = pszUnicode;
5231          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5232          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5233          {
5234             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5235          }
5236 
5237          errorCode = U_ZERO_ERROR;
5238 
5239          pLIn = (const char *)pszLMBCS;
5240          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5241          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5242          {
5243             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5244          }
5245 
5246          /* unpaired or chopped LMBCS surrogates */
5247 
5248          /* OK high surrogate, Low surrogate is chopped */
5249          LIn [0] = (char)0x14;
5250          LIn [1] = (char)0xD8;
5251          LIn [2] = (char)0x01;
5252          LIn [3] = (char)0x14;
5253          LIn [4] = (char)0xDC;
5254          pLIn = LIn;
5255          errorCode = U_ZERO_ERROR;
5256          pUOut = UOut;
5257 
5258          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5259          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5260          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5261          {
5262             log_err("Unexpected results on chopped low surrogate\n");
5263          }
5264 
5265          /* chopped at surrogate boundary */
5266          LIn [0] = (char)0x14;
5267          LIn [1] = (char)0xD8;
5268          LIn [2] = (char)0x01;
5269          pLIn = LIn;
5270          errorCode = U_ZERO_ERROR;
5271          pUOut = UOut;
5272 
5273          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5274          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5275          {
5276             log_err("Unexpected results on chopped at surrogate boundary \n");
5277          }
5278 
5279          /* unpaired surrogate plus valid Unichar */
5280          LIn [0] = (char)0x14;
5281          LIn [1] = (char)0xD8;
5282          LIn [2] = (char)0x01;
5283          LIn [3] = (char)0x14;
5284          LIn [4] = (char)0xC9;
5285          LIn [5] = (char)0xD0;
5286          pLIn = LIn;
5287          errorCode = U_ZERO_ERROR;
5288          pUOut = UOut;
5289 
5290          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5291          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5292          {
5293             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5294          }
5295 
5296       /* unpaired surrogate plus chopped Unichar */
5297          LIn [0] = (char)0x14;
5298          LIn [1] = (char)0xD8;
5299          LIn [2] = (char)0x01;
5300          LIn [3] = (char)0x14;
5301          LIn [4] = (char)0xC9;
5302 
5303          pLIn = LIn;
5304          errorCode = U_ZERO_ERROR;
5305          pUOut = UOut;
5306 
5307          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5308          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5309          {
5310             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5311          }
5312 
5313          /* unpaired surrogate plus valid non-Unichar */
5314          LIn [0] = (char)0x14;
5315          LIn [1] = (char)0xD8;
5316          LIn [2] = (char)0x01;
5317          LIn [3] = (char)0x0F;
5318          LIn [4] = (char)0x3B;
5319 
5320          pLIn = LIn;
5321          errorCode = U_ZERO_ERROR;
5322          pUOut = UOut;
5323 
5324          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5325          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5326          {
5327             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5328          }
5329 
5330          /* unpaired surrogate plus chopped non-Unichar */
5331          LIn [0] = (char)0x14;
5332          LIn [1] = (char)0xD8;
5333          LIn [2] = (char)0x01;
5334          LIn [3] = (char)0x0F;
5335 
5336          pLIn = LIn;
5337          errorCode = U_ZERO_ERROR;
5338          pUOut = UOut;
5339 
5340          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5341 
5342          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5343          {
5344             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5345          }
5346        }
5347     }
5348    ucnv_close(cnv);  /* final cleanup */
5349 }
5350 
5351 
TestJitterbug255()5352 static void TestJitterbug255()
5353 {
5354     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5355     const char *testBuffer = (const char *)testBytes;
5356     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5357     UErrorCode status = U_ZERO_ERROR;
5358     /*UChar32 result;*/
5359     UConverter *cnv = 0;
5360 
5361     cnv = ucnv_open("shift-jis", &status);
5362     if (U_FAILURE(status) || cnv == 0) {
5363         log_data_err("Failed to open the converter for SJIS.\n");
5364                 return;
5365     }
5366     while (testBuffer != testEnd)
5367     {
5368         /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5369         if (U_FAILURE(status))
5370         {
5371             log_err("Failed to convert the next UChar for SJIS.\n");
5372             break;
5373         }
5374     }
5375     ucnv_close(cnv);
5376 }
5377 
TestEBCDICUS4XML()5378 static void TestEBCDICUS4XML()
5379 {
5380     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5381     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5382     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5383     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5384     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5385     UChar *unicodes = unicodes_x;
5386     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5387     char *target = target_x;
5388     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5389     UErrorCode status = U_ZERO_ERROR;
5390     UConverter *cnv = 0;
5391 
5392     cnv = ucnv_open("ebcdic-xml-us", &status);
5393     if (U_FAILURE(status) || cnv == 0) {
5394         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5395         return;
5396     }
5397     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5398     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5399         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5400             u_errorName(status));
5401         printUSeqErr(unicodes_x, 3);
5402         printUSeqErr(toUnicodeMaps, 3);
5403     }
5404     status = U_ZERO_ERROR;
5405     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5406     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5407         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5408             u_errorName(status));
5409         printSeqErr((const unsigned char*)target_x, 3);
5410         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5411     }
5412     ucnv_close(cnv);
5413 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5415 
5416 #if !UCONFIG_NO_COLLATION
5417 
TestJitterbug981()5418 static void TestJitterbug981(){
5419     const UChar* rules;
5420     int32_t rules_length, target_cap, bytes_needed, buff_size;
5421     UErrorCode status = U_ZERO_ERROR;
5422     UConverter *utf8cnv;
5423     UCollator* myCollator;
5424     char *buff;
5425     int numNeeded=0;
5426     utf8cnv = ucnv_open ("utf8", &status);
5427     if(U_FAILURE(status)){
5428         log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5429         return;
5430     }
5431     myCollator = ucol_open("zh", &status);
5432     if(U_FAILURE(status)){
5433         log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5434         ucnv_close(utf8cnv);
5435         return;
5436     }
5437 
5438     rules = ucol_getRules(myCollator, &rules_length);
5439     if(rules_length == 0) {
5440         log_data_err("missing zh tailoring rule string\n");
5441         ucol_close(myCollator);
5442         ucnv_close(utf8cnv);
5443         return;
5444     }
5445     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5446     buff = malloc(buff_size);
5447 
5448     target_cap = 0;
5449     do {
5450         ucnv_reset(utf8cnv);
5451         status = U_ZERO_ERROR;
5452         if(target_cap >= buff_size) {
5453             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5454             break;
5455         }
5456         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5457             rules, rules_length, &status);
5458         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5459         if(numNeeded!=0 && numNeeded!= bytes_needed){
5460             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5461             break;
5462         }
5463         numNeeded = bytes_needed;
5464     } while (status == U_BUFFER_OVERFLOW_ERROR);
5465     ucol_close(myCollator);
5466     ucnv_close(utf8cnv);
5467     free(buff);
5468 }
5469 
5470 #endif
5471 
5472 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293()5473 static void TestJitterbug1293(){
5474     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5475     char target[256];
5476     UErrorCode status = U_ZERO_ERROR;
5477     UConverter* conv=NULL;
5478     int32_t target_cap, bytes_needed, numNeeded = 0;
5479     conv = ucnv_open("shift-jis",&status);
5480     if(U_FAILURE(status)){
5481       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5482       return;
5483     }
5484 
5485     do{
5486         target_cap =0;
5487         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5488         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5489         if(numNeeded!=0 && numNeeded!= bytes_needed){
5490           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5491         }
5492         numNeeded = bytes_needed;
5493     } while (status == U_BUFFER_OVERFLOW_ERROR);
5494     if(U_FAILURE(status)){
5495       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5496       return;
5497     }
5498     ucnv_close(conv);
5499 }
5500 #endif
5501 
TestJB5275_1()5502 static void TestJB5275_1(){
5503 
5504     static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5505                                 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5506                                 /* Switch script: */
5507                                 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5508                                 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5509                                 "\xEF\x40\x3B\xB3\x0A";
5510     static const UChar expected[] ={
5511             0x003b, 0x0a15, 0x000a, /* Easy characters */
5512             0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5513             0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5514             0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5515             0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5516     };
5517 
5518     UErrorCode status = U_ZERO_ERROR;
5519     UConverter* conv = ucnv_open("iscii-gur", &status);
5520     UChar dest[100] = {'\0'};
5521     UChar* target = dest;
5522     UChar* targetLimit = dest+100;
5523     const char* source = data;
5524     const char* sourceLimit = data+strlen(data);
5525     const UChar* exp = expected;
5526 
5527     if (U_FAILURE(status)) {
5528         log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5529         return;
5530     }
5531 
5532     log_verbose("Testing switching back to default script when new line is encountered.\n");
5533     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5534     if(U_FAILURE(status)){
5535         log_err("conversion failed: %s \n", u_errorName(status));
5536     }
5537     targetLimit = target;
5538     target = dest;
5539     printUSeq(target, (int)(targetLimit-target));
5540     while(target<targetLimit){
5541         if(*exp!=*target){
5542             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5543         }
5544         target++;
5545         exp++;
5546     }
5547     ucnv_close(conv);
5548 }
5549 
TestJB5275()5550 static void TestJB5275(){
5551     static const char* data =
5552     /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5553     /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5554     /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5555         "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5556         "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5557         "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5558         "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5559         "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5560         "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5561         /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5562     static const UChar expected[] ={
5563         0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5564         0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5565         0x0038, 0x0C95, 0x000A, /* Kannada test */
5566         0x0039, 0x0D15, 0x000A, /* Malayalam test */
5567         0x003A, 0x0A95, 0x000A, /* Gujarati test */
5568         0x003B, 0x0A15, 0x000A, /* Punjabi test */
5569     };
5570 
5571     UErrorCode status = U_ZERO_ERROR;
5572     UConverter* conv = ucnv_open("iscii", &status);
5573     UChar dest[100] = {'\0'};
5574     UChar* target = dest;
5575     UChar* targetLimit = dest+100;
5576     const char* source = data;
5577     const char* sourceLimit = data+strlen(data);
5578     const UChar* exp = expected;
5579     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5580     if(U_FAILURE(status)){
5581         log_data_err("conversion failed: %s \n", u_errorName(status));
5582     }
5583     targetLimit = target;
5584     target = dest;
5585 
5586     printUSeq(target, (int)(targetLimit-target));
5587 
5588     while(target<targetLimit){
5589         if(*exp!=*target){
5590             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5591         }
5592         target++;
5593         exp++;
5594     }
5595     ucnv_close(conv);
5596 }
5597 
5598 static void
TestIsFixedWidth()5599 TestIsFixedWidth() {
5600     UErrorCode status = U_ZERO_ERROR;
5601     UConverter *cnv = NULL;
5602     int32_t i;
5603 
5604     const char *fixedWidth[] = {
5605             "US-ASCII",
5606             "UTF32",
5607             "ibm-5478_P100-1995"
5608     };
5609 
5610     const char *notFixedWidth[] = {
5611             "GB18030",
5612             "UTF8",
5613             "windows-949-2000",
5614             "UTF16"
5615     };
5616 
5617     for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5618         cnv = ucnv_open(fixedWidth[i], &status);
5619         if (cnv == NULL || U_FAILURE(status)) {
5620             log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5621             continue;
5622         }
5623 
5624         if (!ucnv_isFixedWidth(cnv, &status)) {
5625             log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5626         }
5627         ucnv_close(cnv);
5628     }
5629 
5630     for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5631         cnv = ucnv_open(notFixedWidth[i], &status);
5632         if (cnv == NULL || U_FAILURE(status)) {
5633             log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5634             continue;
5635         }
5636 
5637         if (ucnv_isFixedWidth(cnv, &status)) {
5638             log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5639         }
5640         ucnv_close(cnv);
5641     }
5642 }
5643