• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *    Steven R. Loomis     7/8/1999      Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include "cstring.h"
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
28 #include "cmemory.h"
29 #include "nucnvtst.h"
30 
31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
35 #endif
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
38 #endif
39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
54 
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
61 #endif
62 
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
65 #endif
66 
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
73 #if 0
74    /*
75     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76     */
77 static void TestISO_2022_CN_EXT(void);
78 #endif
79 static void TestJIS(void);
80 static void TestHZ(void);
81 #endif
82 
83 static void TestSCSU(void);
84 
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
91 #if 0
92    /*
93     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94     */
95 static void TestJitterbug915(void);
96 #endif
97 static void TestISCII(void);
98 
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
105 
106 static void TestIsFixedWidth(void);
107 #endif
108 
109 static void TestInBufSizes(void);
110 
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in[],
113                      int len,
114                      const char* conv,
115                      const char* lang,
116                      char byteArr[],
117                      int byteArrLen);
118 
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121 
122 
/* Capacity of the scratch output buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/*
 * Chunk sizes handed to the converter per call by testConvertFromU() and
 * testConvertToU(). TestNewConvertWithBufferSizes() overwrites both.
 */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Human-readable description of the running test; filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y. Every argument use is parenthesized so that operands
 * containing low-precedence operators (e.g. `a & b`) compare correctly.
 * Arguments may be evaluated twice: do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
130 
my_ucnv_open(const char * cnv,UErrorCode * err)131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132 {
133   if(cnv && cnv[0] == '@') {
134     return ucnv_openPackage(loadTestData(err), cnv+1, err);
135   } else {
136     return ucnv_open(cnv, err);
137   }
138 }
139 
/* Writes len bytes of a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148 
/* Writes len UChars of a to the verbose log as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
156 
/* Writes len bytes of a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165 
/* Writes len UChars of a to stderr as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
174 
175 static void
TestNextUChar(UConverter * cnv,const char * source,const char * limit,const int32_t results[],const char * message)176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177 {
178      const char* s0;
179      const char* s=(char*)source;
180      const int32_t *r=results;
181      UErrorCode errorCode=U_ZERO_ERROR;
182      UChar32 c;
183 
184      while(s<limit) {
185         s0=s;
186         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188             break; /* no more significant input */
189         } else if(U_FAILURE(errorCode)) {
190             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191             break;
192         } else if(
193             /* test the expected number of input bytes only if >=0 */
194             (*r>=0 && (int32_t)(s-s0)!=*r) ||
195             c!=*(r+1)
196         ) {
197             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                 message, c, (s-s0), *(r+1), *r);
199             break;
200         }
201         r+=2;
202     }
203 }
204 
205 static void
TestNextUCharError(UConverter * cnv,const char * source,const char * limit,UErrorCode expected,const char * message)206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207 {
208      const char* s=(char*)source;
209      UErrorCode errorCode=U_ZERO_ERROR;
210      uint32_t c;
211      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212      if(errorCode != expected){
213         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214      }
215      if(c != 0xFFFD && c != 0xffff){
216         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217      }
218 
219 }
220 
TestInBufSizes(void)221 static void TestInBufSizes(void)
222 {
223   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224 #if 1
225   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230   TestNewConvertWithBufferSizes(1,1);
231   TestNewConvertWithBufferSizes(2,3);
232   TestNewConvertWithBufferSizes(3,2);
233 #endif
234 }
235 
TestOutBufSizes(void)236 static void TestOutBufSizes(void)
237 {
238 #if 1
239   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245 
246 #endif
247 }
248 
249 
/*
 * Registers this file's converter tests under "tsconv/nucnvtst/".
 * Which tests are registered depends on the UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022 build switches.
 */
void addTestNewConvert(TestNode** root)
{
/* Registers test function fn under the path "tsconv/nucnvtst/<fn>". */
#define NUCNVTST_ADD(fn) addTest(root, &fn, "tsconv/nucnvtst/" #fn)

#if !UCONFIG_NO_FILE_IO
   NUCNVTST_ADD(TestInBufSizes);
   NUCNVTST_ADD(TestOutBufSizes);
#endif
   NUCNVTST_ADD(TestConverterTypesAndStarters);
   NUCNVTST_ADD(TestAmbiguous);
   NUCNVTST_ADD(TestSignatureDetection);
   NUCNVTST_ADD(TestUTF7);
   NUCNVTST_ADD(TestIMAP);
   NUCNVTST_ADD(TestUTF8);

   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
   NUCNVTST_ADD(TestCESU8);
   NUCNVTST_ADD(TestUTF16);
   NUCNVTST_ADD(TestUTF16BE);
   NUCNVTST_ADD(TestUTF16LE);
   NUCNVTST_ADD(TestUTF32);
   NUCNVTST_ADD(TestUTF32BE);
   NUCNVTST_ADD(TestUTF32LE);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestLMBCS);
#endif

   NUCNVTST_ADD(TestLATIN1);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestSBCS);
#if !UCONFIG_NO_FILE_IO
   NUCNVTST_ADD(TestDBCS);
   NUCNVTST_ADD(TestICCRunout);
#endif
   NUCNVTST_ADD(TestMBCS);

#ifdef U_ENABLE_GENERIC_ISO_2022
   NUCNVTST_ADD(TestISO_2022);
#endif

   NUCNVTST_ADD(TestISO_2022_JP);
   NUCNVTST_ADD(TestJIS);
   NUCNVTST_ADD(TestISO_2022_JP_1);
   // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
   NUCNVTST_ADD(TestISO_2022_KR);
   NUCNVTST_ADD(TestISO_2022_KR_1);
   // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
   /*
    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
    */
   NUCNVTST_ADD(TestHZ);
#endif

   NUCNVTST_ADD(TestSCSU);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestEBCDIC_STATEFUL);
   NUCNVTST_ADD(TestGB18030);
   NUCNVTST_ADD(TestJitterbug255);
   NUCNVTST_ADD(TestEBCDICUS4XML);
   NUCNVTST_ADD(TestISCII);
   NUCNVTST_ADD(TestJB5275);
   NUCNVTST_ADD(TestJB5275_1);
#if !UCONFIG_NO_COLLATION
   NUCNVTST_ADD(TestJitterbug981);
#endif

   NUCNVTST_ADD(TestJitterbug1293);
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
   NUCNVTST_ADD(TestCoverageMBCS);
#endif

   NUCNVTST_ADD(TestRoundTrippingAllUTF);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNVTST_ADD(TestJitterbug2346);
   NUCNVTST_ADD(TestJitterbug2411);
   // android-removed (no full ISO2022 CJK tables)  -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
   NUCNVTST_ADD(TestIsFixedWidth);
#endif

#undef NUCNVTST_ADD
}
336 
337 
338 /* Note that this test already makes use of statics, so it's not really
339    multithread safe.
340    This convenience function lets us make the error messages actually useful.
341 */
342 
setNuConvTestName(const char * codepage,const char * direction)343 static void setNuConvTestName(const char *codepage, const char *direction)
344 {
345     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
346         codepage,
347         direction,
348         (int)gInBufferSize,
349         (int)gOutBufferSize);
350 }
351 
/* Outcome reported by the testConvertFromU()/testConvertToU() helpers. */
typedef enum
{
  /* the conversion produced the expected output */
  TC_OK = 0,
  /* the output did not match; an error has already been logged */
  TC_MISMATCH,
  /* the test could not be carried out; an error has already been logged,
     so the caller should not log another one */
  TC_FAIL
} ETestConvertResult;
358 
359 /* Note: This function uses global variables and it will not do offset
360 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)361 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
362                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
363 {
364     UErrorCode status = U_ZERO_ERROR;
365     UConverter *conv = 0;
366     char    junkout[NEW_MAX_BUFFER]; /* FIX */
367     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
368     char *p;
369     const UChar *src;
370     char *end;
371     char *targ;
372     int32_t *offs;
373     int i;
374     int32_t   realBufferSize;
375     char *realBufferEnd;
376     const UChar *realSourceEnd;
377     const UChar *sourceLimit;
378     UBool checkOffsets = TRUE;
379     UBool doFlush;
380 
381     for(i=0;i<NEW_MAX_BUFFER;i++)
382         junkout[i] = (char)0xF0;
383     for(i=0;i<NEW_MAX_BUFFER;i++)
384         junokout[i] = 0xFF;
385 
386     setNuConvTestName(codepage, "FROM");
387 
388     log_verbose("\n=========  %s\n", gNuConvTestName);
389 
390     conv = my_ucnv_open(codepage, &status);
391 
392     if(U_FAILURE(status))
393     {
394         log_data_err("Couldn't open converter %s\n",codepage);
395         return TC_FAIL;
396     }
397     if(useFallback){
398         ucnv_setFallback(conv,useFallback);
399     }
400 
401     log_verbose("Converter opened..\n");
402 
403     src = source;
404     targ = junkout;
405     offs = junokout;
406 
407     realBufferSize = UPRV_LENGTHOF(junkout);
408     realBufferEnd = junkout + realBufferSize;
409     realSourceEnd = source + sourceLen;
410 
411     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
412         checkOffsets = FALSE;
413 
414     do
415     {
416       end = nct_min(targ + gOutBufferSize, realBufferEnd);
417       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
418 
419       doFlush = (UBool)(sourceLimit == realSourceEnd);
420 
421       if(targ == realBufferEnd) {
422         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
423         return TC_FAIL;
424       }
425       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
426 
427 
428       status = U_ZERO_ERROR;
429 
430       ucnv_fromUnicode (conv,
431                         &targ,
432                         end,
433                         &src,
434                         sourceLimit,
435                         checkOffsets ? offs : NULL,
436                         doFlush, /* flush if we're at the end of the input data */
437                         &status);
438     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
439 
440     if(U_FAILURE(status)) {
441       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
442       return TC_FAIL;
443     }
444 
445     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
446                 sourceLen, targ-junkout);
447 
448     if(getTestOption(VERBOSITY_OPTION))
449     {
450       char junk[9999];
451       char offset_str[9999];
452       char *ptr;
453 
454       junk[0] = 0;
455       offset_str[0] = 0;
456       for(ptr = junkout;ptr<targ;ptr++) {
457         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
458         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
459       }
460 
461       log_verbose(junk);
462       printSeq((const uint8_t *)expect, expectLen);
463       if ( checkOffsets ) {
464         log_verbose("\nOffsets:");
465         log_verbose(offset_str);
466       }
467       log_verbose("\n");
468     }
469     ucnv_close(conv);
470 
471     if(expectLen != targ-junkout) {
472       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
473       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474       fprintf(stderr, "Got:\n");
475       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
476       fprintf(stderr, "Expected:\n");
477       printSeqErr((const unsigned char*)expect, expectLen);
478       return TC_MISMATCH;
479     }
480 
481     if (checkOffsets && (expectOffsets != 0) ) {
482       log_verbose("comparing %d offsets..\n", targ-junkout);
483       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
484         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
485         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
486         log_err("\n");
487         log_err("Got  :     ");
488         for(p=junkout;p<targ;p++) {
489           log_err("%d,", junokout[p-junkout]);
490         }
491         log_err("\n");
492         log_err("Expected:  ");
493         for(i=0; i<(targ-junkout); i++) {
494           log_err("%d,", expectOffsets[i]);
495         }
496         log_err("\n");
497       }
498     }
499 
500     log_verbose("comparing..\n");
501     if(!memcmp(junkout, expect, expectLen)) {
502       log_verbose("Matches!\n");
503       return TC_OK;
504     } else {
505       log_err("String does not match u->%s\n", gNuConvTestName);
506       printUSeqErr(source, sourceLen);
507       fprintf(stderr, "Got:\n");
508       printSeqErr((const unsigned char *)junkout, expectLen);
509       fprintf(stderr, "Expected:\n");
510       printSeqErr((const unsigned char *)expect, expectLen);
511 
512       return TC_MISMATCH;
513     }
514 }
515 
516 /* Note: This function uses global variables and it will not do offset
517 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)518 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
519                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
520 {
521     UErrorCode status = U_ZERO_ERROR;
522     UConverter *conv = 0;
523     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
524     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
525     const char *src;
526     const char *realSourceEnd;
527     const char *srcLimit;
528     UChar *p;
529     UChar *targ;
530     UChar *end;
531     int32_t *offs;
532     int i;
533     UBool   checkOffsets = TRUE;
534 
535     int32_t   realBufferSize;
536     UChar *realBufferEnd;
537 
538 
539     for(i=0;i<NEW_MAX_BUFFER;i++)
540         junkout[i] = 0xFFFE;
541 
542     for(i=0;i<NEW_MAX_BUFFER;i++)
543         junokout[i] = -1;
544 
545     setNuConvTestName(codepage, "TO");
546 
547     log_verbose("\n=========  %s\n", gNuConvTestName);
548 
549     conv = my_ucnv_open(codepage, &status);
550 
551     if(U_FAILURE(status))
552     {
553         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
554         return TC_FAIL;
555     }
556     if(useFallback){
557         ucnv_setFallback(conv,useFallback);
558     }
559     log_verbose("Converter opened..\n");
560 
561     src = (const char *)source;
562     targ = junkout;
563     offs = junokout;
564 
565     realBufferSize = UPRV_LENGTHOF(junkout);
566     realBufferEnd = junkout + realBufferSize;
567     realSourceEnd = src + sourcelen;
568 
569     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
570         checkOffsets = FALSE;
571 
572     do
573     {
574         end = nct_min( targ + gOutBufferSize, realBufferEnd);
575         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
576 
577         if(targ == realBufferEnd)
578         {
579             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
580             return TC_FAIL;
581         }
582         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
583 
584         /* oldTarg = targ; */
585 
586         status = U_ZERO_ERROR;
587 
588         ucnv_toUnicode (conv,
589                 &targ,
590                 end,
591                 &src,
592                 srcLimit,
593                 checkOffsets ? offs : NULL,
594                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
595                 &status);
596 
597         /*        offs += (targ-oldTarg); */
598 
599       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
600 
601     if(U_FAILURE(status))
602     {
603         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
604         return TC_FAIL;
605     }
606 
607     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
608         sourcelen, targ-junkout);
609     if(getTestOption(VERBOSITY_OPTION))
610     {
611         char junk[9999];
612         char offset_str[9999];
613         UChar *ptr;
614 
615         junk[0] = 0;
616         offset_str[0] = 0;
617 
618         for(ptr = junkout;ptr<targ;ptr++)
619         {
620             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
621             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
622         }
623 
624         log_verbose(junk);
625         printUSeq(expect, expectlen);
626         if ( checkOffsets )
627           {
628             log_verbose("\nOffsets:");
629             log_verbose(offset_str);
630           }
631         log_verbose("\n");
632     }
633     ucnv_close(conv);
634 
635     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
636 
637     if (checkOffsets && (expectOffsets != 0))
638     {
639         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
640             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
641             log_err("Got:      ");
642             for(p=junkout;p<targ;p++) {
643                 log_err("%d,", junokout[p-junkout]);
644             }
645             log_err("\n");
646             log_err("Expected: ");
647             for(i=0; i<(targ-junkout); i++) {
648                 log_err("%d,", expectOffsets[i]);
649             }
650             log_err("\n");
651             log_err("output:   ");
652             for(i=0; i<(targ-junkout); i++) {
653                 log_err("%X,", junkout[i]);
654             }
655             log_err("\n");
656             log_err("input:    ");
657             for(i=0; i<(src-(const char *)source); i++) {
658                 log_err("%X,", (unsigned char)source[i]);
659             }
660             log_err("\n");
661         }
662     }
663 
664     if(!memcmp(junkout, expect, expectlen*2))
665     {
666         log_verbose("Matches!\n");
667         return TC_OK;
668     }
669     else
670     {
671         log_err("String does not match. %s\n", gNuConvTestName);
672         log_verbose("String does not match. %s\n", gNuConvTestName);
673         printf("\nGot:");
674         printUSeqErr(junkout, expectlen);
675         printf("\nExpected:");
676         printUSeqErr(expect, expectlen);
677         return TC_MISMATCH;
678     }
679 }
680 
681 
TestNewConvertWithBufferSizes(int32_t outsize,int32_t insize)682 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
683 {
684 /** test chars #1 */
685     /*  1 2 3  1Han 2Han 3Han .  */
686     static const UChar   sampleText[] =
687      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
688     static const UChar sampleTextRoundTripUnmappable[] =
689     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
690 
691 
692     static const uint8_t expectedUTF8[] =
693      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
694     static const int32_t toUTF8Offs[] =
695      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
696     static const int32_t fmUTF8Offs[] =
697      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
698 
699 #ifdef U_ENABLE_GENERIC_ISO_2022
700     /* Same as UTF8, but with ^[%B preceeding */
701     static const const uint8_t expectedISO2022[] =
702      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
703     static const int32_t toISO2022Offs[]     =
704      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
705        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
706     static const int32_t fmISO2022Offs[] =
707      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
708 #endif
709 
710     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
711     static const uint8_t expectedIBM930[] =
712      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
713     static const int32_t toIBM930Offs[] =
714      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
715     static const int32_t fmIBM930Offs[] =
716      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
717 
718     /* 1 2 3 0 h1 h2 h3 . MBCS*/
719     static const uint8_t expectedIBM943[] =
720      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
721     static const int32_t toIBM943Offs    [] =
722      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
723     static const int32_t fmIBM943Offs[] =
724      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
725 
726     /* 1 2 3 0 h1 h2 h3 . DBCS*/
727     static const uint8_t expectedIBM9027[] =
728      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
729     static const int32_t toIBM9027Offs    [] =
730      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
731 
732      /* 1 2 3 0 <?> <?> <?> . SBCS*/
733     static const uint8_t expectedIBM920[] =
734      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
735     static const int32_t toIBM920Offs    [] =
736      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737 
738     /* 1 2 3 0 <?> <?> <?> . SBCS*/
739     static const uint8_t expectedISO88593[] =
740      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
741     static const int32_t toISO88593Offs[]     =
742      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
743 
744     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
745     static const uint8_t expectedLATIN1[] =
746      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
747     static const int32_t toLATIN1Offs[]     =
748      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
749 
750 
751     /*  etc */
752     static const uint8_t expectedUTF16BE[] =
753      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
754     static const int32_t toUTF16BEOffs[]=
755      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756     static const int32_t fmUTF16BEOffs[] =
757      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
758 
759     static const uint8_t expectedUTF16LE[] =
760      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
761     static const int32_t toUTF16LEOffs[]=
762      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
763     static const int32_t fmUTF16LEOffs[] =
764      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
765 
766     static const uint8_t expectedUTF32BE[] =
767      { 0x00, 0x00, 0x00, 0x31,
768        0x00, 0x00, 0x00, 0x32,
769        0x00, 0x00, 0x00, 0x33,
770        0x00, 0x00, 0x00, 0x00,
771        0x00, 0x00, 0x4e, 0x00,
772        0x00, 0x00, 0x4e, 0x8c,
773        0x00, 0x00, 0x4e, 0x09,
774        0x00, 0x00, 0x00, 0x2e,
775        0x00, 0x02, 0x00, 0x21 };
776     static const int32_t toUTF32BEOffs[]=
777      { 0x00, 0x00, 0x00, 0x00,
778        0x01, 0x01, 0x01, 0x01,
779        0x02, 0x02, 0x02, 0x02,
780        0x03, 0x03, 0x03, 0x03,
781        0x04, 0x04, 0x04, 0x04,
782        0x05, 0x05, 0x05, 0x05,
783        0x06, 0x06, 0x06, 0x06,
784        0x07, 0x07, 0x07, 0x07,
785        0x08, 0x08, 0x08, 0x08,
786        0x08, 0x08, 0x08, 0x08 };
787     static const int32_t fmUTF32BEOffs[] =
788      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
789 
790     static const uint8_t expectedUTF32LE[] =
791      { 0x31, 0x00, 0x00, 0x00,
792        0x32, 0x00, 0x00, 0x00,
793        0x33, 0x00, 0x00, 0x00,
794        0x00, 0x00, 0x00, 0x00,
795        0x00, 0x4e, 0x00, 0x00,
796        0x8c, 0x4e, 0x00, 0x00,
797        0x09, 0x4e, 0x00, 0x00,
798        0x2e, 0x00, 0x00, 0x00,
799        0x21, 0x00, 0x02, 0x00 };
800     static const int32_t toUTF32LEOffs[]=
801      { 0x00, 0x00, 0x00, 0x00,
802        0x01, 0x01, 0x01, 0x01,
803        0x02, 0x02, 0x02, 0x02,
804        0x03, 0x03, 0x03, 0x03,
805        0x04, 0x04, 0x04, 0x04,
806        0x05, 0x05, 0x05, 0x05,
807        0x06, 0x06, 0x06, 0x06,
808        0x07, 0x07, 0x07, 0x07,
809        0x08, 0x08, 0x08, 0x08,
810        0x08, 0x08, 0x08, 0x08 };
811     static const int32_t fmUTF32LEOffs[] =
812      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
813 
814 
815 
816 
817 /** Test chars #2 **/
818 
819     /* Sahha [health],  slashed h's */
820     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
821     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
822 
823     /* LMBCS */
824     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
825     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
826     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
827     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
828     /*********************************** START OF CODE finally *************/
829 
830     gInBufferSize = insize;
831     gOutBufferSize = outsize;
832 
833     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
834 
835 
836     /*UTF-8*/
837     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
838         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
839 
840     log_verbose("Test surrogate behaviour for UTF8\n");
841     {
842         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
843         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
844                            0xf0, 0x90, 0x90, 0x81,
845                            0xef, 0xbf, 0xbd
846         };
847         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
848         testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
849                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
850 
851 
852     }
853 
854 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
855     /*ISO-2022*/
856     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
857         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
858 #endif
859 
860     /*UTF16 LE*/
861     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
862         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
863     /*UTF16 BE*/
864     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
865         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
866     /*UTF32 LE*/
867     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
868         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
869     /*UTF32 BE*/
870     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
871         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
872 
873     /*LATIN_1*/
874     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
875         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
876 
877 #if !UCONFIG_NO_LEGACY_CONVERSION
878     /*EBCDIC_STATEFUL*/
879     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
880         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
881 
882     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
883         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
884 
885     /*MBCS*/
886 
887     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
888         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
889     /*DBCS*/
890     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
891         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
892     /*SBCS*/
893     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
894         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
895     /*SBCS*/
896     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
897         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
898 #endif
899 
900 
901 /****/
902 
903     /*UTF-8*/
904     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
905         sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
906 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
907     /*ISO-2022*/
908     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
909         sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
910 #endif
911 
912     /*UTF16 LE*/
913     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
914         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
915     /*UTF16 BE*/
916     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
917         sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
918     /*UTF32 LE*/
919     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
920         sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
921     /*UTF32 BE*/
922     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
923         sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
924 
925 #if !UCONFIG_NO_LEGACY_CONVERSION
926     /*EBCDIC_STATEFUL*/
927     testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
928             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
929     /*MBCS*/
930     testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
931             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
932 #endif
933 
934     /* Try it again to make sure it still works */
935     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
936         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
937 
938 #if !UCONFIG_NO_LEGACY_CONVERSION
939     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
940         malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
941 
942     testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
943         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
944 
945     /*LMBCS*/
946     testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
947         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
948     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
949         LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
950 #endif
951 
952     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
953     {
954         /* encode directly set D and set O */
955         static const uint8_t utf7[] = {
956             /*
957                 Hi Mom -+Jjo--!
958                 A+ImIDkQ.
959                 +-
960                 +ZeVnLIqe-
961             */
962             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
963             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
964             0x2b, 0x2d,
965             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
966         };
967         static const UChar unicode[] = {
968             /*
969                 Hi Mom -<WHITE SMILING FACE>-!
970                 A<NOT IDENTICAL TO><ALPHA>.
971                 +
972                 [Japanese word "nihongo"]
973             */
974             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
975             0x41, 0x2262, 0x0391, 0x2e,
976             0x2b,
977             0x65e5, 0x672c, 0x8a9e
978         };
979         static const int32_t toUnicodeOffsets[] = {
980             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
981             15, 17, 19, 23,
982             24,
983             27, 29, 32
984         };
985         static const int32_t fromUnicodeOffsets[] = {
986             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987             11, 12, 12, 12, 13, 13, 13, 13, 14,
988             15, 15,
989             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
990         };
991 
992         /* same but escaping set O (the exclamation mark) */
993         static const uint8_t utf7Restricted[] = {
994             /*
995                 Hi Mom -+Jjo--+ACE-
996                 A+ImIDkQ.
997                 +-
998                 +ZeVnLIqe-
999             */
1000             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1001             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1002             0x2b, 0x2d,
1003             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1004         };
1005         static const int32_t toUnicodeOffsetsR[] = {
1006             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1007             19, 21, 23, 27,
1008             28,
1009             31, 33, 36
1010         };
1011         static const int32_t fromUnicodeOffsetsR[] = {
1012             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1013             11, 12, 12, 12, 13, 13, 13, 13, 14,
1014             15, 15,
1015             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1016         };
1017 
1018         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1019 
1020         testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1021 
1022         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1023 
1024         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1025     }
1026 
1027     /*
1028      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1029      * modified according to RFC 2060,
1030      * and supplemented with the one example in RFC 2060 itself.
1031      */
1032     {
1033         static const uint8_t imap[] = {
1034             /*  Hi Mom -&Jjo--!
1035                 A&ImIDkQ-.
1036                 &-
1037                 &ZeVnLIqe-
1038                 \
1039                 ~peter
1040                 /mail
1041                 /&ZeVnLIqe-
1042                 /&U,BTFw-
1043             */
1044             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1045             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1046             0x26, 0x2d,
1047             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1048             0x5c,
1049             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1050             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1051             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1052             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1053         };
1054         static const UChar unicode[] = {
1055             /*  Hi Mom -<WHITE SMILING FACE>-!
1056                 A<NOT IDENTICAL TO><ALPHA>.
1057                 &
1058                 [Japanese word "nihongo"]
1059                 \
1060                 ~peter
1061                 /mail
1062                 /<65e5, 672c, 8a9e>
1063                 /<53f0, 5317>
1064             */
1065             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1066             0x41, 0x2262, 0x0391, 0x2e,
1067             0x26,
1068             0x65e5, 0x672c, 0x8a9e,
1069             0x5c,
1070             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1071             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1072             0x2f, 0x65e5, 0x672c, 0x8a9e,
1073             0x2f, 0x53f0, 0x5317
1074         };
1075         static const int32_t toUnicodeOffsets[] = {
1076             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1077             15, 17, 19, 24,
1078             25,
1079             28, 30, 33,
1080             37,
1081             38, 39, 40, 41, 42, 43,
1082             44, 45, 46, 47, 48,
1083             49, 51, 53, 56,
1084             60, 62, 64
1085         };
1086         static const int32_t fromUnicodeOffsets[] = {
1087             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1088             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1089             15, 15,
1090             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1091             19,
1092             20, 21, 22, 23, 24, 25,
1093             26, 27, 28, 29, 30,
1094             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1095             35, 36, 36, 36, 37, 37, 37, 37, 37
1096         };
1097 
1098         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1099 
1100         testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1101     }
1102 
1103     /* Test UTF-8 bad data handling*/
1104     {
1105         static const uint8_t utf8[]={
1106             0x61,
1107             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1108             0x00,
1109             0x62,
1110             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1111             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1112             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1113             0xdf, 0xbf,                     /* 7ff */
1114             0xbf,                           /* truncated tail */
1115             0xf4, 0x90, 0x80, 0x80,         /* 110000 */
1116             0x02
1117         };
1118 
1119         static const uint16_t utf8Expected[]={
1120             0x0061,
1121             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1122             0x0000,
1123             0x0062,
1124             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1125             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126             0xdbff, 0xdfff,
1127             0x07ff,
1128             0xfffd,
1129             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1130             0x0002
1131         };
1132 
1133         static const int32_t utf8Offsets[]={
1134             0,
1135             1, 2, 3, 4,
1136             5,
1137             6,
1138             7, 8, 9, 10, 11,
1139             12, 13, 14, 15, 16,
1140             17, 17,
1141             21,
1142             23,
1143             24, 25, 26, 27,
1144             28
1145         };
1146         testConvertToU(utf8, sizeof(utf8),
1147                        utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1148 
1149     }
1150 
1151     /* Test UTF-32BE bad data handling*/
1152     {
1153         static const uint8_t utf32[]={
1154             0x00, 0x00, 0x00, 0x61,
1155             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1156             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1157             0x00, 0x00, 0x00, 0x62,
1158             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1159             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1160             0x00, 0x00, 0x01, 0x62,
1161             0x00, 0x00, 0x02, 0x62
1162         };
1163         static const uint16_t utf32Expected[]={
1164             0x0061,
1165             0xfffd,         /* 0x110000 out of range */
1166             0xDBFF,         /* 0x10FFFF in range */
1167             0xDFFF,
1168             0x0062,
1169             0xfffd,         /* 0xffffffff out of range */
1170             0xfffd,         /* 0x7fffffff out of range */
1171             0x0162,
1172             0x0262
1173         };
1174         static const int32_t utf32Offsets[]={
1175             0, 4, 8, 8, 12, 16, 20, 24, 28
1176         };
1177         static const uint8_t utf32ExpectedBack[]={
1178             0x00, 0x00, 0x00, 0x61,
1179             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1180             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1181             0x00, 0x00, 0x00, 0x62,
1182             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1183             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1184             0x00, 0x00, 0x01, 0x62,
1185             0x00, 0x00, 0x02, 0x62
1186         };
1187         static const int32_t utf32OffsetsBack[]={
1188             0,0,0,0,
1189             1,1,1,1,
1190             2,2,2,2,
1191             4,4,4,4,
1192             5,5,5,5,
1193             6,6,6,6,
1194             7,7,7,7,
1195             8,8,8,8
1196         };
1197 
1198         testConvertToU(utf32, sizeof(utf32),
1199                        utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1200         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1201             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1202     }
1203 
1204     /* Test UTF-32LE bad data handling*/
1205     {
1206         static const uint8_t utf32[]={
1207             0x61, 0x00, 0x00, 0x00,
1208             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1209             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1210             0x62, 0x00, 0x00, 0x00,
1211             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1212             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1213             0x62, 0x01, 0x00, 0x00,
1214             0x62, 0x02, 0x00, 0x00,
1215         };
1216 
1217         static const uint16_t utf32Expected[]={
1218             0x0061,
1219             0xfffd,         /* 0x110000 out of range */
1220             0xDBFF,         /* 0x10FFFF in range */
1221             0xDFFF,
1222             0x0062,
1223             0xfffd,         /* 0xffffffff out of range */
1224             0xfffd,         /* 0x7fffffff out of range */
1225             0x0162,
1226             0x0262
1227         };
1228         static const int32_t utf32Offsets[]={
1229             0, 4, 8, 8, 12, 16, 20, 24, 28
1230         };
1231         static const uint8_t utf32ExpectedBack[]={
1232             0x61, 0x00, 0x00, 0x00,
1233             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1234             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1235             0x62, 0x00, 0x00, 0x00,
1236             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1237             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1238             0x62, 0x01, 0x00, 0x00,
1239             0x62, 0x02, 0x00, 0x00
1240         };
1241         static const int32_t utf32OffsetsBack[]={
1242             0,0,0,0,
1243             1,1,1,1,
1244             2,2,2,2,
1245             4,4,4,4,
1246             5,5,5,5,
1247             6,6,6,6,
1248             7,7,7,7,
1249             8,8,8,8
1250         };
1251         testConvertToU(utf32, sizeof(utf32),
1252             utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1253         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1254             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1255     }
1256 }
1257 
TestCoverageMBCS()1258 static void TestCoverageMBCS(){
1259 #if 0
1260     UErrorCode status = U_ZERO_ERROR;
1261     const char *directory = loadTestData(&status);
1262     char* tdpath = NULL;
1263     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1264     int len = strlen(directory);
1265     char* index=NULL;
1266 
1267     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1268     uprv_strcpy(saveDirectory,u_getDataDirectory());
1269     log_verbose("Retrieved data directory %s \n",saveDirectory);
1270     uprv_strcpy(tdpath,directory);
1271     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1272 
1273     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1274             *(index+1)=0;
1275     }
1276     u_setDataDirectory(tdpath);
1277     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1278 #endif
1279 
1280     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1281       which is test file for MBCS conversion with single-byte codepage data.*/
1282     {
1283 
1284         /* MBCS with single byte codepage data test1.ucm*/
1285         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1286         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1287         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1288 
1289         /*from Unicode*/
1290         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1291             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1292     }
1293 
1294     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1295       which is test file for MBCS conversion with three-byte codepage data.*/
1296     {
1297 
1298         /* MBCS with three byte codepage data test3.ucm*/
1299         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1300         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1301         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1302 
1303         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1304         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1305         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1306 
1307         /*from Unicode*/
1308         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1309             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1310 
1311         /*to Unicode*/
1312         testConvertToU(test3input, sizeof(test3input),
1313             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1314 
1315     }
1316 
1317     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1318       which is test file for MBCS conversion with four-byte codepage data.*/
1319     {
1320 
1321         /* MBCS with three byte codepage data test4.ucm*/
1322         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1323         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1324         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1325 
1326         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1327         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1328         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1329 
1330         /*from Unicode*/
1331         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1332             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1333 
1334         /*to Unicode*/
1335         testConvertToU(test4input, sizeof(test4input),
1336             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1337 
1338     }
1339 #if 0
1340     free(tdpath);
1341     /* restore the original data directory */
1342     log_verbose("Setting the data directory to %s \n", saveDirectory);
1343     u_setDataDirectory(saveDirectory);
1344     free(saveDirectory);
1345 #endif
1346 
1347 }
1348 
/**
 * Opens the converter named convName via my_ucnv_open() and verifies that
 * ucnv_getType() returns convType for it.
 *
 * @param convName converter name passed to my_ucnv_open()
 * @param convType converter type that ucnv_getType() is expected to return
 *
 * If the converter cannot be opened, this is reported with log_data_err() and
 * the type check is skipped. A type mismatch is reported with log_err(),
 * showing both the type actually returned and the expected one.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter *myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* Print the value ucnv_getType() returned; the expected value alone
           would not tell the reader what went wrong. */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X, expected 0x%X\n",
            convName, (unsigned int)actualType, (unsigned int)convType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }
    ucnv_close(myConverter);
}
1371 
TestConverterTypesAndStarters()1372 static void TestConverterTypesAndStarters()
1373 {
1374 #if !UCONFIG_NO_LEGACY_CONVERSION
1375     UConverter* myConverter;
1376     UErrorCode err = U_ZERO_ERROR;
1377     UBool mystarters[256];
1378 
1379 /*    const UBool expectedKSCstarters[256] = {
1380         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1406 
1407 
1408     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1409 
1410     myConverter = ucnv_open("ksc", &err);
1411     if (U_FAILURE(err)) {
1412       log_data_err("Failed to create an ibm-ksc converter\n");
1413       return;
1414     }
1415     else
1416     {
1417         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1418             log_err("ucnv_getType Failed for ibm-949\n");
1419         else
1420             log_verbose("ucnv_getType ibm-949 ok\n");
1421 
1422         if(myConverter!=NULL)
1423             ucnv_getStarters(myConverter, mystarters, &err);
1424 
1425         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1426           log_err("Failed ucnv_getStarters for ksc\n");
1427           else
1428           log_verbose("ucnv_getStarters ok\n");*/
1429 
1430     }
1431     ucnv_close(myConverter);
1432 
1433     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1434     TestConverterType("ibm-878", UCNV_SBCS);
1435 #endif
1436 
1437     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1438 
1439     TestConverterType("ibm-1208", UCNV_UTF8);
1440 
1441     TestConverterType("utf-8", UCNV_UTF8);
1442     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1443     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1444     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1445     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1446 
1447 #if !UCONFIG_NO_LEGACY_CONVERSION
1448 
1449 #if defined(U_ENABLE_GENERIC_ISO_2022)
1450     TestConverterType("iso-2022", UCNV_ISO_2022);
1451 #endif
1452 
1453     TestConverterType("hz", UCNV_HZ);
1454 #endif
1455 
1456     TestConverterType("scsu", UCNV_SCSU);
1457 
1458 #if !UCONFIG_NO_LEGACY_CONVERSION
1459     TestConverterType("x-iscii-de", UCNV_ISCII);
1460 #endif
1461 
1462     TestConverterType("ascii", UCNV_US_ASCII);
1463     TestConverterType("utf-7", UCNV_UTF7);
1464     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1465     TestConverterType("bocu-1", UCNV_BOCU1);
1466 }
1467 
1468 static void
TestAmbiguousConverter(UConverter * cnv)1469 TestAmbiguousConverter(UConverter *cnv) {
1470     static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1471     UChar outUnicode[20]={ 0, 0, 0, 0 };
1472 
1473     const char *s;
1474     UChar *u;
1475     UErrorCode errorCode;
1476     UBool isAmbiguous;
1477 
1478     /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1479     errorCode=U_ZERO_ERROR;
1480     s=inBytes;
1481     u=outUnicode;
1482     ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1483     if(U_FAILURE(errorCode)) {
1484         /* we do not care about general failures in this test; the input may just not be mappable */
1485         return;
1486     }
1487 
1488     if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1489         /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1490         /* There are some encodings that are partially ASCII based,
1491         like the ISO-7 and GSM series of codepages, which we ignore. */
1492         return;
1493     }
1494 
1495     isAmbiguous=ucnv_isAmbiguous(cnv);
1496 
1497     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1498     if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1499         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1500             ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1501         return;
1502     }
1503 
1504     if(outUnicode[2]!=0x5c) {
1505         /* needs fixup, fix it */
1506         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1507         if(outUnicode[2]!=0x5c) {
1508             /* the fix failed */
1509             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1510             return;
1511         }
1512     }
1513 }
1514 
TestAmbiguous()1515 static void TestAmbiguous()
1516 {
1517     UErrorCode status = U_ZERO_ERROR;
1518     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1519     static const char target[] = {
1520         /* "\\usr\\local\\share\\data\\icutest.txt" */
1521         0x5c, 0x75, 0x73, 0x72,
1522         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1523         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1524         0x5c, 0x64, 0x61, 0x74, 0x61,
1525         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1526         0
1527     };
1528     UChar asciiResult[200], sjisResult[200];
1529     int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1530     const char *name;
1531 
1532     /* enumerate all converters */
1533     status=U_ZERO_ERROR;
1534     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1535         cnv=ucnv_open(name, &status);
1536         if(U_SUCCESS(status)) {
1537             /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1538             const char* cnvName = ucnv_getName(cnv, &status);
1539             if (strlen(cnvName) < 8 ||
1540                 strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1541             TestAmbiguousConverter(cnv);
1542             }
1543             /* END android-changed */
1544             ucnv_close(cnv);
1545         } else {
1546             log_err("error: unable to open available converter \"%s\"\n", name);
1547             status=U_ZERO_ERROR;
1548         }
1549     }
1550 
1551 #if !UCONFIG_NO_LEGACY_CONVERSION
1552     sjis_cnv = ucnv_open("ibm-943", &status);
1553     if (U_FAILURE(status))
1554     {
1555         log_data_err("Failed to create a SJIS converter\n");
1556         return;
1557     }
1558     ascii_cnv = ucnv_open("LATIN-1", &status);
1559     if (U_FAILURE(status))
1560     {
1561         log_data_err("Failed to create a LATIN-1 converter\n");
1562         ucnv_close(sjis_cnv);
1563         return;
1564     }
1565     /* convert target from SJIS to Unicode */
1566     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1567     if (U_FAILURE(status))
1568     {
1569         log_err("Failed to convert the SJIS string.\n");
1570         ucnv_close(sjis_cnv);
1571         ucnv_close(ascii_cnv);
1572         return;
1573     }
1574     /* convert target from Latin-1 to Unicode */
1575     /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1576     if (U_FAILURE(status))
1577     {
1578         log_err("Failed to convert the Latin-1 string.\n");
1579         ucnv_close(sjis_cnv);
1580         ucnv_close(ascii_cnv);
1581         return;
1582     }
1583     if (!ucnv_isAmbiguous(sjis_cnv))
1584     {
1585         log_err("SJIS converter should contain ambiguous character mappings.\n");
1586         ucnv_close(sjis_cnv);
1587         ucnv_close(ascii_cnv);
1588         return;
1589     }
1590     if (u_strcmp(sjisResult, asciiResult) == 0)
1591     {
1592         log_err("File separators for SJIS don't need to be fixed.\n");
1593     }
1594     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1595     if (u_strcmp(sjisResult, asciiResult) != 0)
1596     {
1597         log_err("Fixing file separator for SJIS failed.\n");
1598     }
1599     ucnv_close(sjis_cnv);
1600     ucnv_close(ascii_cnv);
1601 #endif
1602 }
1603 
1604 static void
TestSignatureDetection()1605 TestSignatureDetection(){
1606     /* with null terminated strings */
1607     {
1608         static const char* data[] = {
1609                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1610                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1611                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1612                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1613 
1614                 "\xFE\xFF",             /* UTF-16BE */
1615                 "\xFF\xFE",             /* UTF-16LE */
1616                 "\xEF\xBB\xBF",         /* UTF-8    */
1617                 "\x0E\xFE\xFF",         /* SCSU     */
1618 
1619                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1620                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1621                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1622                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1623 
1624                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1625                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1626                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1627                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1628                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1629 
1630                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1631         };
1632         static const char* expected[] = {
1633                 "UTF-16BE",
1634                 "UTF-16LE",
1635                 "UTF-8",
1636                 "SCSU",
1637 
1638                 "UTF-16BE",
1639                 "UTF-16LE",
1640                 "UTF-8",
1641                 "SCSU",
1642 
1643                 "UTF-16BE",
1644                 "UTF-16LE",
1645                 "UTF-8",
1646                 "SCSU",
1647 
1648                 "UTF-7",
1649                 "UTF-7",
1650                 "UTF-7",
1651                 "UTF-7",
1652                 "UTF-7",
1653                 "UTF-EBCDIC"
1654         };
1655         static const int32_t expectedLength[] ={
1656             2,
1657             2,
1658             3,
1659             3,
1660 
1661             2,
1662             2,
1663             3,
1664             3,
1665 
1666             2,
1667             2,
1668             3,
1669             3,
1670 
1671             5,
1672             4,
1673             4,
1674             4,
1675             4,
1676             4
1677         };
1678         int i=0;
1679         UErrorCode err;
1680         int32_t signatureLength = -1;
1681         const char* source = NULL;
1682         const char* enc = NULL;
1683         for( ; i<UPRV_LENGTHOF(data); i++){
1684             err = U_ZERO_ERROR;
1685             source = data[i];
1686             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1687             if(U_FAILURE(err)){
1688                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1689                 continue;
1690             }
1691             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1692                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1693                 continue;
1694             }
1695             if(signatureLength != expectedLength[i]){
1696                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1697             }
1698         }
1699     }
1700     {
1701         static const char* data[] = {
1702                 "\xFE\xFF\x00",         /* UTF-16BE */
1703                 "\xFF\xFE\x00",         /* UTF-16LE */
1704                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1705                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1706                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1707                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1708                 "\xFE\xFF",             /* UTF-16BE */
1709                 "\xFF\xFE",             /* UTF-16LE */
1710                 "\xEF\xBB\xBF",         /* UTF-8    */
1711                 "\x0E\xFE\xFF",         /* SCSU     */
1712                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1713                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1714                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1715                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1716                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1717                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1718                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1719                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1720                 "\xFB\xEE\x28",         /* BOCU-1   */
1721                 "\xFF\x41\x42"          /* NULL     */
1722         };
1723         static const int len[] = {
1724             3,
1725             3,
1726             4,
1727             4,
1728             4,
1729             4,
1730             2,
1731             2,
1732             3,
1733             3,
1734             4,
1735             4,
1736             4,
1737             4,
1738             4,
1739             4,
1740             5,
1741             5,
1742             3,
1743             3
1744         };
1745 
1746         static const char* expected[] = {
1747                 "UTF-16BE",
1748                 "UTF-16LE",
1749                 "UTF-8",
1750                 "SCSU",
1751                 "UTF-32BE",
1752                 "UTF-32LE",
1753                 "UTF-16BE",
1754                 "UTF-16LE",
1755                 "UTF-8",
1756                 "SCSU",
1757                 "UTF-32BE",
1758                 "UTF-32LE",
1759                 "UTF-16BE",
1760                 "UTF-16LE",
1761                 "UTF-8",
1762                 "SCSU",
1763                 "UTF-32BE",
1764                 "UTF-32LE",
1765                 "BOCU-1",
1766                 NULL
1767         };
1768         static const int32_t expectedLength[] ={
1769             2,
1770             2,
1771             3,
1772             3,
1773             4,
1774             4,
1775             2,
1776             2,
1777             3,
1778             3,
1779             4,
1780             4,
1781             2,
1782             2,
1783             3,
1784             3,
1785             4,
1786             4,
1787             3,
1788             0
1789         };
1790         int i=0;
1791         UErrorCode err;
1792         int32_t signatureLength = -1;
1793         int32_t sourceLength=-1;
1794         const char* source = NULL;
1795         const char* enc = NULL;
1796         for( ; i<UPRV_LENGTHOF(data); i++){
1797             err = U_ZERO_ERROR;
1798             source = data[i];
1799             sourceLength = len[i];
1800             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1801             if(U_FAILURE(err)){
1802                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1803                 continue;
1804             }
1805             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1806                 if(expected[i] !=NULL){
1807                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1808                  continue;
1809                 }
1810             }
1811             if(signatureLength != expectedLength[i]){
1812                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1813             }
1814         }
1815     }
1816 }
1817 
TestUTF7()1818 static void TestUTF7() {
1819     /* test input */
1820     static const uint8_t in[]={
1821         /* H - +Jjo- - ! +- +2AHcAQ */
1822         0x48,
1823         0x2d,
1824         0x2b, 0x4a, 0x6a, 0x6f,
1825         0x2d, 0x2d,
1826         0x21,
1827         0x2b, 0x2d,
1828         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1829     };
1830 
1831     /* expected test results */
1832     static const int32_t results[]={
1833         /* number of bytes read, code point */
1834         1, 0x48,
1835         1, 0x2d,
1836         4, 0x263a, /* <WHITE SMILING FACE> */
1837         2, 0x2d,
1838         1, 0x21,
1839         2, 0x2b,
1840         7, 0x10401
1841     };
1842 
1843     const char *cnvName;
1844     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1845     UErrorCode errorCode=U_ZERO_ERROR;
1846     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1847     if(U_FAILURE(errorCode)) {
1848         log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1849         return;
1850     }
1851     TestNextUChar(cnv, source, limit, results, "UTF-7");
1852     /* Test the condition when source >= sourceLimit */
1853     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1854     cnvName = ucnv_getName(cnv, &errorCode);
1855     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1856         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1857     }
1858     ucnv_close(cnv);
1859 }
1860 
TestIMAP()1861 static void TestIMAP() {
1862     /* test input */
1863     static const uint8_t in[]={
1864         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1865         0x48,
1866         0x2d,
1867         0x26, 0x4a, 0x6a, 0x6f,
1868         0x2d, 0x2d,
1869         0x21,
1870         0x26, 0x2d,
1871         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1872     };
1873 
1874     /* expected test results */
1875     static const int32_t results[]={
1876         /* number of bytes read, code point */
1877         1, 0x48,
1878         1, 0x2d,
1879         4, 0x263a, /* <WHITE SMILING FACE> */
1880         2, 0x2d,
1881         1, 0x21,
1882         2, 0x26,
1883         7, 0x10401
1884     };
1885 
1886     const char *cnvName;
1887     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1888     UErrorCode errorCode=U_ZERO_ERROR;
1889     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1890     if(U_FAILURE(errorCode)) {
1891         log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1892         return;
1893     }
1894     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1895     /* Test the condition when source >= sourceLimit */
1896     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1897     cnvName = ucnv_getName(cnv, &errorCode);
1898     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1899         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1900     }
1901     ucnv_close(cnv);
1902 }
1903 
TestUTF8()1904 static void TestUTF8() {
1905     /* test input */
1906     static const uint8_t in[]={
1907         0x61,
1908         0xc2, 0x80,
1909         0xe0, 0xa0, 0x80,
1910         0xf0, 0x90, 0x80, 0x80,
1911         0xf4, 0x84, 0x8c, 0xa1,
1912         0xf0, 0x90, 0x90, 0x81
1913     };
1914 
1915     /* expected test results */
1916     static const int32_t results[]={
1917         /* number of bytes read, code point */
1918         1, 0x61,
1919         2, 0x80,
1920         3, 0x800,
1921         4, 0x10000,
1922         4, 0x104321,
1923         4, 0x10401
1924     };
1925 
1926     /* error test input */
1927     static const uint8_t in2[]={
1928         0x61,
1929         0xc0, 0x80,                     /* illegal non-shortest form */
1930         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1931         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1932         0xc0, 0xc0,                     /* illegal trail byte */
1933         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1934         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1935         0xfe,                           /* illegal byte altogether */
1936         0x62
1937     };
1938 
1939     /* expected error test results */
1940     static const int32_t results2[]={
1941         /* number of bytes read, code point */
1942         1, 0x61,
1943         22, 0x62
1944     };
1945 
1946     UConverterToUCallback cb;
1947     const void *p;
1948 
1949     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1950     UErrorCode errorCode=U_ZERO_ERROR;
1951     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1952     if(U_FAILURE(errorCode)) {
1953         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1954         return;
1955     }
1956     TestNextUChar(cnv, source, limit, results, "UTF-8");
1957     /* Test the condition when source >= sourceLimit */
1958     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1959 
1960     /* test error behavior with a skip callback */
1961     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1962     source=(const char *)in2;
1963     limit=(const char *)(in2+sizeof(in2));
1964     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1965 
1966     ucnv_close(cnv);
1967 }
1968 
TestCESU8()1969 static void TestCESU8() {
1970     /* test input */
1971     static const uint8_t in[]={
1972         0x61,
1973         0xc2, 0x80,
1974         0xe0, 0xa0, 0x80,
1975         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1976         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1977         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1978         0xef, 0xbf, 0xbc
1979     };
1980 
1981     /* expected test results */
1982     static const int32_t results[]={
1983         /* number of bytes read, code point */
1984         1, 0x61,
1985         2, 0x80,
1986         3, 0x800,
1987         6, 0x10000,
1988         3, 0xdc01,
1989         -1,0xd802,  /* may read 3 or 6 bytes */
1990         -1,0x10ffff,/* may read 0 or 3 bytes */
1991         3, 0xfffc
1992     };
1993 
1994     /* error test input */
1995     static const uint8_t in2[]={
1996         0x61,
1997         0xc0, 0x80,                     /* illegal non-shortest form */
1998         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1999         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
2000         0xc0, 0xc0,                     /* illegal trail byte */
2001         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
2002         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
2003         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2004         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2005         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2006         0xfe,                           /* illegal byte altogether */
2007         0x62
2008     };
2009 
2010     /* expected error test results */
2011     static const int32_t results2[]={
2012         /* number of bytes read, code point */
2013         1, 0x61,
2014         34, 0x62
2015     };
2016 
2017     UConverterToUCallback cb;
2018     const void *p;
2019 
2020     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2021     UErrorCode errorCode=U_ZERO_ERROR;
2022     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2023     if(U_FAILURE(errorCode)) {
2024         log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2025         return;
2026     }
2027     TestNextUChar(cnv, source, limit, results, "CESU-8");
2028     /* Test the condition when source >= sourceLimit */
2029     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2030 
2031     /* test error behavior with a skip callback */
2032     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2033     source=(const char *)in2;
2034     limit=(const char *)(in2+sizeof(in2));
2035     TestNextUChar(cnv, source, limit, results2, "CESU-8");
2036 
2037     ucnv_close(cnv);
2038 }
2039 
TestUTF16()2040 static void TestUTF16() {
2041     /* test input */
2042     static const uint8_t in1[]={
2043         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2044     };
2045     static const uint8_t in2[]={
2046         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2047     };
2048     static const uint8_t in3[]={
2049         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2050     };
2051 
2052     /* expected test results */
2053     static const int32_t results1[]={
2054         /* number of bytes read, code point */
2055         4, 0x4e00,
2056         2, 0xfeff
2057     };
2058     static const int32_t results2[]={
2059         /* number of bytes read, code point */
2060         4, 0x004e,
2061         2, 0xfffe
2062     };
2063     static const int32_t results3[]={
2064         /* number of bytes read, code point */
2065         2, 0xfefe,
2066         2, 0x4e00,
2067         2, 0xfeff,
2068         4, 0x20001
2069     };
2070 
2071     const char *source, *limit;
2072 
2073     UErrorCode errorCode=U_ZERO_ERROR;
2074     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2075     if(U_FAILURE(errorCode)) {
2076         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2077         return;
2078     }
2079 
2080     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2081     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2082 
2083     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2084     ucnv_resetToUnicode(cnv);
2085     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2086 
2087     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2088     ucnv_resetToUnicode(cnv);
2089     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2090 
2091     /* Test the condition when source >= sourceLimit */
2092     ucnv_resetToUnicode(cnv);
2093     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2094 
2095     ucnv_close(cnv);
2096 }
2097 
TestUTF16BE()2098 static void TestUTF16BE() {
2099     /* test input */
2100     static const uint8_t in[]={
2101         0x00, 0x61,
2102         0x00, 0xc0,
2103         0x00, 0x31,
2104         0x00, 0xf4,
2105         0xce, 0xfe,
2106         0xd8, 0x01, 0xdc, 0x01
2107     };
2108 
2109     /* expected test results */
2110     static const int32_t results[]={
2111         /* number of bytes read, code point */
2112         2, 0x61,
2113         2, 0xc0,
2114         2, 0x31,
2115         2, 0xf4,
2116         2, 0xcefe,
2117         4, 0x10401
2118     };
2119 
2120     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2121     UErrorCode errorCode=U_ZERO_ERROR;
2122     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2123     if(U_FAILURE(errorCode)) {
2124         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2125         return;
2126     }
2127     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2128     /* Test the condition when source >= sourceLimit */
2129     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2130     /*Test for the condition where there is an invalid character*/
2131     {
2132         static const uint8_t source2[]={0x61};
2133         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2134         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2135     }
2136 #if 0
2137     /*
2138      * Test disabled because currently the UTF-16BE/LE converters are supposed
2139      * to not set errors for unpaired surrogates.
2140      * This may change with
2141      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2142      */
2143 
2144     /*Test for the condition where there is a surrogate pair*/
2145     {
2146         const uint8_t source2[]={0xd8, 0x01};
2147         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2148     }
2149 #endif
2150     ucnv_close(cnv);
2151 }
2152 
2153 static void
TestUTF16LE()2154 TestUTF16LE() {
2155     /* test input */
2156     static const uint8_t in[]={
2157         0x61, 0x00,
2158         0x31, 0x00,
2159         0x4e, 0x2e,
2160         0x4e, 0x00,
2161         0x01, 0xd8, 0x01, 0xdc
2162     };
2163 
2164     /* expected test results */
2165     static const int32_t results[]={
2166         /* number of bytes read, code point */
2167         2, 0x61,
2168         2, 0x31,
2169         2, 0x2e4e,
2170         2, 0x4e,
2171         4, 0x10401
2172     };
2173 
2174     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2175     UErrorCode errorCode=U_ZERO_ERROR;
2176     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2177     if(U_FAILURE(errorCode)) {
2178         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2179         return;
2180     }
2181     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2182     /* Test the condition when source >= sourceLimit */
2183     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2184     /*Test for the condition where there is an invalid character*/
2185     {
2186         static const uint8_t source2[]={0x61};
2187         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2188         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2189     }
2190 #if 0
2191     /*
2192      * Test disabled because currently the UTF-16BE/LE converters are supposed
2193      * to not set errors for unpaired surrogates.
2194      * This may change with
2195      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2196      */
2197 
2198     /*Test for the condition where there is a surrogate character*/
2199     {
2200         static const uint8_t source2[]={0x01, 0xd8};
2201         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2202     }
2203 #endif
2204 
2205     ucnv_close(cnv);
2206 }
2207 
TestUTF32()2208 static void TestUTF32() {
2209     /* test input */
2210     static const uint8_t in1[]={
2211         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2212     };
2213     static const uint8_t in2[]={
2214         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2215     };
2216     static const uint8_t in3[]={
2217         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2218     };
2219 
2220     /* expected test results */
2221     static const int32_t results1[]={
2222         /* number of bytes read, code point */
2223         8, 0x100f00,
2224         4, 0xfeff
2225     };
2226     static const int32_t results2[]={
2227         /* number of bytes read, code point */
2228         8, 0x0f1000,
2229         4, 0xfffe
2230     };
2231     static const int32_t results3[]={
2232         /* number of bytes read, code point */
2233         4, 0xfefe,
2234         4, 0x100f00,
2235         4, 0xfffd, /* unmatched surrogate */
2236         4, 0xfffd  /* unmatched surrogate */
2237     };
2238 
2239     const char *source, *limit;
2240 
2241     UErrorCode errorCode=U_ZERO_ERROR;
2242     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2243     if(U_FAILURE(errorCode)) {
2244         log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2245         return;
2246     }
2247 
2248     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2249     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2250 
2251     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2252     ucnv_resetToUnicode(cnv);
2253     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2254 
2255     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2256     ucnv_resetToUnicode(cnv);
2257     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2258 
2259     /* Test the condition when source >= sourceLimit */
2260     ucnv_resetToUnicode(cnv);
2261     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2262 
2263     ucnv_close(cnv);
2264 }
2265 
2266 static void
TestUTF32BE()2267 TestUTF32BE() {
2268     /* test input */
2269     static const uint8_t in[]={
2270         0x00, 0x00, 0x00, 0x61,
2271         0x00, 0x00, 0x30, 0x61,
2272         0x00, 0x00, 0xdc, 0x00,
2273         0x00, 0x00, 0xd8, 0x00,
2274         0x00, 0x00, 0xdf, 0xff,
2275         0x00, 0x00, 0xff, 0xfe,
2276         0x00, 0x10, 0xab, 0xcd,
2277         0x00, 0x10, 0xff, 0xff
2278     };
2279 
2280     /* expected test results */
2281     static const int32_t results[]={
2282         /* number of bytes read, code point */
2283         4, 0x61,
2284         4, 0x3061,
2285         4, 0xfffd,
2286         4, 0xfffd,
2287         4, 0xfffd,
2288         4, 0xfffe,
2289         4, 0x10abcd,
2290         4, 0x10ffff
2291     };
2292 
2293     /* error test input */
2294     static const uint8_t in2[]={
2295         0x00, 0x00, 0x00, 0x61,
2296         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2297         0x00, 0x00, 0x00, 0x62,
2298         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2299         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2300         0x00, 0x00, 0x01, 0x62,
2301         0x00, 0x00, 0x02, 0x62
2302     };
2303 
2304     /* expected error test results */
2305     static const int32_t results2[]={
2306         /* number of bytes read, code point */
2307         4,  0x61,
2308         8,  0x62,
2309         12, 0x162,
2310         4,  0x262
2311     };
2312 
2313     UConverterToUCallback cb;
2314     const void *p;
2315 
2316     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2317     UErrorCode errorCode=U_ZERO_ERROR;
2318     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2319     if(U_FAILURE(errorCode)) {
2320         log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2321         return;
2322     }
2323     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2324 
2325     /* Test the condition when source >= sourceLimit */
2326     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2327 
2328     /* test error behavior with a skip callback */
2329     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2330     source=(const char *)in2;
2331     limit=(const char *)(in2+sizeof(in2));
2332     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2333 
2334     ucnv_close(cnv);
2335 }
2336 
2337 static void
TestUTF32LE()2338 TestUTF32LE() {
2339     /* test input */
2340     static const uint8_t in[]={
2341         0x61, 0x00, 0x00, 0x00,
2342         0x61, 0x30, 0x00, 0x00,
2343         0x00, 0xdc, 0x00, 0x00,
2344         0x00, 0xd8, 0x00, 0x00,
2345         0xff, 0xdf, 0x00, 0x00,
2346         0xfe, 0xff, 0x00, 0x00,
2347         0xcd, 0xab, 0x10, 0x00,
2348         0xff, 0xff, 0x10, 0x00
2349     };
2350 
2351     /* expected test results */
2352     static const int32_t results[]={
2353         /* number of bytes read, code point */
2354         4, 0x61,
2355         4, 0x3061,
2356         4, 0xfffd,
2357         4, 0xfffd,
2358         4, 0xfffd,
2359         4, 0xfffe,
2360         4, 0x10abcd,
2361         4, 0x10ffff
2362     };
2363 
2364     /* error test input */
2365     static const uint8_t in2[]={
2366         0x61, 0x00, 0x00, 0x00,
2367         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2368         0x62, 0x00, 0x00, 0x00,
2369         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2370         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2371         0x62, 0x01, 0x00, 0x00,
2372         0x62, 0x02, 0x00, 0x00,
2373     };
2374 
2375     /* expected error test results */
2376     static const int32_t results2[]={
2377         /* number of bytes read, code point */
2378         4,  0x61,
2379         8,  0x62,
2380         12, 0x162,
2381         4,  0x262,
2382     };
2383 
2384     UConverterToUCallback cb;
2385     const void *p;
2386 
2387     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2388     UErrorCode errorCode=U_ZERO_ERROR;
2389     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2390     if(U_FAILURE(errorCode)) {
2391         log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2392         return;
2393     }
2394     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2395 
2396     /* Test the condition when source >= sourceLimit */
2397     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2398 
2399     /* test error behavior with a skip callback */
2400     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2401     source=(const char *)in2;
2402     limit=(const char *)(in2+sizeof(in2));
2403     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2404 
2405     ucnv_close(cnv);
2406 }
2407 
2408 static void
TestLATIN1()2409 TestLATIN1() {
2410     /* test input */
2411     static const uint8_t in[]={
2412        0x61,
2413        0x31,
2414        0x32,
2415        0xc0,
2416        0xf0,
2417        0xf4,
2418     };
2419 
2420     /* expected test results */
2421     static const int32_t results[]={
2422         /* number of bytes read, code point */
2423         1, 0x61,
2424         1, 0x31,
2425         1, 0x32,
2426         1, 0xc0,
2427         1, 0xf0,
2428         1, 0xf4,
2429     };
2430     static const uint16_t in1[] = {
2431         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2432         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2433         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2434         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2435         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2436         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2437         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2438         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2439         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2440         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2441         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2442         0xcb, 0x82
2443     };
2444     static const uint8_t out1[] = {
2445         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2446         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2447         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2448         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2449         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2450         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2451         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2452         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2453         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2454         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2455         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2456         0xcb, 0x82
2457     };
2458     static const uint16_t in2[]={
2459         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2460         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2461         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2462         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2463         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2464         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2465         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2466         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2467         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2468         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2469         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2470         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2471         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2472         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2473         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2474         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2475         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2476         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2477         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2478         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2479         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2480         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2481         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2482         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2483         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2484         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2485         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2486         0x37, 0x20, 0x2A, 0x2F,
2487     };
2488     static const unsigned char out2[]={
2489         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2490         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2491         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2492         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2493         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2494         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2495         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2496         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2497         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2498         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2499         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2500         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2501         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2502         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2503         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2504         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2505         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2506         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2507         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2508         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2509         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2510         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2511         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2512         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2513         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2514         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2515         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2516         0x37, 0x20, 0x2A, 0x2F,
2517     };
2518     const char *source=(const char *)in;
2519     const char *limit=(const char *)in+sizeof(in);
2520 
2521     UErrorCode errorCode=U_ZERO_ERROR;
2522     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2523     if(U_FAILURE(errorCode)) {
2524         log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2525         return;
2526     }
2527     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2528     /* Test the condition when source >= sourceLimit */
2529     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2530     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2531     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2532 
2533     ucnv_close(cnv);
2534 }
2535 
2536 static void
TestSBCS()2537 TestSBCS() {
2538     /* test input */
2539     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2540     /* expected test results */
2541     static const int32_t results[]={
2542         /* number of bytes read, code point */
2543         1, 0x61,
2544         1, 0xbf,
2545         1, 0xc4,
2546         1, 0x2021,
2547         1, 0xf8ff,
2548         1, 0x00d9
2549     };
2550 
2551     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2552     UErrorCode errorCode=U_ZERO_ERROR;
2553     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2554     if(U_FAILURE(errorCode)) {
2555         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2556         return;
2557     }
2558     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2559     /* Test the condition when source >= sourceLimit */
2560     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2561     /*Test for Illegal character */ /*
2562     {
2563     static const uint8_t input1[]={ 0xA1 };
2564     const char* illegalsource=(const char*)input1;
2565     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2566     }
2567    */
2568     ucnv_close(cnv);
2569 }
2570 
2571 static void
TestDBCS()2572 TestDBCS() {
2573     /* test input */
2574     static const uint8_t in[]={
2575         0x44, 0x6a,
2576         0xc4, 0x9c,
2577         0x7a, 0x74,
2578         0x46, 0xab,
2579         0x42, 0x5b,
2580 
2581     };
2582 
2583     /* expected test results */
2584     static const int32_t results[]={
2585         /* number of bytes read, code point */
2586         2, 0x00a7,
2587         2, 0xe1d2,
2588         2, 0x6962,
2589         2, 0xf842,
2590         2, 0xffe5,
2591     };
2592 
2593     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2594     UErrorCode errorCode=U_ZERO_ERROR;
2595 
2596     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2597     if(U_FAILURE(errorCode)) {
2598         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2599         return;
2600     }
2601     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2602     /* Test the condition when source >= sourceLimit */
2603     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2604     /*Test for the condition where there is an invalid character*/
2605     {
2606         static const uint8_t source2[]={0x1a, 0x1b};
2607         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2608     }
2609     /*Test for the condition where we have a truncated char*/
2610     {
2611         static const uint8_t source1[]={0xc4};
2612         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2613         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2614     }
2615     ucnv_close(cnv);
2616 }
2617 
2618 static void
TestMBCS()2619 TestMBCS() {
2620     /* test input */
2621     static const uint8_t in[]={
2622         0x01,
2623         0xa6, 0xa3,
2624         0x00,
2625         0xa6, 0xa1,
2626         0x08,
2627         0xc2, 0x76,
2628         0xc2, 0x78,
2629 
2630     };
2631 
2632     /* expected test results */
2633     static const int32_t results[]={
2634         /* number of bytes read, code point */
2635         1, 0x0001,
2636         2, 0x250c,
2637         1, 0x0000,
2638         2, 0x2500,
2639         1, 0x0008,
2640         2, 0xd60c,
2641         2, 0xd60e,
2642     };
2643 
2644     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2645     UErrorCode errorCode=U_ZERO_ERROR;
2646 
2647     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2648     if(U_FAILURE(errorCode)) {
2649         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2650         return;
2651     }
2652     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2653     /* Test the condition when source >= sourceLimit */
2654     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2655     /*Test for the condition where there is an invalid character*/
2656     {
2657         static const uint8_t source2[]={0xa1, 0x80};
2658         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2659     }
2660     /*Test for the condition where we have a truncated char*/
2661     {
2662         static const uint8_t source1[]={0xc4};
2663         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2664         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2665     }
2666     ucnv_close(cnv);
2667 
2668 }
2669 
2670 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2671 static void
TestICCRunout()2672 TestICCRunout() {
2673 /*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2674 
2675     const char *cnvName = "ibm-1363";
2676     UErrorCode status = U_ZERO_ERROR;
2677     const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2678     /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2679     const char *source = sourceData;
2680     const char *sourceLim = sourceData+sizeof(sourceData);
2681     UChar c1, c2, c3;
2682     UConverter *cnv=ucnv_open(cnvName, &status);
2683     if(U_FAILURE(status)) {
2684         log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2685 	return;
2686     }
2687 
2688 #if 0
2689     {
2690     UChar   targetBuf[256];
2691     UChar   *target = targetBuf;
2692     UChar   *targetLim = target+256;
2693     ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2694 
2695     log_info("After convert: target@%d, source@%d, status%s\n",
2696 	     target-targetBuf, source-sourceData, u_errorName(status));
2697 
2698     if(U_FAILURE(status)) {
2699 	log_err("Failed to convert: %s\n", u_errorName(status));
2700     } else {
2701 
2702     }
2703     }
2704 #endif
2705 
2706     c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2707     log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2708 
2709     c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2710     log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2711 
2712     c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2713     log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2714 
2715     if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2716 	log_verbose("OK\n");
2717     } else {
2718 	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2719     }
2720 
2721     ucnv_close(cnv);
2722 
2723 }
2724 #endif
2725 
2726 #ifdef U_ENABLE_GENERIC_ISO_2022
2727 
2728 static void
TestISO_2022()2729 TestISO_2022() {
2730     /* test input */
2731     static const uint8_t in[]={
2732         0x1b, 0x25, 0x42,
2733         0x31,
2734         0x32,
2735         0x61,
2736         0xc2, 0x80,
2737         0xe0, 0xa0, 0x80,
2738         0xf0, 0x90, 0x80, 0x80
2739     };
2740 
2741 
2742 
2743     /* expected test results */
2744     static const int32_t results[]={
2745         /* number of bytes read, code point */
2746         4, 0x0031,  /* 4 bytes including the escape sequence */
2747         1, 0x0032,
2748         1, 0x61,
2749         2, 0x80,
2750         3, 0x800,
2751         4, 0x10000
2752     };
2753 
2754     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2755     UErrorCode errorCode=U_ZERO_ERROR;
2756     UConverter *cnv;
2757 
2758     cnv=ucnv_open("ISO_2022", &errorCode);
2759     if(U_FAILURE(errorCode)) {
2760         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2761         return;
2762     }
2763     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2764 
2765     /* Test the condition when source >= sourceLimit */
2766     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2767     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2768     /*Test for the condition where we have a truncated char*/
2769     {
2770         static const uint8_t source1[]={0xc4};
2771         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2772         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2773     }
2774     /*Test for the condition where there is an invalid character*/
2775     {
2776         static const uint8_t source2[]={0xa1, 0x01};
2777         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2778     }
2779     ucnv_close(cnv);
2780 }
2781 
2782 #endif
2783 
2784 static void
TestSmallTargetBuffer(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2785 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2786     const UChar* uSource;
2787     const UChar* uSourceLimit;
2788     const char* cSource;
2789     const char* cSourceLimit;
2790     UChar *uTargetLimit =NULL;
2791     UChar *uTarget;
2792     char *cTarget;
2793     const char *cTargetLimit;
2794     char *cBuf;
2795     UChar *uBuf; /*,*test;*/
2796     int32_t uBufSize = 120;
2797     int len=0;
2798     int i=2;
2799     UErrorCode errorCode=U_ZERO_ERROR;
2800     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2801     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2802     ucnv_reset(cnv);
2803     for(;--i>0; ){
2804         uSource = (UChar*) source;
2805         uSourceLimit=(const UChar*)sourceLimit;
2806         cTarget = cBuf;
2807         uTarget = uBuf;
2808         cSource = cBuf;
2809         cTargetLimit = cBuf;
2810         uTargetLimit = uBuf;
2811 
2812         do{
2813 
2814             cTargetLimit = cTargetLimit+ i;
2815             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2816             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2817                errorCode=U_ZERO_ERROR;
2818                 continue;
2819             }
2820 
2821             if(U_FAILURE(errorCode)){
2822                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2823                 return;
2824             }
2825 
2826         }while (uSource<uSourceLimit);
2827 
2828         cSourceLimit =cTarget;
2829         do{
2830             uTargetLimit=uTargetLimit+i;
2831             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2832             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2833                errorCode=U_ZERO_ERROR;
2834                 continue;
2835             }
2836             if(U_FAILURE(errorCode)){
2837                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2838                     return;
2839             }
2840         }while(cSource<cSourceLimit);
2841 
2842         uSource = source;
2843         /*test =uBuf;*/
2844         for(len=0;len<(int)(source - sourceLimit);len++){
2845             if(uBuf[len]!=uSource[len]){
2846                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2847             }
2848         }
2849     }
2850     free(uBuf);
2851     free(cBuf);
2852 }
2853 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2854 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2855     const UChar* uSource;
2856     const UChar* uSourceLimit;
2857     const char* cSource;
2858     UChar *uTargetLimit =NULL;
2859     UChar *uTarget;
2860     char *cTarget;
2861     const char *cTargetLimit;
2862     char *cBuf;
2863     UChar *uBuf,*test;
2864     int32_t uBufSize = 120;
2865     int numCharsInTarget=0;
2866     UErrorCode errorCode=U_ZERO_ERROR;
2867     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2868     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2869     uSource = source;
2870     uSourceLimit=sourceLimit;
2871     cTarget = cBuf;
2872     cTargetLimit = cBuf +uBufSize*5;
2873     uTarget = uBuf;
2874     uTargetLimit = uBuf+ uBufSize*5;
2875     ucnv_reset(cnv);
2876     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2877     if(U_FAILURE(errorCode)){
2878         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2879         return;
2880     }
2881     cSource = cBuf;
2882     test =uBuf;
2883     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2884     if(U_FAILURE(errorCode)){
2885         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2886         return;
2887     }
2888     uSource = source;
2889     while(uSource<uSourceLimit){
2890         if(*test!=*uSource){
2891 
2892             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2893         }
2894         uSource++;
2895         test++;
2896     }
2897     free(uBuf);
2898     free(cBuf);
2899 }
2900 
/*
 * Round-trips the UTF-16 text [source, sourceLimit) through cnv while feeding
 * the converter its input in the smallest possible pieces: the source limit
 * is advanced by one unit (or one byte) before each ucnv_fromUnicode() /
 * ucnv_toUnicode() call, while the target buffers are full-sized.
 *
 * Afterwards the UTF-16 units that came back are compared with the input.
 * Both conversions run with flush=FALSE, so fewer units than the input may
 * have been produced; only the units actually written are compared.
 *
 * source/sourceLimit delimit the input text; cnv is reset before use.
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    /* number of UTF-16 units in the input */
    int32_t expectedLength = (int32_t)(sourceLimit - (const UChar*)source);
    int32_t producedLength;
    int32_t compareLength;
    int32_t len;
    int i=2;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(uBuf==NULL || cBuf==NULL){
        log_err("TestSmallSourceBuffer: unable to allocate the conversion buffers\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2 and is pre-decremented, so the body runs once */
    for(;--i>0;){
        uSource = (const UChar*) source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cBuf+uBufSize*10;
        uSourceLimit=uSource;

        /* Unicode -> bytes, exposing one more UTF-16 unit per call */
        do{
            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
            }else if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while (uSource<sourceLimit);

        /* bytes -> Unicode, exposing one more byte per call */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
            }else if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        /*
         * Compare the round-tripped text with the input. The original bound
         * was (source - sourceLimit), which is negative, so the comparison
         * never ran. Never read past what ucnv_toUnicode() actually wrote.
         */
        uSource = (const UChar*) source;
        producedLength = (int32_t)(uTarget - uBuf);
        compareLength = producedLength<expectedLength ? producedLength : expectedLength;
        for(len=0;len<compareLength;len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    /* single exit so the buffers are released on the error paths too */
    free(uBuf);
    free(cBuf);
}
2975 static void
TestGetNextUChar2022(UConverter * cnv,const char * source,const char * limit,const uint16_t results[],const char * message)2976 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2977                      const uint16_t results[], const char* message){
2978 /*     const char* s0; */
2979      const char* s=(char*)source;
2980      const uint16_t *r=results;
2981      UErrorCode errorCode=U_ZERO_ERROR;
2982      uint32_t c,exC;
2983      ucnv_reset(cnv);
2984      while(s<limit) {
2985 	 /* s0=s; */
2986         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2987         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2988             break; /* no more significant input */
2989         } else if(U_FAILURE(errorCode)) {
2990             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2991             break;
2992         } else {
2993             if(U16_IS_LEAD(*r)){
2994                 int i =0, len = 2;
2995                 U16_NEXT(r, i, len, exC);
2996                 r++;
2997             }else{
2998                 exC = *r;
2999             }
3000             if(c!=(uint32_t)(exC))
3001                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
3002         }
3003         r++;
3004     }
3005 }
3006 
TestJitterbug930(const char * enc)3007 static int TestJitterbug930(const char* enc){
3008     UErrorCode err = U_ZERO_ERROR;
3009     UConverter*converter;
3010     char out[80];
3011     char*target = out;
3012     UChar in[4];
3013     const UChar*source = in;
3014     int32_t off[80];
3015     int32_t* offsets = off;
3016     int numOffWritten=0;
3017     UBool flush = 0;
3018     converter = my_ucnv_open(enc, &err);
3019 
3020     in[0] = 0x41;     /* 0x4E00;*/
3021     in[1] = 0x4E01;
3022     in[2] = 0x4E02;
3023     in[3] = 0x4E03;
3024 
3025     memset(off, '*', sizeof(off));
3026 
3027     ucnv_fromUnicode (converter,
3028             &target,
3029             target+2,
3030             &source,
3031             source+3,
3032             offsets,
3033             flush,
3034             &err);
3035 
3036         /* writes three bytes into the output buffer: 41 1B 24
3037         * but offsets contains 0 1 1
3038     */
3039     while(*offsets< off[10]){
3040         numOffWritten++;
3041         offsets++;
3042     }
3043     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3044     if(numOffWritten!= (int)(target-out)){
3045         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3046     }
3047 
3048     err = U_ZERO_ERROR;
3049 
3050     memset(off,'*' , sizeof(off));
3051 
3052     flush = 1;
3053     offsets=off;
3054     ucnv_fromUnicode (converter,
3055             &target,
3056             target+4,
3057             &source,
3058             source,
3059             offsets,
3060             flush,
3061             &err);
3062     numOffWritten=0;
3063     while(*offsets< off[10]){
3064         numOffWritten++;
3065         if(*offsets!= -1){
3066             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3067         }
3068         offsets++;
3069     }
3070 
3071     /* writes 42 43 7A into output buffer,
3072      * offsets contains -1 -1 -1
3073      */
3074     ucnv_close(converter);
3075     return 0;
3076 }
3077 
3078 static void
TestHZ()3079 TestHZ() {
3080     /* test input */
3081     static const uint16_t in[]={
3082             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3083             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3084             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3085             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3086             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3087             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3088             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3089             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3090             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3091             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3092             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3093             0x005A, 0x005B, 0x005C, 0x000A
3094       };
3095     const UChar* uSource;
3096     const UChar* uSourceLimit;
3097     const char* cSource;
3098     const char* cSourceLimit;
3099     UChar *uTargetLimit =NULL;
3100     UChar *uTarget;
3101     char *cTarget;
3102     const char *cTargetLimit;
3103     char *cBuf;
3104     UChar *uBuf,*test;
3105     int32_t uBufSize = 120;
3106     UErrorCode errorCode=U_ZERO_ERROR;
3107     UConverter *cnv;
3108     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3109     int32_t* myOff= offsets;
3110     cnv=ucnv_open("HZ", &errorCode);
3111     if(U_FAILURE(errorCode)) {
3112         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3113         return;
3114     }
3115 
3116     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3117     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3118     uSource = (const UChar*)in;
3119     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3120     cTarget = cBuf;
3121     cTargetLimit = cBuf +uBufSize*5;
3122     uTarget = uBuf;
3123     uTargetLimit = uBuf+ uBufSize*5;
3124     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3125     if(U_FAILURE(errorCode)){
3126         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3127         return;
3128     }
3129     cSource = cBuf;
3130     cSourceLimit =cTarget;
3131     test =uBuf;
3132     myOff=offsets;
3133     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3134     if(U_FAILURE(errorCode)){
3135         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3136         return;
3137     }
3138     uSource = (const UChar*)in;
3139     while(uSource<uSourceLimit){
3140         if(*test!=*uSource){
3141 
3142             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3143         }
3144         uSource++;
3145         test++;
3146     }
3147     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3148     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3149     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3150     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3151     TestJitterbug930("csISO2022JP");
3152     ucnv_close(cnv);
3153     free(offsets);
3154     free(uBuf);
3155     free(cBuf);
3156 }
3157 
3158 static void
TestISCII()3159 TestISCII(){
3160         /* test input */
3161     static const uint16_t in[]={
3162         /* test full range of Devanagari */
3163         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3164         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3165         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3166         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3167         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3168         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3169         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3170         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3171         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3172         0x096D,0x096E,0x096F,
3173         /* test Soft halant*/
3174         0x0915,0x094d, 0x200D,
3175         /* test explicit halant */
3176         0x0915,0x094d, 0x200c,
3177         /* test double danda */
3178         0x965,
3179         /* test ASCII */
3180         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3181         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3182         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3183         /* tests from Lotus */
3184         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3185         0x0930,0x094D,0x200D,
3186         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3187         0x0915,0x0921,0x002B,0x095F,
3188         /* tamil range */
3189         0x0B86, 0xB87, 0xB88,
3190         /* telugu range */
3191         0x0C05, 0x0C02, 0x0C03,0x0c31,
3192         /* kannada range */
3193         0x0C85, 0xC82, 0x0C83,
3194         /* test Abbr sign and Anudatta */
3195         0x0970, 0x952,
3196        /* 0x0958,
3197         0x0959,
3198         0x095A,
3199         0x095B,
3200         0x095C,
3201         0x095D,
3202         0x095E,
3203         0x095F,*/
3204         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3205         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3206         0x090C ,
3207         0x0962,
3208         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3209         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3210         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3211         0x093D /* Avagraha  0xEA, 0xE9*/,
3212         0x0958,
3213         0x0959,
3214         0x095A,
3215         0x095B,
3216         0x095C,
3217         0x095D,
3218         0x095E,
3219         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3220       };
3221     static const unsigned char byteArr[]={
3222 
3223         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3224         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3225         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3226         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3227         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3228         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3229         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3230         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3231         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3232         0xf8,0xf9,0xfa,
3233         /* test soft halant */
3234         0xb3, 0xE8, 0xE9,
3235         /* test explicit halant */
3236         0xb3, 0xE8, 0xE8,
3237         /* test double danda */
3238         0xea, 0xea,
3239         /* test ASCII */
3240         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3241         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3242         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3243         /* test ATR code */
3244 
3245         /* tests from Lotus */
3246         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3247         0xEF,0x42,0xCF,0xE8,0xD9,
3248         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3249         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3250         /* tamil range */
3251         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3252         /* telugu range */
3253         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3254         /* kannada range */
3255         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3256         /* anudatta and abbreviation sign */
3257         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3258 
3259 
3260         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3261 
3262         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3263 
3264         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3265 
3266         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3267 
3268         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3269 
3270         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3271 
3272         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3273 
3274         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3275 
3276         0xB3, 0xE9, /* Ka + NUKTA */
3277 
3278         0xB4, 0xE9, /* Kha + NUKTA */
3279 
3280         0xB5, 0xE9, /* Ga + NUKTA */
3281 
3282         0xBA, 0xE9,
3283 
3284         0xBF, 0xE9,
3285 
3286         0xC0, 0xE9,
3287 
3288         0xC9, 0xE9,
3289         /* INV halant RA    */
3290         0xD9, 0xE8, 0xCF,
3291         0x00, 0x00A0,
3292         /* just consume unhandled codepoints */
3293         0xEF, 0x30,
3294 
3295     };
3296     testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3297     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3298 
3299 }
3300 
3301 static void
TestISO_2022_JP()3302 TestISO_2022_JP() {
3303     /* test input */
3304     static const uint16_t in[]={
3305         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3306         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3307         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3308         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3309         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3310         0x201D, 0x3014, 0x000D, 0x000A,
3311         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3312         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3313         };
3314     const UChar* uSource;
3315     const UChar* uSourceLimit;
3316     const char* cSource;
3317     const char* cSourceLimit;
3318     UChar *uTargetLimit =NULL;
3319     UChar *uTarget;
3320     char *cTarget;
3321     const char *cTargetLimit;
3322     char *cBuf;
3323     UChar *uBuf,*test;
3324     int32_t uBufSize = 120;
3325     UErrorCode errorCode=U_ZERO_ERROR;
3326     UConverter *cnv;
3327     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3328     int32_t* myOff= offsets;
3329     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3330     if(U_FAILURE(errorCode)) {
3331         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3332         return;
3333     }
3334 
3335     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3336     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3337     uSource = (const UChar*)in;
3338     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3339     cTarget = cBuf;
3340     cTargetLimit = cBuf +uBufSize*5;
3341     uTarget = uBuf;
3342     uTargetLimit = uBuf+ uBufSize*5;
3343     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3344     if(U_FAILURE(errorCode)){
3345         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3346         return;
3347     }
3348     cSource = cBuf;
3349     cSourceLimit =cTarget;
3350     test =uBuf;
3351     myOff=offsets;
3352     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3353     if(U_FAILURE(errorCode)){
3354         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3355         return;
3356     }
3357 
3358     uSource = (const UChar*)in;
3359     while(uSource<uSourceLimit){
3360         if(*test!=*uSource){
3361 
3362             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3363         }
3364         uSource++;
3365         test++;
3366     }
3367 
3368     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3369     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3370     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3371     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3372     TestJitterbug930("csISO2022JP");
3373     ucnv_close(cnv);
3374     free(uBuf);
3375     free(cBuf);
3376     free(offsets);
3377 }
3378 
TestConv(const uint16_t in[],int len,const char * conv,const char * lang,char byteArr[],int byteArrLen)3379 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3380     const UChar* uSource;
3381     const UChar* uSourceLimit;
3382     const char* cSource;
3383     const char* cSourceLimit;
3384     UChar *uTargetLimit =NULL;
3385     UChar *uTarget;
3386     char *cTarget;
3387     const char *cTargetLimit;
3388     char *cBuf;
3389     UChar *uBuf,*test;
3390     int32_t uBufSize = 120*10;
3391     UErrorCode errorCode=U_ZERO_ERROR;
3392     UConverter *cnv;
3393     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3394     int32_t* myOff= offsets;
3395     cnv=my_ucnv_open(conv, &errorCode);
3396     if(U_FAILURE(errorCode)) {
3397         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3398         return;
3399     }
3400 
3401     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3402     cBuf =(char*)malloc(uBufSize * sizeof(char));
3403     uSource = (const UChar*)in;
3404     uSourceLimit=uSource+len;
3405     cTarget = cBuf;
3406     cTargetLimit = cBuf +uBufSize;
3407     uTarget = uBuf;
3408     uTargetLimit = uBuf+ uBufSize;
3409     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3410     if(U_FAILURE(errorCode)){
3411         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3412         return;
3413     }
3414     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3415     cSource = cBuf;
3416     cSourceLimit =cTarget;
3417     test =uBuf;
3418     myOff=offsets;
3419     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3420     if(U_FAILURE(errorCode)){
3421         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3422         return;
3423     }
3424 
3425     uSource = (const UChar*)in;
3426     while(uSource<uSourceLimit){
3427         if(*test!=*uSource){
3428             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3429         }
3430         uSource++;
3431         test++;
3432     }
3433     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3434     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3435     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3436     if(byteArr && byteArrLen!=0){
3437         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3438         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3439         {
3440             cSource = byteArr;
3441             cSourceLimit = cSource+byteArrLen;
3442             test=uBuf;
3443             myOff = offsets;
3444             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3445             if(U_FAILURE(errorCode)){
3446                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3447                 return;
3448             }
3449 
3450             uSource = (const UChar*)in;
3451             while(uSource<uSourceLimit){
3452                 if(*test!=*uSource){
3453                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3454                 }
3455                 uSource++;
3456                 test++;
3457             }
3458         }
3459     }
3460 
3461     ucnv_close(cnv);
3462     free(uBuf);
3463     free(cBuf);
3464     free(offsets);
3465 }
3466 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)3467 _charAt(int32_t offset, void *context) {
3468     return ((char*)context)[offset];
3469 }
3470 
3471 static int32_t
unescape(UChar * dst,int32_t dstLen,const char * src,int32_t srcLen,UErrorCode * status)3472 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3473     int32_t srcIndex=0;
3474     int32_t dstIndex=0;
3475     if(U_FAILURE(*status)){
3476         return 0;
3477     }
3478     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3479         *status = U_ILLEGAL_ARGUMENT_ERROR;
3480         return 0;
3481     }
3482     if(srcLen==-1){
3483         srcLen = (int32_t)uprv_strlen(src);
3484     }
3485 
3486     for (; srcIndex<srcLen; ) {
3487         UChar32 c = src[srcIndex++];
3488         if (c == 0x005C /*'\\'*/) {
3489             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3490             if (c == (UChar32)0xFFFFFFFF) {
3491                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3492                 break; /* invalid escape sequence */
3493             }
3494         }
3495         if(dstIndex < dstLen){
3496             if(c>0xFFFF){
3497                dst[dstIndex++] = U16_LEAD(c);
3498                if(dstIndex<dstLen){
3499                     dst[dstIndex]=U16_TRAIL(c);
3500                }else{
3501                    *status=U_BUFFER_OVERFLOW_ERROR;
3502                }
3503             }else{
3504                 dst[dstIndex]=(UChar)c;
3505             }
3506 
3507         }else{
3508             *status = U_BUFFER_OVERFLOW_ERROR;
3509         }
3510         dstIndex++; /* for preflighting */
3511     }
3512     return dstIndex;
3513 }
3514 
3515 static void
TestFullRoundtrip(const char * cp)3516 TestFullRoundtrip(const char* cp){
3517     UChar usource[10] ={0};
3518     UChar nsrc[10] = {0};
3519     uint32_t i=1;
3520     int len=0, ulen;
3521     nsrc[0]=0x0061;
3522     /* Test codepoint 0 */
3523     TestConv(usource,1,cp,"",NULL,0);
3524     TestConv(usource,2,cp,"",NULL,0);
3525     nsrc[2]=0x5555;
3526     TestConv(nsrc,3,cp,"",NULL,0);
3527 
3528     for(;i<=0x10FFFF;i++){
3529         if(i==0xD800){
3530             i=0xDFFF;
3531             continue;
3532         }
3533         if(i<=0xFFFF){
3534             usource[0] =(UChar) i;
3535             len=1;
3536         }else{
3537             usource[0]=U16_LEAD(i);
3538             usource[1]=U16_TRAIL(i);
3539             len=2;
3540         }
3541         ulen=len;
3542         if(i==0x80) {
3543             usource[2]=0;
3544         }
3545         /* Test only single code points */
3546         TestConv(usource,ulen,cp,"",NULL,0);
3547         /* Test codepoint repeated twice */
3548         usource[ulen]=usource[0];
3549         usource[ulen+1]=usource[1];
3550         ulen+=len;
3551         TestConv(usource,ulen,cp,"",NULL,0);
3552         /* Test codepoint repeated 3 times */
3553         usource[ulen]=usource[0];
3554         usource[ulen+1]=usource[1];
3555         ulen+=len;
3556         TestConv(usource,ulen,cp,"",NULL,0);
3557         /* Test codepoint in between 2 codepoints */
3558         nsrc[1]=usource[0];
3559         nsrc[2]=usource[1];
3560         nsrc[len+1]=0x5555;
3561         TestConv(nsrc,len+2,cp,"",NULL,0);
3562         uprv_memset(usource,0,sizeof(UChar)*10);
3563     }
3564 }
3565 
3566 static void
TestRoundTrippingAllUTF(void)3567 TestRoundTrippingAllUTF(void){
3568     if(!getTestOption(QUICK_OPTION)){
3569         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3570         TestFullRoundtrip("BOCU-1");
3571         log_verbose("Running exhaustive round trip test for SCSU\n");
3572         TestFullRoundtrip("SCSU");
3573         log_verbose("Running exhaustive round trip test for UTF-8\n");
3574         TestFullRoundtrip("UTF-8");
3575         log_verbose("Running exhaustive round trip test for CESU-8\n");
3576         TestFullRoundtrip("CESU-8");
3577         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3578         TestFullRoundtrip("UTF-16BE");
3579         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3580         TestFullRoundtrip("UTF-16LE");
3581         log_verbose("Running exhaustive round trip test for UTF-16\n");
3582         TestFullRoundtrip("UTF-16");
3583         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3584         TestFullRoundtrip("UTF-32BE");
3585         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3586         TestFullRoundtrip("UTF-32LE");
3587         log_verbose("Running exhaustive round trip test for UTF-32\n");
3588         TestFullRoundtrip("UTF-32");
3589         log_verbose("Running exhaustive round trip test for UTF-7\n");
3590         TestFullRoundtrip("UTF-7");
3591         log_verbose("Running exhaustive round trip test for UTF-7\n");
3592         TestFullRoundtrip("UTF-7,version=1");
3593         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3594         TestFullRoundtrip("IMAP-mailbox-name");
3595         /*
3596          *
3597          * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3598          * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3599          * The old mappings remain as fallbacks.
3600          * This test may be reintroduced at a later time.
3601          *
3602          * 110118 - mow
3603          */
3604          /*
3605          log_verbose("Running exhaustive round trip test for GB18030\n");
3606          TestFullRoundtrip("GB18030");
3607          */
3608     }
3609 }
3610 
3611 static void
TestSCSU()3612 TestSCSU() {
3613 
3614     static const uint16_t germanUTF16[]={
3615         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3616     };
3617 
3618     static const uint8_t germanSCSU[]={
3619         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3620     };
3621 
3622     static const uint16_t russianUTF16[]={
3623         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3624     };
3625 
3626     static const uint8_t russianSCSU[]={
3627         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3628     };
3629 
3630     static const uint16_t japaneseUTF16[]={
3631         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3632         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3633         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3634         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3635         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3636         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3637         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3638         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3639         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3640         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3641         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3642         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3643         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3644         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3645         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3646     };
3647 
3648     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3649      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3650     static const uint8_t japaneseSCSU[]={
3651         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3652         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3653         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3654         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3655         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3656         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3657         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3658         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3659         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3660         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3661         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3662         0xcb, 0x82
3663     };
3664 
3665     static const uint16_t allFeaturesUTF16[]={
3666         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3667         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3668         0x01df, 0xf000, 0xdbff, 0xdfff
3669     };
3670 
3671     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3672      * result here (34B vs. 35B)
3673      */
3674     static const uint8_t allFeaturesSCSU[]={
3675         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3676         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3677         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3678         0xdf, 0x14, 0x80, 0x15, 0xff
3679     };
3680     static const uint16_t monkeyIn[]={
3681         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3682         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3683         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3684         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3685         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3686         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3687         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3688         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3689         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3690         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3691         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3692         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3693         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3694         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3695         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3696         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3697         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3698         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3699         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3700         /* test non-BMP code points */
3701         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3702         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3703         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3704         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3705         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3706         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3707         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3708         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3709         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3710         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3711         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3712 
3713 
3714         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3715         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3716         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3717         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3718         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3719     };
3720     static const char *fTestCases [] = {
3721           "\\ud800\\udc00", /* smallest surrogate*/
3722           "\\ud8ff\\udcff",
3723           "\\udBff\\udFff", /* largest surrogate pair*/
3724           "\\ud834\\udc00",
3725           "\\U0010FFFF",
3726           "Hello \\u9292 \\u9192 World!",
3727           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3728           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3729 
3730           "\\u0648\\u06c8", /* catch missing reset*/
3731           "\\u0648\\u06c8",
3732 
3733           "\\u4444\\uE001", /* lowest quotable*/
3734           "\\u4444\\uf2FF", /* highest quotable*/
3735           "\\u4444\\uf188\\u4444",
3736           "\\u4444\\uf188\\uf288",
3737           "\\u4444\\uf188abc\\u0429\\uf288",
3738           "\\u9292\\u2222",
3739           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3740           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3741           "Hello World!123456",
3742           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3743 
3744           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3745           "abc\\u4411d",      /* uses SQU*/
3746           "abc\\u4411\\u4412d",/* uses SCU*/
3747           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3748           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3749           "\\u9292\\u2222",
3750           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3751           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3752           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3753 
3754           "", /* empty input*/
3755           "\\u0000", /* smallest BMP character*/
3756           "\\uFFFF", /* largest BMP character*/
3757 
3758           /* regression tests*/
3759           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3760           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3761           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3762           "\\u0041\\u00df\\u0401\\u015f",
3763           "\\u9066\\u2123abc",
3764           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3765           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3766     };
3767     int i=0;
3768     for(;i<UPRV_LENGTHOF(fTestCases);i++){
3769         const char* cSrc = fTestCases[i];
3770         UErrorCode status = U_ZERO_ERROR;
3771         int32_t cSrcLen,srcLen;
3772         UChar* src;
3773         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3774         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3775         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3776         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3777         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3778         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3779         free(src);
3780     }
3781     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3782     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3783     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3784     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3785     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3786     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3787     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3788 }
3789 
3790 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346()3791 static void TestJitterbug2346(){
3792     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3793                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3794     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3795 
3796     UChar uTarget[500]={'\0'};
3797     UChar* utarget=uTarget;
3798     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3799 
3800     char cTarget[500]={'\0'};
3801     char* ctarget=cTarget;
3802     char* ctargetLimit=cTarget+sizeof(cTarget);
3803     const char* csource=source;
3804     UChar* temp = expected;
3805     UErrorCode err=U_ZERO_ERROR;
3806 
3807     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3808     if(U_FAILURE(err)) {
3809         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3810         return;
3811     }
3812     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3813     if(U_FAILURE(err)) {
3814         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3815         return;
3816     }
3817     utargetLimit=utarget;
3818     utarget = uTarget;
3819     while(utarget<utargetLimit){
3820         if(*temp!=*utarget){
3821 
3822             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3823         }
3824         utarget++;
3825         temp++;
3826     }
3827     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3828     if(U_FAILURE(err)) {
3829         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3830         return;
3831     }
3832     ctargetLimit=ctarget;
3833     ctarget =cTarget;
3834     ucnv_close(conv);
3835 
3836 
3837 }
3838 
3839 static void
TestISO_2022_JP_1()3840 TestISO_2022_JP_1() {
3841     /* test input */
3842     static const uint16_t in[]={
3843         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3844         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3845         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3846         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3847         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3848         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3849         0x201D, 0x000D, 0x000A,
3850         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3851         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3852         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3853         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3854         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3855         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3856       };
3857     const UChar* uSource;
3858     const UChar* uSourceLimit;
3859     const char* cSource;
3860     const char* cSourceLimit;
3861     UChar *uTargetLimit =NULL;
3862     UChar *uTarget;
3863     char *cTarget;
3864     const char *cTargetLimit;
3865     char *cBuf;
3866     UChar *uBuf,*test;
3867     int32_t uBufSize = 120;
3868     UErrorCode errorCode=U_ZERO_ERROR;
3869     UConverter *cnv;
3870 
3871     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3872     if(U_FAILURE(errorCode)) {
3873         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3874         return;
3875     }
3876 
3877     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3878     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3879     uSource = (const UChar*)in;
3880     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3881     cTarget = cBuf;
3882     cTargetLimit = cBuf +uBufSize*5;
3883     uTarget = uBuf;
3884     uTargetLimit = uBuf+ uBufSize*5;
3885     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3886     if(U_FAILURE(errorCode)){
3887         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3888         return;
3889     }
3890     cSource = cBuf;
3891     cSourceLimit =cTarget;
3892     test =uBuf;
3893     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3894     if(U_FAILURE(errorCode)){
3895         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3896         return;
3897     }
3898     uSource = (const UChar*)in;
3899     while(uSource<uSourceLimit){
3900         if(*test!=*uSource){
3901 
3902             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3903         }
3904         uSource++;
3905         test++;
3906     }
3907     /*ucnv_close(cnv);
3908     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3909     /*Test for the condition where there is an invalid character*/
3910     ucnv_reset(cnv);
3911     {
3912         static const uint8_t source2[]={0x0e,0x24,0x053};
3913         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3914     }
3915     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3916     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3917     ucnv_close(cnv);
3918     free(uBuf);
3919     free(cBuf);
3920 }
3921 
3922 static void
TestISO_2022_JP_2()3923 TestISO_2022_JP_2() {
3924     /* test input */
3925     static const uint16_t in[]={
3926         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3927         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3928         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3929         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3930         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3931         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3932         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3933         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3934         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3935         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3936         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3937         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3938         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3939         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3940         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3941         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3942         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3943         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3944         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3945       };
3946     const UChar* uSource;
3947     const UChar* uSourceLimit;
3948     const char* cSource;
3949     const char* cSourceLimit;
3950     UChar *uTargetLimit =NULL;
3951     UChar *uTarget;
3952     char *cTarget;
3953     const char *cTargetLimit;
3954     char *cBuf;
3955     UChar *uBuf,*test;
3956     int32_t uBufSize = 120;
3957     UErrorCode errorCode=U_ZERO_ERROR;
3958     UConverter *cnv;
3959     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3960     int32_t* myOff= offsets;
3961     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3962     if(U_FAILURE(errorCode)) {
3963         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3964         return;
3965     }
3966 
3967     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3968     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3969     uSource = (const UChar*)in;
3970     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3971     cTarget = cBuf;
3972     cTargetLimit = cBuf +uBufSize*5;
3973     uTarget = uBuf;
3974     uTargetLimit = uBuf+ uBufSize*5;
3975     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3976     if(U_FAILURE(errorCode)){
3977         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3978         return;
3979     }
3980     cSource = cBuf;
3981     cSourceLimit =cTarget;
3982     test =uBuf;
3983     myOff=offsets;
3984     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3985     if(U_FAILURE(errorCode)){
3986         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3987         return;
3988     }
3989     uSource = (const UChar*)in;
3990     while(uSource<uSourceLimit){
3991         if(*test!=*uSource){
3992 
3993             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3994         }
3995         uSource++;
3996         test++;
3997     }
3998     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3999     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4000     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4001     /*Test for the condition where there is an invalid character*/
4002     ucnv_reset(cnv);
4003     {
4004         static const uint8_t source2[]={0x0e,0x24,0x053};
4005         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4006     }
4007     ucnv_close(cnv);
4008     free(uBuf);
4009     free(cBuf);
4010     free(offsets);
4011 }
4012 
4013 static void
TestISO_2022_KR()4014 TestISO_2022_KR() {
4015     /* test input */
4016     static const uint16_t in[]={
4017                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4018                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4019                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4020                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4021                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4022                    ,0x53E3,0x53E4,0x000A,0x000D};
4023     const UChar* uSource;
4024     const UChar* uSourceLimit;
4025     const char* cSource;
4026     const char* cSourceLimit;
4027     UChar *uTargetLimit =NULL;
4028     UChar *uTarget;
4029     char *cTarget;
4030     const char *cTargetLimit;
4031     char *cBuf;
4032     UChar *uBuf,*test;
4033     int32_t uBufSize = 120;
4034     UErrorCode errorCode=U_ZERO_ERROR;
4035     UConverter *cnv;
4036     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4037     int32_t* myOff= offsets;
4038     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4039     if(U_FAILURE(errorCode)) {
4040         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4041         return;
4042     }
4043 
4044     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4045     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4046     uSource = (const UChar*)in;
4047     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4048     cTarget = cBuf;
4049     cTargetLimit = cBuf +uBufSize*5;
4050     uTarget = uBuf;
4051     uTargetLimit = uBuf+ uBufSize*5;
4052     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4053     if(U_FAILURE(errorCode)){
4054         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4055         return;
4056     }
4057     cSource = cBuf;
4058     cSourceLimit =cTarget;
4059     test =uBuf;
4060     myOff=offsets;
4061     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4062     if(U_FAILURE(errorCode)){
4063         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4064         return;
4065     }
4066     uSource = (const UChar*)in;
4067     while(uSource<uSourceLimit){
4068         if(*test!=*uSource){
4069             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4070         }
4071         uSource++;
4072         test++;
4073     }
4074     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4075     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4076     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4077     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4078     TestJitterbug930("csISO2022KR");
4079     /*Test for the condition where there is an invalid character*/
4080     ucnv_reset(cnv);
4081     {
4082         static const uint8_t source2[]={0x1b,0x24,0x053};
4083         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4084         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4085     }
4086     ucnv_close(cnv);
4087     free(uBuf);
4088     free(cBuf);
4089     free(offsets);
4090 }
4091 
4092 static void
TestISO_2022_KR_1()4093 TestISO_2022_KR_1() {
4094     /* test input */
4095     static const uint16_t in[]={
4096                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4097                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4098                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4099                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4100                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4101                    ,0x53E3,0x53E4,0x000A,0x000D};
4102     const UChar* uSource;
4103     const UChar* uSourceLimit;
4104     const char* cSource;
4105     const char* cSourceLimit;
4106     UChar *uTargetLimit =NULL;
4107     UChar *uTarget;
4108     char *cTarget;
4109     const char *cTargetLimit;
4110     char *cBuf;
4111     UChar *uBuf,*test;
4112     int32_t uBufSize = 120;
4113     UErrorCode errorCode=U_ZERO_ERROR;
4114     UConverter *cnv;
4115     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4116     int32_t* myOff= offsets;
4117     cnv=ucnv_open("ibm-25546", &errorCode);
4118     if(U_FAILURE(errorCode)) {
4119         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4120         return;
4121     }
4122 
4123     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4124     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4125     uSource = (const UChar*)in;
4126     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4127     cTarget = cBuf;
4128     cTargetLimit = cBuf +uBufSize*5;
4129     uTarget = uBuf;
4130     uTargetLimit = uBuf+ uBufSize*5;
4131     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4132     if(U_FAILURE(errorCode)){
4133         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4134         return;
4135     }
4136     cSource = cBuf;
4137     cSourceLimit =cTarget;
4138     test =uBuf;
4139     myOff=offsets;
4140     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4141     if(U_FAILURE(errorCode)){
4142         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4143         return;
4144     }
4145     uSource = (const UChar*)in;
4146     while(uSource<uSourceLimit){
4147         if(*test!=*uSource){
4148             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4149         }
4150         uSource++;
4151         test++;
4152     }
4153     ucnv_reset(cnv);
4154     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4155     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4156     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4157     ucnv_reset(cnv);
4158     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4159         /*Test for the condition where there is an invalid character*/
4160     ucnv_reset(cnv);
4161     {
4162         static const uint8_t source2[]={0x1b,0x24,0x053};
4163         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4164         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4165     }
4166     ucnv_close(cnv);
4167     free(uBuf);
4168     free(cBuf);
4169     free(offsets);
4170 }
4171 
TestJitterbug2411()4172 static void TestJitterbug2411(){
4173     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4174                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4175     UConverter* kr=NULL, *kr1=NULL;
4176     UErrorCode errorCode = U_ZERO_ERROR;
4177     UChar tgt[100]={'\0'};
4178     UChar* target = tgt;
4179     UChar* targetLimit = target+100;
4180     kr=ucnv_open("iso-2022-kr", &errorCode);
4181     if(U_FAILURE(errorCode)) {
4182         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4183         return;
4184     }
4185     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4186     if(U_FAILURE(errorCode)) {
4187         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4188         return;
4189     }
4190     kr1 = ucnv_open("ibm-25546", &errorCode);
4191     if(U_FAILURE(errorCode)) {
4192         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4193         return;
4194     }
4195     target = tgt;
4196     targetLimit = target+100;
4197     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4198 
4199     if(U_FAILURE(errorCode)) {
4200         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4201         return;
4202     }
4203 
4204     ucnv_close(kr);
4205     ucnv_close(kr1);
4206 
4207 }
4208 
4209 static void
TestJIS()4210 TestJIS(){
4211     /* From Unicode moved to testdata/conversion.txt */
4212     /*To Unicode*/
4213     {
4214         static const uint8_t sampleTextJIS[] = {
4215             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4216             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4217             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4218         };
4219         static const uint16_t expectedISO2022JIS[] = {
4220             0x0041, 0x0042,
4221             0xFF81, 0xFF82,
4222             0x3000
4223         };
4224         static const int32_t  toISO2022JISOffs[]={
4225             3,4,
4226             8,9,
4227             16
4228         };
4229 
4230         static const uint8_t sampleTextJIS7[] = {
4231             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4232             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4233             0x1b,0x24,0x42,0x21,0x21,
4234             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4235             0x21,0x22,
4236             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4237         };
4238         static const uint16_t expectedISO2022JIS7[] = {
4239             0x0041, 0x0042,
4240             0xFF81, 0xFF82,
4241             0x3000,
4242             0xFF81, 0xFF82,
4243             0x3001,
4244             0x3000
4245         };
4246         static const int32_t  toISO2022JIS7Offs[]={
4247             3,4,
4248             8,9,
4249             13,16,
4250             17,
4251             19,27
4252         };
4253         static const uint8_t sampleTextJIS8[] = {
4254             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4255             0xa1,0xc8,0xd9,/*Katakana Set*/
4256             0x1b,0x28,0x42,
4257             0x41,0x42,
4258             0xb1,0xc3, /*Katakana Set*/
4259             0x1b,0x24,0x42,0x21,0x21
4260         };
4261         static const uint16_t expectedISO2022JIS8[] = {
4262             0x0041, 0x0042,
4263             0xff61, 0xff88, 0xff99,
4264             0x0041, 0x0042,
4265             0xff71, 0xff83,
4266             0x3000
4267         };
4268         static const int32_t  toISO2022JIS8Offs[]={
4269             3, 4,  5,  6,
4270             7, 11, 12, 13,
4271             14, 18,
4272         };
4273 
4274         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4275             UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4276         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4277             UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4278         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4279             UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4280     }
4281 
4282 }
4283 
4284 
4285 #if 0
4286  ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4287 
4288 static void TestJitterbug915(){
4289 /* tests for roundtripping of the below sequence
4290 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4291 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4292 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4293 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4294 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4295 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4296 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4297 */
4298     static const char cSource[]={
4299         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4300         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4301         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4302         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4303         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4304         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4305         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4306         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4307         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4308         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4309         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4310         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4311         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4312         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4313         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4314         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4315         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4316         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4317         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4318         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4319         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4320         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4321         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4322         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4323         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4324         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4325         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4326         0x37, 0x20, 0x2A, 0x2F
4327     };
4328     UChar uTarget[500]={'\0'};
4329     UChar* utarget=uTarget;
4330     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4331 
4332     char cTarget[500]={'\0'};
4333     char* ctarget=cTarget;
4334     char* ctargetLimit=cTarget+sizeof(cTarget);
4335     const char* csource=cSource;
4336     const char* tempSrc = cSource;
4337     UErrorCode err=U_ZERO_ERROR;
4338 
4339     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4340     if(U_FAILURE(err)) {
4341         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4342         return;
4343     }
4344     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4345     if(U_FAILURE(err)) {
4346         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4347         return;
4348     }
4349     utargetLimit=utarget;
4350     utarget = uTarget;
4351     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4352     if(U_FAILURE(err)) {
4353         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4354         return;
4355     }
4356     ctargetLimit=ctarget;
4357     ctarget =cTarget;
4358     while(ctarget<ctargetLimit){
4359         if(*ctarget != *tempSrc){
4360             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4361         }
4362         ++ctarget;
4363         ++tempSrc;
4364     }
4365 
4366     ucnv_close(conv);
4367 }
4368 
4369 static void
4370 TestISO_2022_CN_EXT() {
4371     /* test input */
4372     static const uint16_t in[]={
4373                 /* test Non-BMP code points */
4374          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4375          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4376          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4377          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4378          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4379          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4380          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4381          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4382          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4383          0xD869, 0xDED5,
4384 
4385          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4386          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4387          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4388          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4389          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4390          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4391          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4392          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4393          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4394          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4395          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4396          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4397          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4398          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4399          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4400          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4401          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4402          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4403 
4404          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4405 
4406       };
4407 
4408     const UChar* uSource;
4409     const UChar* uSourceLimit;
4410     const char* cSource;
4411     const char* cSourceLimit;
4412     UChar *uTargetLimit =NULL;
4413     UChar *uTarget;
4414     char *cTarget;
4415     const char *cTargetLimit;
4416     char *cBuf;
4417     UChar *uBuf,*test;
4418     int32_t uBufSize = 180;
4419     UErrorCode errorCode=U_ZERO_ERROR;
4420     UConverter *cnv;
4421     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4422     int32_t* myOff= offsets;
4423     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4424     if(U_FAILURE(errorCode)) {
4425         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4426         return;
4427     }
4428 
4429     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4430     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4431     uSource = (const UChar*)in;
4432     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4433     cTarget = cBuf;
4434     cTargetLimit = cBuf +uBufSize*5;
4435     uTarget = uBuf;
4436     uTargetLimit = uBuf+ uBufSize*5;
4437     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4438     if(U_FAILURE(errorCode)){
4439         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4440         return;
4441     }
4442     cSource = cBuf;
4443     cSourceLimit =cTarget;
4444     test =uBuf;
4445     myOff=offsets;
4446     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4447     if(U_FAILURE(errorCode)){
4448         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4449         return;
4450     }
4451     uSource = (const UChar*)in;
4452     while(uSource<uSourceLimit){
4453         if(*test!=*uSource){
4454             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4455         }
4456         else{
4457             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4458         }
4459         uSource++;
4460         test++;
4461     }
4462     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4463     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4464     /*Test for the condition where there is an invalid character*/
4465     ucnv_reset(cnv);
4466     {
4467         static const uint8_t source2[]={0x0e,0x24,0x053};
4468         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4469     }
4470     ucnv_close(cnv);
4471     free(uBuf);
4472     free(cBuf);
4473     free(offsets);
4474 }
4475 #endif
4476 
4477 static void
TestISO_2022_CN()4478 TestISO_2022_CN() {
4479     /* test input */
4480     static const uint16_t in[]={
4481          /* jitterbug 951 */
4482          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4483          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4484          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4485          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4486          0x0020, 0x0045, 0x004e, 0x0044,
4487          /**/
4488          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4489          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4490          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4491          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4492          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4493          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4494          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4495          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4496          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4497          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4498          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4499          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4500          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4501          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4502          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4503          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4504          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4505 
4506       };
4507     const UChar* uSource;
4508     const UChar* uSourceLimit;
4509     const char* cSource;
4510     const char* cSourceLimit;
4511     UChar *uTargetLimit =NULL;
4512     UChar *uTarget;
4513     char *cTarget;
4514     const char *cTargetLimit;
4515     char *cBuf;
4516     UChar *uBuf,*test;
4517     int32_t uBufSize = 180;
4518     UErrorCode errorCode=U_ZERO_ERROR;
4519     UConverter *cnv;
4520     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4521     int32_t* myOff= offsets;
4522     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4523     if(U_FAILURE(errorCode)) {
4524         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4525         return;
4526     }
4527 
4528     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4529     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4530     uSource = (const UChar*)in;
4531     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4532     cTarget = cBuf;
4533     cTargetLimit = cBuf +uBufSize*5;
4534     uTarget = uBuf;
4535     uTargetLimit = uBuf+ uBufSize*5;
4536     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4537     if(U_FAILURE(errorCode)){
4538         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4539         return;
4540     }
4541     cSource = cBuf;
4542     cSourceLimit =cTarget;
4543     test =uBuf;
4544     myOff=offsets;
4545     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4546     if(U_FAILURE(errorCode)){
4547         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4548         return;
4549     }
4550     uSource = (const UChar*)in;
4551     while(uSource<uSourceLimit){
4552         if(*test!=*uSource){
4553             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4554         }
4555         else{
4556             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4557         }
4558         uSource++;
4559         test++;
4560     }
4561     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4562     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4563     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4564     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4565     TestJitterbug930("csISO2022CN");
4566     /*Test for the condition where there is an invalid character*/
4567     ucnv_reset(cnv);
4568     {
4569         static const uint8_t source2[]={0x0e,0x24,0x053};
4570         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4571     }
4572 
4573     ucnv_close(cnv);
4574     free(uBuf);
4575     free(cBuf);
4576     free(offsets);
4577 }
4578 
/*
 * One table entry for the empty-segment tests on ISO-2022-JP/KR/CN and HZ.
 * Those tests check that the UConverterCallbackReason reported for an empty
 * segment is UCNV_IRREGULAR.
 */
typedef struct {
    const char *converterName;    /* converter to open; NULL terminates a table of entries */
    const char *inputText;        /* bytes handed to the converter */
    int         inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4585 
4586 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)4587 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4588                                              int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4589     if (reason > UCNV_IRREGULAR) {
4590         return;
4591     }
4592     if (reason != UCNV_IRREGULAR) {
4593         log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4594     }
4595     /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4596     *err = U_ZERO_ERROR;
4597     ucnv_cbToUWriteSub(toArgs,0,err);
4598 }
4599 
4600 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4601 static void TestJitterbug6175(void) {
4602     static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4603     static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4604     static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4605     static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4606     static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4607     static const EmptySegmentTest emptySegmentTests[] = {
4608         /* converterName inputText    inputTextLength */
4609         { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4610         { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4611         { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4612         { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4613         { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4614         /* terminator: */
4615         { NULL,          NULL,        0,                  }
4616     };
4617     const EmptySegmentTest * testPtr;
4618     for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4619         UErrorCode   err = U_ZERO_ERROR;
4620         UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4621         if (U_FAILURE(err)) {
4622             log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4623             return;
4624         }
4625         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4626         if (U_FAILURE(err)) {
4627             log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4628             ucnv_close(cnv);
4629             return;
4630         }
4631         {
4632             UChar         toUChars[kEmptySegmentToUCharsMax];
4633             UChar *       toUCharsPtr = toUChars;
4634             const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4635             const char *  inCharsPtr = testPtr->inputText;
4636             const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4637             ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4638         }
4639         ucnv_close(cnv);
4640     }
4641 }
4642 
4643 static void
TestEBCDIC_STATEFUL()4644 TestEBCDIC_STATEFUL() {
4645     /* test input */
4646     static const uint8_t in[]={
4647         0x61,
4648         0x1a,
4649         0x0f, 0x4b,
4650         0x42,
4651         0x40,
4652         0x36,
4653     };
4654 
4655     /* expected test results */
4656     static const int32_t results[]={
4657         /* number of bytes read, code point */
4658         1, 0x002f,
4659         1, 0x0092,
4660         2, 0x002e,
4661         1, 0xff62,
4662         1, 0x0020,
4663         1, 0x0096,
4664 
4665     };
4666     static const uint8_t in2[]={
4667         0x0f,
4668         0xa1,
4669         0x01
4670     };
4671 
4672     /* expected test results */
4673     static const int32_t results2[]={
4674         /* number of bytes read, code point */
4675         2, 0x203E,
4676         1, 0x0001,
4677     };
4678 
4679     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4680     UErrorCode errorCode=U_ZERO_ERROR;
4681     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4682     if(U_FAILURE(errorCode)) {
4683         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4684         return;
4685     }
4686     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4687     ucnv_reset(cnv);
4688      /* Test the condition when source >= sourceLimit */
4689     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4690     ucnv_reset(cnv);
4691     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4692     {
4693         static const uint8_t source1[]={0x0f};
4694         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4695     }
4696     /*Test for the condition where there is an invalid character*/
4697     ucnv_reset(cnv);
4698     {
4699         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4700         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4701     }
4702     ucnv_reset(cnv);
4703     source=(const char*)in2;
4704     limit=(const char*)in2+sizeof(in2);
4705     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4706     ucnv_close(cnv);
4707 
4708 }
4709 
4710 static void
TestGB18030()4711 TestGB18030() {
4712     /* test input */
4713     static const uint8_t in[]={
4714         0x24,
4715         0x7f,
4716         0x81, 0x30, 0x81, 0x30,
4717         0xa8, 0xbf,
4718         0xa2, 0xe3,
4719         0xd2, 0xbb,
4720         0x82, 0x35, 0x8f, 0x33,
4721         0x84, 0x31, 0xa4, 0x39,
4722         0x90, 0x30, 0x81, 0x30,
4723         0xe3, 0x32, 0x9a, 0x35
4724 #if 0
4725         /*
4726          * Feature removed   markus 2000-oct-26
4727          * Only some codepages must match surrogate pairs into supplementary code points -
4728          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4729          * GB 18030 provides direct encodings for supplementary code points, therefore
4730          * it must not combine two single-encoded surrogates into one code point.
4731          */
4732         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4733 #endif
4734     };
4735 
4736     /* expected test results */
4737     static const int32_t results[]={
4738         /* number of bytes read, code point */
4739         1, 0x24,
4740         1, 0x7f,
4741         4, 0x80,
4742         2, 0x1f9,
4743         2, 0x20ac,
4744         2, 0x4e00,
4745         4, 0x9fa6,
4746         4, 0xffff,
4747         4, 0x10000,
4748         4, 0x10ffff
4749 #if 0
4750         /* Feature removed. See comment above. */
4751         8, 0x10000
4752 #endif
4753     };
4754 
4755 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4756     UErrorCode errorCode=U_ZERO_ERROR;
4757     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4758     if(U_FAILURE(errorCode)) {
4759         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4760         return;
4761     }
4762     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4763     ucnv_close(cnv);
4764 }
4765 
4766 static void
TestLMBCS()4767 TestLMBCS() {
4768     /* LMBCS-1 string */
4769     static const uint8_t pszLMBCS[]={
4770         0x61,
4771         0x01, 0x29,
4772         0x81,
4773         0xA0,
4774         0x0F, 0x27,
4775         0x0F, 0x91,
4776         0x14, 0x0a, 0x74,
4777         0x14, 0xF6, 0x02,
4778         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4779         0x10, 0x88, 0xA0,
4780     };
4781 
4782     /* Unicode UChar32 equivalents */
4783     static const UChar32 pszUnicode32[]={
4784         /* code point */
4785         0x00000061,
4786         0x00002013,
4787         0x000000FC,
4788         0x000000E1,
4789         0x00000007,
4790         0x00000091,
4791         0x00000a74,
4792         0x00000200,
4793         0x00023456, /* code point for surrogate pair */
4794         0x00005516
4795     };
4796 
4797 /* Unicode UChar equivalents */
4798     static const UChar pszUnicode[]={
4799         /* code point */
4800         0x0061,
4801         0x2013,
4802         0x00FC,
4803         0x00E1,
4804         0x0007,
4805         0x0091,
4806         0x0a74,
4807         0x0200,
4808         0xD84D, /* low surrogate */
4809         0xDC56, /* high surrogate */
4810         0x5516
4811     };
4812 
4813 /* expected test results */
4814     static const int offsets32[]={
4815         /* number of bytes read, code point */
4816         0,
4817         1,
4818         3,
4819         4,
4820         5,
4821         7,
4822         9,
4823         12,
4824         15,
4825         21,
4826         24
4827     };
4828 
4829 /* expected test results */
4830     static const int offsets[]={
4831         /* number of bytes read, code point */
4832         0,
4833         1,
4834         3,
4835         4,
4836         5,
4837         7,
4838         9,
4839         12,
4840         15,
4841         18,
4842         21,
4843         24
4844     };
4845 
4846 
4847     UConverter *cnv;
4848 
4849 #define NAME_LMBCS_1 "LMBCS-1"
4850 #define NAME_LMBCS_2 "LMBCS-2"
4851 
4852 
4853    /* Some basic open/close/property tests on some LMBCS converters */
4854     {
4855 
4856       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4857       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4858       char get_subchars [1];
4859       const char * get_name;
4860       UConverter *cnv1;
4861       UConverter *cnv2;
4862 
4863       int8_t len = sizeof(get_subchars);
4864 
4865       UErrorCode errorCode=U_ZERO_ERROR;
4866 
4867       /* Open */
4868       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4869       if(U_FAILURE(errorCode)) {
4870          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4871          return;
4872       }
4873       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4874       if(U_FAILURE(errorCode)) {
4875          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4876          return;
4877       }
4878 
4879       /* Name */
4880       get_name = ucnv_getName (cnv1, &errorCode);
4881       if (strcmp(NAME_LMBCS_1,get_name)){
4882          log_err("Unexpected converter name: %s\n", get_name);
4883       }
4884       get_name = ucnv_getName (cnv2, &errorCode);
4885       if (strcmp(NAME_LMBCS_2,get_name)){
4886          log_err("Unexpected converter name: %s\n", get_name);
4887       }
4888 
4889       /* substitution chars */
4890       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4891       if(U_FAILURE(errorCode)) {
4892          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4893       }
4894       if (len!=1){
4895          log_err("Unexpected length of sub chars\n");
4896       }
4897       if (get_subchars[0] != expected_subchars[0]){
4898            log_err("Unexpected value of sub chars\n");
4899       }
4900       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4901       if(U_FAILURE(errorCode)) {
4902          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4903       }
4904       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4905       if(U_FAILURE(errorCode)) {
4906          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4907       }
4908       if (len!=1){
4909          log_err("Unexpected length of sub chars\n");
4910       }
4911       if (get_subchars[0] != new_subchars[0]){
4912            log_err("Unexpected value of sub chars\n");
4913       }
4914       ucnv_close(cnv1);
4915       ucnv_close(cnv2);
4916 
4917     }
4918 
4919     /* LMBCS to Unicode - offsets */
4920     {
4921        UErrorCode errorCode=U_ZERO_ERROR;
4922 
4923        const char * pSource = (const char *)pszLMBCS;
4924        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4925 
4926        UChar Out [sizeof(pszUnicode) + 1];
4927        UChar * pOut = Out;
4928        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4929 
4930        int32_t off [sizeof(offsets)];
4931 
4932       /* last 'offset' in expected results is just the final size.
4933          (Makes other tests easier). Compensate here: */
4934 
4935        off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4936 
4937 
4938 
4939       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4940       if(U_FAILURE(errorCode)) {
4941            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4942            return;
4943       }
4944 
4945 
4946 
4947       ucnv_toUnicode (cnv,
4948                       &pOut,
4949                       OutLimit,
4950                       &pSource,
4951                       sourceLimit,
4952                       off,
4953                       TRUE,
4954                       &errorCode);
4955 
4956 
4957        if (memcmp(off,offsets,sizeof(offsets)))
4958        {
4959          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4960        }
4961        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4962        {
4963          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4964        }
4965        ucnv_close(cnv);
4966     }
4967     {
4968    /* LMBCS to Unicode - getNextUChar */
4969       const char * sourceStart;
4970       const char *source=(const char *)pszLMBCS;
4971       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4972       const UChar32 *results= pszUnicode32;
4973       const int *off = offsets32;
4974 
4975       UErrorCode errorCode=U_ZERO_ERROR;
4976       UChar32 uniChar;
4977 
4978       cnv=ucnv_open("LMBCS-1", &errorCode);
4979       if(U_FAILURE(errorCode)) {
4980            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4981            return;
4982       }
4983       else
4984       {
4985 
4986          while(source<limit) {
4987             sourceStart=source;
4988             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4989             if(U_FAILURE(errorCode)) {
4990                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4991                   break;
4992             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4993                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4994                    uniChar, (source-sourceStart), *results, *off);
4995                break;
4996             }
4997             results++;
4998             off++;
4999          }
5000        }
5001        ucnv_close(cnv);
5002     }
5003     { /* test locale & optimization group operations: Unicode to LMBCS */
5004 
5005       UErrorCode errorCode=U_ZERO_ERROR;
5006       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5007       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5008       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5009       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5010       const UChar * pUniOut = uniString;
5011       UChar * pUniIn = uniString;
5012       uint8_t lmbcsString [4];
5013       const char * pLMBCSOut = (const char *)lmbcsString;
5014       char * pLMBCSIn = (char *)lmbcsString;
5015 
5016       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5017       ucnv_fromUnicode (cnv16he,
5018                         &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5019                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5020                         NULL, 1, &errorCode);
5021 
5022       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5023       {
5024          log_err("LMBCS-16,locale=he gives unexpected translation\n");
5025       }
5026 
5027       pLMBCSIn= (char *)lmbcsString;
5028       pUniOut = uniString;
5029       ucnv_fromUnicode (cnv01us,
5030                         &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5031                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5032                         NULL, 1, &errorCode);
5033 
5034       if (lmbcsString[0] != 0x9F)
5035       {
5036          log_err("LMBCS-1,locale=US gives unexpected translation\n");
5037       }
5038 
5039       /* single byte char from mbcs char set */
5040       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5041       pLMBCSOut = (const char *)lmbcsString;
5042       pUniIn = uniString;
5043       ucnv_toUnicode (cnv16jp,
5044                         &pUniIn, pUniIn + 1,
5045                         &pLMBCSOut, (pLMBCSOut + 1),
5046                         NULL, 1, &errorCode);
5047       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5048       {
5049            log_err("Unexpected results from LMBCS-16 single byte char\n");
5050       }
5051       /* convert to group 1: should be 3 bytes */
5052       pLMBCSIn = (char *)lmbcsString;
5053       pUniOut = uniString;
5054       ucnv_fromUnicode (cnv01us,
5055                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5056                         &pUniOut, pUniOut + 1,
5057                         NULL, 1, &errorCode);
5058       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5059          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5060       {
5061            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5062       }
5063       pLMBCSOut = (const char *)lmbcsString;
5064       pUniIn = uniString;
5065       ucnv_toUnicode (cnv01us,
5066                         &pUniIn, pUniIn + 1,
5067                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5068                         NULL, 1, &errorCode);
5069       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5070       {
5071            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5072       }
5073       pLMBCSIn = (char *)lmbcsString;
5074       pUniOut = uniString;
5075       ucnv_fromUnicode (cnv16jp,
5076                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5077                         &pUniOut, pUniOut + 1,
5078                         NULL, 1, &errorCode);
5079       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5080       {
5081            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5082       }
5083       ucnv_close(cnv16he);
5084       ucnv_close(cnv16jp);
5085       ucnv_close(cnv01us);
5086     }
5087     {
5088        /* Small source buffer testing, LMBCS -> Unicode */
5089 
5090        UErrorCode errorCode=U_ZERO_ERROR;
5091 
5092        const char * pSource = (const char *)pszLMBCS;
5093        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5094        int codepointCount = 0;
5095 
5096        UChar Out [sizeof(pszUnicode) + 1];
5097        UChar * pOut = Out;
5098        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5099 
5100 
5101        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5102        if(U_FAILURE(errorCode)) {
5103            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5104            return;
5105        }
5106 
5107 
5108        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5109        {
5110            ucnv_toUnicode (cnv,
5111                &pOut,
5112                OutLimit,
5113                &pSource,
5114                (pSource+1), /* claim that this is a 1- byte buffer */
5115                NULL,
5116                FALSE,    /* FALSE means there might be more chars in the next buffer */
5117                &errorCode);
5118 
5119            if (U_SUCCESS (errorCode))
5120            {
5121                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5122                {
5123                    /* we are on to the next code point: check value */
5124 
5125                    if (Out[0] != pszUnicode[codepointCount]){
5126                        log_err("LMBCS->Uni result %lx should have been %lx \n",
5127                            Out[0], pszUnicode[codepointCount]);
5128                    }
5129 
5130                    pOut = Out; /* reset for accumulating next code point */
5131                    codepointCount++;
5132                }
5133            }
5134            else
5135            {
5136                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5137            }
5138        }
5139        {
5140          /* limits & surrogate error testing */
5141          char LIn [sizeof(pszLMBCS)];
5142          const char * pLIn = LIn;
5143 
5144          char LOut [sizeof(pszLMBCS)];
5145          char * pLOut = LOut;
5146 
5147          UChar UOut [sizeof(pszUnicode)];
5148          UChar * pUOut = UOut;
5149 
5150          UChar UIn [sizeof(pszUnicode)];
5151          const UChar * pUIn = UIn;
5152 
5153          int32_t off [sizeof(offsets)];
5154          UChar32 uniChar;
5155 
5156          errorCode=U_ZERO_ERROR;
5157 
5158          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5159          pUIn++;
5160          ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5161          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5162          {
5163             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5164          }
5165          pUIn--;
5166 
5167          errorCode=U_ZERO_ERROR;
5168          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5169          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5170          {
5171             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5172          }
5173          errorCode=U_ZERO_ERROR;
5174 
5175          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5176          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5177          {
5178             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5179          }
5180          errorCode=U_ZERO_ERROR;
5181 
5182          /* 0 byte source request - no error, no pointer movement */
5183          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5184          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5185          if(U_FAILURE(errorCode)) {
5186             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5187          }
5188          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5189          {
5190               log_err("Unexpected pointer move in 0 byte source request \n");
5191          }
5192          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5193          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5194          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5195          {
5196             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5197          }
5198          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5199          {
5200             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5201          }
5202          errorCode = U_ZERO_ERROR;
5203 
5204          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5205 
5206          pUIn = pszUnicode;
5207          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5208          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5209          {
5210             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5211          }
5212 
5213          errorCode = U_ZERO_ERROR;
5214 
5215          pLIn = (const char *)pszLMBCS;
5216          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5217          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5218          {
5219             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5220          }
5221 
5222          /* unpaired or chopped LMBCS surrogates */
5223 
5224          /* OK high surrogate, Low surrogate is chopped */
5225          LIn [0] = (char)0x14;
5226          LIn [1] = (char)0xD8;
5227          LIn [2] = (char)0x01;
5228          LIn [3] = (char)0x14;
5229          LIn [4] = (char)0xDC;
5230          pLIn = LIn;
5231          errorCode = U_ZERO_ERROR;
5232          pUOut = UOut;
5233 
5234          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5235          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5236          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5237          {
5238             log_err("Unexpected results on chopped low surrogate\n");
5239          }
5240 
5241          /* chopped at surrogate boundary */
5242          LIn [0] = (char)0x14;
5243          LIn [1] = (char)0xD8;
5244          LIn [2] = (char)0x01;
5245          pLIn = LIn;
5246          errorCode = U_ZERO_ERROR;
5247          pUOut = UOut;
5248 
5249          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5250          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5251          {
5252             log_err("Unexpected results on chopped at surrogate boundary \n");
5253          }
5254 
5255          /* unpaired surrogate plus valid Unichar */
5256          LIn [0] = (char)0x14;
5257          LIn [1] = (char)0xD8;
5258          LIn [2] = (char)0x01;
5259          LIn [3] = (char)0x14;
5260          LIn [4] = (char)0xC9;
5261          LIn [5] = (char)0xD0;
5262          pLIn = LIn;
5263          errorCode = U_ZERO_ERROR;
5264          pUOut = UOut;
5265 
5266          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5267          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5268          {
5269             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5270          }
5271 
5272       /* unpaired surrogate plus chopped Unichar */
5273          LIn [0] = (char)0x14;
5274          LIn [1] = (char)0xD8;
5275          LIn [2] = (char)0x01;
5276          LIn [3] = (char)0x14;
5277          LIn [4] = (char)0xC9;
5278 
5279          pLIn = LIn;
5280          errorCode = U_ZERO_ERROR;
5281          pUOut = UOut;
5282 
5283          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5284          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5285          {
5286             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5287          }
5288 
5289          /* unpaired surrogate plus valid non-Unichar */
5290          LIn [0] = (char)0x14;
5291          LIn [1] = (char)0xD8;
5292          LIn [2] = (char)0x01;
5293          LIn [3] = (char)0x0F;
5294          LIn [4] = (char)0x3B;
5295 
5296          pLIn = LIn;
5297          errorCode = U_ZERO_ERROR;
5298          pUOut = UOut;
5299 
5300          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5301          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5302          {
5303             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5304          }
5305 
5306          /* unpaired surrogate plus chopped non-Unichar */
5307          LIn [0] = (char)0x14;
5308          LIn [1] = (char)0xD8;
5309          LIn [2] = (char)0x01;
5310          LIn [3] = (char)0x0F;
5311 
5312          pLIn = LIn;
5313          errorCode = U_ZERO_ERROR;
5314          pUOut = UOut;
5315 
5316          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5317 
5318          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5319          {
5320             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5321          }
5322        }
5323     }
5324    ucnv_close(cnv);  /* final cleanup */
5325 }
5326 
5327 
TestJitterbug255()5328 static void TestJitterbug255()
5329 {
5330     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5331     const char *testBuffer = (const char *)testBytes;
5332     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5333     UErrorCode status = U_ZERO_ERROR;
5334     /*UChar32 result;*/
5335     UConverter *cnv = 0;
5336 
5337     cnv = ucnv_open("shift-jis", &status);
5338     if (U_FAILURE(status) || cnv == 0) {
5339         log_data_err("Failed to open the converter for SJIS.\n");
5340                 return;
5341     }
5342     while (testBuffer != testEnd)
5343     {
5344         /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5345         if (U_FAILURE(status))
5346         {
5347             log_err("Failed to convert the next UChar for SJIS.\n");
5348             break;
5349         }
5350     }
5351     ucnv_close(cnv);
5352 }
5353 
TestEBCDICUS4XML()5354 static void TestEBCDICUS4XML()
5355 {
5356     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5357     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5358     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5359     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5360     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5361     UChar *unicodes = unicodes_x;
5362     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5363     char *target = target_x;
5364     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5365     UErrorCode status = U_ZERO_ERROR;
5366     UConverter *cnv = 0;
5367 
5368     cnv = ucnv_open("ebcdic-xml-us", &status);
5369     if (U_FAILURE(status) || cnv == 0) {
5370         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5371         return;
5372     }
5373     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5374     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5375         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5376             u_errorName(status));
5377         printUSeqErr(unicodes_x, 3);
5378         printUSeqErr(toUnicodeMaps, 3);
5379     }
5380     status = U_ZERO_ERROR;
5381     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5382     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5383         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5384             u_errorName(status));
5385         printSeqErr((const unsigned char*)target_x, 3);
5386         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5387     }
5388     ucnv_close(cnv);
5389 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5391 
5392 #if !UCONFIG_NO_COLLATION
5393 
TestJitterbug981()5394 static void TestJitterbug981(){
5395     const UChar* rules;
5396     int32_t rules_length, target_cap, bytes_needed, buff_size;
5397     UErrorCode status = U_ZERO_ERROR;
5398     UConverter *utf8cnv;
5399     UCollator* myCollator;
5400     char *buff;
5401     int numNeeded=0;
5402     utf8cnv = ucnv_open ("utf8", &status);
5403     if(U_FAILURE(status)){
5404         log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5405         return;
5406     }
5407     myCollator = ucol_open("zh", &status);
5408     if(U_FAILURE(status)){
5409         log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5410         ucnv_close(utf8cnv);
5411         return;
5412     }
5413 
5414     rules = ucol_getRules(myCollator, &rules_length);
5415     if(rules_length == 0) {
5416         log_data_err("missing zh tailoring rule string\n");
5417         ucol_close(myCollator);
5418         ucnv_close(utf8cnv);
5419         return;
5420     }
5421     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5422     buff = malloc(buff_size);
5423 
5424     target_cap = 0;
5425     do {
5426         ucnv_reset(utf8cnv);
5427         status = U_ZERO_ERROR;
5428         if(target_cap >= buff_size) {
5429             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5430             break;
5431         }
5432         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5433             rules, rules_length, &status);
5434         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5435         if(numNeeded!=0 && numNeeded!= bytes_needed){
5436             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5437             break;
5438         }
5439         numNeeded = bytes_needed;
5440     } while (status == U_BUFFER_OVERFLOW_ERROR);
5441     ucol_close(myCollator);
5442     ucnv_close(utf8cnv);
5443     free(buff);
5444 }
5445 
5446 #endif
5447 
5448 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293()5449 static void TestJitterbug1293(){
5450     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5451     char target[256];
5452     UErrorCode status = U_ZERO_ERROR;
5453     UConverter* conv=NULL;
5454     int32_t target_cap, bytes_needed, numNeeded = 0;
5455     conv = ucnv_open("shift-jis",&status);
5456     if(U_FAILURE(status)){
5457       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5458       return;
5459     }
5460 
5461     do{
5462         target_cap =0;
5463         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5464         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5465         if(numNeeded!=0 && numNeeded!= bytes_needed){
5466           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5467         }
5468         numNeeded = bytes_needed;
5469     } while (status == U_BUFFER_OVERFLOW_ERROR);
5470     if(U_FAILURE(status)){
5471       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5472       return;
5473     }
5474     ucnv_close(conv);
5475 }
5476 #endif
5477 
TestJB5275_1()5478 static void TestJB5275_1(){
5479 
5480     static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5481                                 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5482                                 /* Switch script: */
5483                                 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5484                                 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5485                                 "\xEF\x40\x3B\xB3\x0A";
5486     static const UChar expected[] ={
5487             0x003b, 0x0a15, 0x000a, /* Easy characters */
5488             0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5489             0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5490             0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5491             0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5492     };
5493 
5494     UErrorCode status = U_ZERO_ERROR;
5495     UConverter* conv = ucnv_open("iscii-gur", &status);
5496     UChar dest[100] = {'\0'};
5497     UChar* target = dest;
5498     UChar* targetLimit = dest+100;
5499     const char* source = data;
5500     const char* sourceLimit = data+strlen(data);
5501     const UChar* exp = expected;
5502 
5503     if (U_FAILURE(status)) {
5504         log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5505         return;
5506     }
5507 
5508     log_verbose("Testing switching back to default script when new line is encountered.\n");
5509     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5510     if(U_FAILURE(status)){
5511         log_err("conversion failed: %s \n", u_errorName(status));
5512     }
5513     targetLimit = target;
5514     target = dest;
5515     printUSeq(target, targetLimit-target);
5516     while(target<targetLimit){
5517         if(*exp!=*target){
5518             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5519         }
5520         target++;
5521         exp++;
5522     }
5523     ucnv_close(conv);
5524 }
5525 
TestJB5275()5526 static void TestJB5275(){
5527     static const char* data =
5528     /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5529     /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5530     /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5531         "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5532         "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5533         "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5534         "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5535         "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5536         "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5537         /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5538     static const UChar expected[] ={
5539         0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5540         0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5541         0x0038, 0x0C95, 0x000A, /* Kannada test */
5542         0x0039, 0x0D15, 0x000A, /* Malayalam test */
5543         0x003A, 0x0A95, 0x000A, /* Gujarati test */
5544         0x003B, 0x0A15, 0x000A, /* Punjabi test */
5545     };
5546 
5547     UErrorCode status = U_ZERO_ERROR;
5548     UConverter* conv = ucnv_open("iscii", &status);
5549     UChar dest[100] = {'\0'};
5550     UChar* target = dest;
5551     UChar* targetLimit = dest+100;
5552     const char* source = data;
5553     const char* sourceLimit = data+strlen(data);
5554     const UChar* exp = expected;
5555     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5556     if(U_FAILURE(status)){
5557         log_data_err("conversion failed: %s \n", u_errorName(status));
5558     }
5559     targetLimit = target;
5560     target = dest;
5561 
5562     printUSeq(target, targetLimit-target);
5563 
5564     while(target<targetLimit){
5565         if(*exp!=*target){
5566             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5567         }
5568         target++;
5569         exp++;
5570     }
5571     ucnv_close(conv);
5572 }
5573 
5574 static void
TestIsFixedWidth()5575 TestIsFixedWidth() {
5576     UErrorCode status = U_ZERO_ERROR;
5577     UConverter *cnv = NULL;
5578     int32_t i;
5579 
5580     const char *fixedWidth[] = {
5581             "US-ASCII",
5582             "UTF32",
5583             "ibm-5478_P100-1995"
5584     };
5585 
5586     const char *notFixedWidth[] = {
5587             "GB18030",
5588             "UTF8",
5589             "windows-949-2000",
5590             "UTF16"
5591     };
5592 
5593     for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5594         cnv = ucnv_open(fixedWidth[i], &status);
5595         if (cnv == NULL || U_FAILURE(status)) {
5596             log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5597             continue;
5598         }
5599 
5600         if (!ucnv_isFixedWidth(cnv, &status)) {
5601             log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5602         }
5603         ucnv_close(cnv);
5604     }
5605 
5606     for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5607         cnv = ucnv_open(notFixedWidth[i], &status);
5608         if (cnv == NULL || U_FAILURE(status)) {
5609             log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5610             continue;
5611         }
5612 
5613         if (ucnv_isFixedWidth(cnv, &status)) {
5614             log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5615         }
5616         ucnv_close(cnv);
5617     }
5618 }
5619